PageRenderTime 52ms CodeModel.GetById 16ms RepoModel.GetById 0ms app.codeStats 0ms

/src/runtime/opengl.cpp

https://github.com/josephsieh/Halide
C++ | 1750 lines | 1334 code | 246 blank | 170 comment | 267 complexity | 30de71bc75c5da35488ea4cf13bf80e8 MD5 | raw file
  1. #include "runtime_internal.h"
  2. #include "../buffer_t.h"
  3. #include "HalideRuntime.h"
  4. #include "mini_opengl.h"
  5. // This constant is used to indicate that the application will take
  6. // responsibility for binding the output render target before calling the
  7. // Halide function.
  8. #define HALIDE_GLSL_CLIENT_BOUND ((uint64_t)-1)
  9. // Implementation note: all functions that directly or indirectly access the
  10. // runtime state in halide_opengl_state must be declared as WEAK, otherwise
  11. // the behavior at runtime is undefined.
  12. // This function must be provided by the host environment to retrieve pointers
  13. // to OpenGL API functions.
  14. extern "C" void *halide_opengl_get_proc_address(void *user_context, const char *name);
  15. extern "C" int halide_opengl_create_context(void *user_context);
  16. extern "C" int isdigit(int c);
  17. // List of all OpenGL functions used by the runtime. The list is used to
  18. // declare and initialize the dispatch table in OpenGLState below.
  19. #define USED_GL_FUNCTIONS \
  20. GLFUNC(PFNGLDELETETEXTURESPROC, DeleteTextures); \
  21. GLFUNC(PFNGLGENTEXTURESPROC, GenTextures); \
  22. GLFUNC(PFNGLBINDTEXTUREPROC, BindTexture); \
  23. GLFUNC(PFNGLGETERRORPROC, GetError); \
  24. GLFUNC(PFNGLVIEWPORTPROC, Viewport); \
  25. GLFUNC(PFNGLGENBUFFERSPROC, GenBuffers); \
  26. GLFUNC(PFNGLDELETEBUFFERSPROC, DeleteBuffers); \
  27. GLFUNC(PFNGLBINDBUFFERPROC, BindBuffer); \
  28. GLFUNC(PFNGLBUFFERDATAPROC, BufferData); \
  29. GLFUNC(PFNGLTEXPARAMETERIPROC, TexParameteri); \
  30. GLFUNC(PFNGLTEXIMAGE2DPROC, TexImage2D); \
  31. GLFUNC(PFNGLTEXSUBIMAGE2DPROC, TexSubImage2D); \
  32. GLFUNC(PFNGLDISABLEPROC, Disable); \
  33. GLFUNC(PFNGLCREATESHADERPROC, CreateShader); \
  34. GLFUNC(PFNGLACTIVETEXTUREPROC, ActiveTexture); \
  35. GLFUNC(PFNGLSHADERSOURCEPROC, ShaderSource); \
  36. GLFUNC(PFNGLCOMPILESHADERPROC, CompileShader); \
  37. GLFUNC(PFNGLGETSHADERIVPROC, GetShaderiv); \
  38. GLFUNC(PFNGLGETSHADERINFOLOGPROC, GetShaderInfoLog); \
  39. GLFUNC(PFNGLDELETESHADERPROC, DeleteShader); \
  40. GLFUNC(PFNGLCREATEPROGRAMPROC, CreateProgram); \
  41. GLFUNC(PFNGLATTACHSHADERPROC, AttachShader); \
  42. GLFUNC(PFNGLLINKPROGRAMPROC, LinkProgram); \
  43. GLFUNC(PFNGLGETPROGRAMIVPROC, GetProgramiv); \
  44. GLFUNC(PFNGLGETPROGRAMINFOLOGPROC, GetProgramInfoLog); \
  45. GLFUNC(PFNGLUSEPROGRAMPROC, UseProgram); \
  46. GLFUNC(PFNGLDELETEPROGRAMPROC, DeleteProgram); \
  47. GLFUNC(PFNGLGETUNIFORMLOCATIONPROC, GetUniformLocation); \
  48. GLFUNC(PFNGLUNIFORM1IVPROC, Uniform1iv); \
  49. GLFUNC(PFNGLUNIFORM2IVPROC, Uniform2iv); \
  50. GLFUNC(PFNGLUNIFORM2IVPROC, Uniform4iv); \
  51. GLFUNC(PFNGLUNIFORM1FVPROC, Uniform1fv); \
  52. GLFUNC(PFNGLUNIFORM1FVPROC, Uniform4fv); \
  53. GLFUNC(PFNGLGENFRAMEBUFFERSPROC, GenFramebuffers); \
  54. GLFUNC(PFNGLDELETEFRAMEBUFFERSPROC, DeleteFramebuffers); \
  55. GLFUNC(PFNGLCHECKFRAMEBUFFERSTATUSPROC, CheckFramebufferStatus); \
  56. GLFUNC(PFNGLBINDFRAMEBUFFERPROC, BindFramebuffer); \
  57. GLFUNC(PFNGLFRAMEBUFFERTEXTURE2DPROC, FramebufferTexture2D); \
  58. GLFUNC(PFNGLGETATTRIBLOCATIONPROC, GetAttribLocation); \
  59. GLFUNC(PFNGLVERTEXATTRIBPOINTERPROC, VertexAttribPointer); \
  60. GLFUNC(PFNGLDRAWELEMENTSPROC, DrawElements); \
  61. GLFUNC(PFNGLENABLEVERTEXATTRIBARRAYPROC, EnableVertexAttribArray); \
  62. GLFUNC(PFNGLDISABLEVERTEXATTRIBARRAYPROC, DisableVertexAttribArray); \
  63. GLFUNC(PFNGLPIXELSTOREIPROC, PixelStorei); \
  64. GLFUNC(PFNGLREADPIXELS, ReadPixels); \
  65. GLFUNC(PFNGLGETSTRINGPROC, GetString); \
  66. GLFUNC(PFNGLGETINTEGERV, GetIntegerv); \
  67. GLFUNC(PFNGLGETSTRINGI, GetStringi)
  68. // ---------- Types ----------
  69. namespace Halide { namespace Runtime { namespace Internal {
  70. enum OpenGLProfile {
  71. OpenGL,
  72. OpenGLES
  73. };
  74. struct Argument {
  75. // The kind of data stored in an argument
  76. enum Kind {
  77. Invalid,
  78. Uniform, // uniform variable
  79. Varying, // varying attribute
  80. Inbuf, // input texture
  81. Outbuf // output texture
  82. };
  83. // The elementary data type of the argument
  84. enum Type {
  85. Void, Bool, Float, Int8, Int16, Int32, UInt8, UInt16, UInt32
  86. };
  87. char *name;
  88. Kind kind;
  89. Type type;
  90. Argument *next;
  91. };
  92. struct KernelInfo {
  93. char *source;
  94. Argument *arguments;
  95. GLuint shader_id;
  96. GLuint program_id;
  97. };
  98. // Information about each known texture.
  99. struct TextureInfo {
  100. GLuint id;
  101. GLint min[4];
  102. GLint extent[4];
  103. bool halide_allocated; // allocated by us or host app?
  104. TextureInfo *next;
  105. };
  106. struct ModuleState {
  107. KernelInfo *kernel;
  108. ModuleState *next;
  109. };
  110. // All persistent state maintained by the runtime.
  111. struct GlobalState {
  112. void init();
  113. bool initialized;
  114. // Information about the OpenGL platform we're running on.
  115. OpenGLProfile profile;
  116. int major_version, minor_version;
  117. bool have_vertex_array_objects;
  118. bool have_texture_rg;
  119. bool have_texture_float;
  120. // Various objects shared by all filter kernels
  121. GLuint framebuffer_id;
  122. GLuint vertex_array_object;
  123. GLuint vertex_buffer;
  124. GLuint element_buffer;
  125. // A list of all textures that are still active
  126. TextureInfo *textures;
  127. // Declare pointers used OpenGL functions
  128. #define GLFUNC(PTYPE,VAR) PTYPE VAR
  129. USED_GL_FUNCTIONS;
  130. #undef GLFUNC
  131. PFNGLGENVERTEXARRAYS GenVertexArrays;
  132. PFNGLBINDVERTEXARRAY BindVertexArray;
  133. PFNGLDELETEVERTEXARRAYS DeleteVertexArrays;
  134. };
  135. WEAK GlobalState global_state;
  136. // A list of module-specific state. Each module corresponds to a single Halide filter
  137. WEAK ModuleState *state_list;
  138. WEAK const char *kernel_marker = "/// KERNEL ";
  139. WEAK const char *input_marker = "/// IN_BUFFER ";
  140. WEAK const char *output_marker = "/// OUT_BUFFER ";
  141. WEAK const char *uniform_marker = "/// UNIFORM ";
  142. WEAK const char *varying_marker = "/// VARYING ";
  143. // ---------- Macros ----------
  144. // Convenience macro for accessing state of the OpenGL runtime
  145. #define ST global_state
  146. // Ensure that OpenGL runtime is correctly initialized. Used in all public API
  147. // functions.
  148. #define CHECK_INITIALIZED(ERRORCODE) \
  149. if (!ST.initialized) { \
  150. error(user_context) << "OpenGL runtime not initialized."; \
  151. return ERRORCODE; \
  152. }
  153. // Macro for error checking.
  154. #ifdef DEBUG_RUNTIME
  155. #define LOG_GLERROR(ERR) \
  156. error(user_context) << __FILE__ << ":" << __LINE__ << ": OpenGL error " << (ERR);
  157. #else
  158. #define LOG_GLERROR(ERR)
  159. #endif
  160. #define CHECK_GLERROR(ERRORCODE) do { \
  161. GLenum err = global_state.GetError(); \
  162. if (err != GL_NO_ERROR) { \
  163. LOG_GLERROR(err); \
  164. error(user_context) << __FILE__ << ":" << __LINE__ << "OpenGL error " << err; \
  165. return ERRORCODE; \
  166. }} while (0)
  167. // ---------- Helper functions ----------
  168. WEAK char *strndup(const char *s, size_t n) {
  169. char *p = (char*)malloc(n+1);
  170. memcpy(p, s, n);
  171. p[n] = '\0';
  172. return p;
  173. }
  174. WEAK GLuint get_texture_id(buffer_t *buf) {
  175. return buf->dev & 0xffffffff;
  176. }
  177. WEAK void debug_buffer(void *user_context, buffer_t *buf) {
  178. debug(user_context)
  179. << " dev: " << buf->dev << "\n"
  180. << " host: " << buf->host << "\n"
  181. << " extent: " << buf->extent[0] << " " << buf->extent[1]
  182. << " " << buf->extent[2] << " " << buf->extent[3] << "\n"
  183. << " stride: " << buf->stride[0] << " " << buf->stride[1]
  184. << " " << buf->stride[2] << " " << buf->stride[3] << "\n"
  185. << " min: " << buf->min[0] << " " << buf->min[1]
  186. << " " << buf->min[2] << " " << buf->min[3] << "\n"
  187. << " elem_size: " << buf->elem_size << "\n"
  188. << " host_dirty: " << buf->host_dirty << "\n"
  189. << " dev_dirty: " << buf->dev_dirty << "\n";
  190. }
  191. WEAK GLuint make_shader(void *user_context, GLenum type,
  192. const char *source, GLint *length) {
  193. debug(user_context) << "SHADER SOURCE:\n"
  194. << source << "\n";
  195. GLuint shader = ST.CreateShader(type);
  196. CHECK_GLERROR(1);
  197. if (*source == '\0') {
  198. debug(user_context) << "Halide GLSL: passed shader source is empty, using default.\n";
  199. const char *default_shader = "varying vec2 pixcoord;\n void main() { }";
  200. ST.ShaderSource(shader, 1, (const GLchar **)&default_shader, NULL);
  201. } else {
  202. ST.ShaderSource(shader, 1, (const GLchar **)&source, length);
  203. }
  204. CHECK_GLERROR(1);
  205. ST.CompileShader(shader);
  206. CHECK_GLERROR(1);
  207. GLint shader_ok = 0;
  208. ST.GetShaderiv(shader, GL_COMPILE_STATUS, &shader_ok);
  209. if (!shader_ok) {
  210. print(user_context) << "Could not compile shader:\n";
  211. GLint log_len;
  212. ST.GetShaderiv(shader, GL_INFO_LOG_LENGTH, &log_len);
  213. char *log = (char *)malloc(log_len);
  214. ST.GetShaderInfoLog(shader, log_len, NULL, log);
  215. print(user_context) << log << "\n";
  216. free(log);
  217. ST.DeleteShader(shader);
  218. return 0;
  219. }
  220. return shader;
  221. }
  222. // Check whether string starts with a given prefix.
  223. // Returns pointer to character after matched prefix if successful or NULL.
  224. WEAK const char *match_prefix(const char *s, const char *prefix) {
  225. if (0 == strncmp(s, prefix, strlen(prefix))) {
  226. return s + strlen(prefix);
  227. }
  228. return NULL;
  229. }
  230. // Parse declaration of the form "type name" and construct matching Argument.
  231. WEAK Argument *parse_argument(void *user_context, const char *src,
  232. const char *end) {
  233. const char *name;
  234. Argument::Type type = Argument::Void;
  235. if ((name = match_prefix(src, "float "))) {
  236. type = Argument::Float;
  237. } else if ((name = match_prefix(src, "bool "))) {
  238. type = Argument::Bool;
  239. } else if ((name = match_prefix(src, "int8_t "))) {
  240. type = Argument::Int8;
  241. } else if ((name = match_prefix(src, "int16_t "))) {
  242. type = Argument::Int16;
  243. } else if ((name = match_prefix(src, "int32_t "))) {
  244. type = Argument::Int32;
  245. } else if ((name = match_prefix(src, "uint8_t "))) {
  246. type = Argument::UInt8;
  247. } else if ((name = match_prefix(src, "uint16_t "))) {
  248. type = Argument::UInt16;
  249. } else if ((name = match_prefix(src, "uint32_t "))) {
  250. type = Argument::UInt32;
  251. }
  252. if (type == Argument::Void) {
  253. error(user_context) << "Internal error: argument type not supported";
  254. return NULL;
  255. }
  256. Argument *arg = (Argument *)malloc(sizeof(Argument));
  257. arg->name = strndup(name, end - name);
  258. arg->type = type;
  259. arg->kind = Argument::Invalid;
  260. arg->next = 0;
  261. return arg;
  262. }
  263. // Create KernelInfo for a piece of GLSL code
  264. WEAK KernelInfo *create_kernel(void *user_context, const char *src, int size) {
  265. KernelInfo *kernel = (KernelInfo *)malloc(sizeof(KernelInfo));
  266. kernel->source = strndup(src, size);
  267. kernel->arguments = NULL;
  268. kernel->program_id = 0;
  269. #ifdef DEBUG_RUNTIME
  270. {
  271. // Android logcat output clips at ~1000 character chunks by default;
  272. // to avoid clipping the interesting stuff, emit a line at a time.
  273. // This is less efficient, but it's DEBUG-only.
  274. debug(user_context) << "Compiling GLSL kernel (size = " << size << "):\n";
  275. const int kBufSize = 255;
  276. char buf[kBufSize + 1];
  277. const char* s = src;
  278. int d = 0;
  279. while (s < src + size) {
  280. while (*s != '\n' && *s != '\0' && d < kBufSize) {
  281. buf[d++] = *s++;
  282. }
  283. buf[d++] = '\0';
  284. debug(user_context) << buf << "\n";
  285. d = 0;
  286. while (*s == '\n' || *s == '\0') {
  287. s++;
  288. }
  289. }
  290. }
  291. #endif
  292. // Parse initial comment block
  293. const char *line = kernel->source;
  294. while (*line) {
  295. const char *next_line = strchr(line, '\n') + 1;
  296. if (!next_line)
  297. next_line = line + size;
  298. const char *args;
  299. if ((args = match_prefix(line, kernel_marker))) {
  300. // ignore
  301. } else if ((args = match_prefix(line, uniform_marker))) {
  302. if (Argument *arg =
  303. parse_argument(user_context, args, next_line - 1)) {
  304. arg->kind = Argument::Uniform;
  305. arg->next = kernel->arguments;
  306. kernel->arguments = arg;
  307. } else {
  308. halide_error(user_context, "Invalid VAR marker");
  309. goto error;
  310. }
  311. } else if ((args = match_prefix(line, varying_marker))) {
  312. if (Argument *arg =
  313. parse_argument(user_context, args, next_line - 1)) {
  314. arg->kind = Argument::Varying;
  315. arg->next = kernel->arguments;
  316. kernel->arguments = arg;
  317. } else {
  318. halide_error(user_context, "Invalid VARYING marker");
  319. goto error;
  320. }
  321. } else if ((args = match_prefix(line, input_marker))) {
  322. if (Argument *arg = parse_argument(user_context, args, next_line - 1)) {
  323. arg->kind = Argument::Inbuf;
  324. arg->next = kernel->arguments;
  325. kernel->arguments = arg;
  326. } else {
  327. error(user_context) << "Invalid IN_BUFFER marker";
  328. goto error;
  329. }
  330. } else if ((args = match_prefix(line, output_marker))) {
  331. if (Argument *arg = parse_argument(user_context, args, next_line - 1)) {
  332. arg->kind = Argument::Outbuf;
  333. arg->next = kernel->arguments;
  334. kernel->arguments = arg;
  335. } else {
  336. error(user_context) << "Invalid OUT_BUFFER marker";
  337. goto error;
  338. }
  339. } else {
  340. // Stop parsing if we encounter something we don't recognize
  341. break;
  342. }
  343. line = next_line;
  344. }
  345. // Arguments are currently in reverse order, flip the list.
  346. {
  347. Argument *cur = kernel->arguments;
  348. kernel->arguments = NULL;
  349. while (cur) {
  350. Argument *next = cur->next;
  351. cur->next = kernel->arguments;
  352. kernel->arguments = cur;
  353. cur = next;
  354. }
  355. }
  356. return kernel;
  357. error:
  358. free(kernel);
  359. return NULL;
  360. }
  361. // Delete all data associated with a kernel. Also release associated OpenGL
  362. // shader and program.
  363. WEAK void delete_kernel(void *user_context, KernelInfo *kernel) {
  364. ST.DeleteProgram(kernel->program_id);
  365. Argument *arg = kernel->arguments;
  366. while (arg) {
  367. Argument *next = arg->next;
  368. free(arg->name);
  369. free(arg);
  370. arg = next;
  371. }
  372. free(kernel->source);
  373. free(kernel);
  374. }
  375. // Vertices and their order in a triangle strip for rendering a quad
  376. // ranging from (-1,-1) to (1,1).
  377. WEAK GLfloat quad_vertices[] = {
  378. -1.0f, -1.0f, 1.0f, -1.0f,
  379. -1.0f, 1.0f, 1.0f, 1.0f
  380. };
  381. WEAK GLuint quad_indices[] = { 0, 1, 2, 3 };
  382. WEAK void GlobalState::init() {
  383. initialized = false;
  384. profile = OpenGL;
  385. major_version = 2;
  386. minor_version = 0;
  387. framebuffer_id = 0;
  388. vertex_array_object = vertex_buffer = element_buffer = 0;
  389. textures = NULL;
  390. have_vertex_array_objects = false;
  391. have_texture_rg = false;
  392. }
  393. WEAK int load_gl_func(void *user_context, const char *name, void **ptr) {
  394. void *p = halide_opengl_get_proc_address(user_context, name);
  395. if (!p) {
  396. error(user_context) << "Could not load function pointer for " << name;
  397. return -1;
  398. }
  399. *ptr = p;
  400. return 0;
  401. }
  402. WEAK bool extension_supported(void *user_context, const char *name) {
  403. if (ST.major_version >= 3) {
  404. GLint num_extensions = 0;
  405. ST.GetIntegerv(GL_NUM_EXTENSIONS, &num_extensions);
  406. for (int i = 0; i < num_extensions; i++) {
  407. const char *ext = (const char *)ST.GetStringi(GL_EXTENSIONS, i);
  408. if (strcmp(ext, name) == 0) {
  409. return true;
  410. }
  411. }
  412. } else {
  413. const char *start = (const char *)ST.GetString(GL_EXTENSIONS);
  414. if (!start) {
  415. return false;
  416. }
  417. while (const char *pos = strstr(start, name)) {
  418. const char *end = pos + strlen(name);
  419. // Ensure the found match is a full word, not a substring.
  420. if ((pos == start || pos[-1] == ' ') &&
  421. (*end == ' ' || *end == '\0')) {
  422. return true;
  423. }
  424. start = end;
  425. }
  426. }
  427. return false;
  428. }
  429. // Check for availability of various version- and extension-specific features
  430. // and hook up functions pointers as necessary
  431. WEAK void init_extensions(void *user_context) {
  432. if (ST.major_version >= 3) {
  433. ST.have_vertex_array_objects = true;
  434. load_gl_func(user_context, "glGenVertexArrays", (void**)&ST.GenVertexArrays);
  435. load_gl_func(user_context, "glBindVertexArray", (void**)&ST.BindVertexArray);
  436. load_gl_func(user_context, "glDeleteVertexArrays", (void**)&ST.DeleteVertexArrays);
  437. }
  438. ST.have_texture_rg =
  439. ST.major_version >= 3 ||
  440. (ST.profile == OpenGL &&
  441. extension_supported(user_context, "GL_ARB_texture_rg")) ||
  442. (ST.profile == OpenGLES &&
  443. extension_supported(user_context, "GL_EXT_texture_rg"));
  444. ST.have_texture_float =
  445. (ST.major_version >= 3) ||
  446. (ST.profile == OpenGL &&
  447. extension_supported(user_context, "GL_ARB_texture_float")) ||
  448. (ST.profile == OpenGLES &&
  449. extension_supported(user_context, "GL_OES_texture_float"));
  450. }
  451. WEAK const char *parse_int(const char *str, int *val) {
  452. if (!isdigit(*str)) return NULL;
  453. int v = 0;
  454. do {
  455. v = 10 * v + (*str++ - '0');
  456. } while (isdigit(*str));
  457. *val = v;
  458. return str;
  459. }
  460. WEAK const char *parse_opengl_version(const char *str, int *major, int *minor) {
  461. str = parse_int(str, major);
  462. if (str == NULL || *str != '.') {
  463. return NULL;
  464. }
  465. return parse_int(str + 1, minor);
  466. }
  467. // Initialize the OpenGL-specific parts of the runtime.
  468. WEAK int halide_opengl_init(void *user_context) {
  469. if (ST.initialized) {
  470. return 0;
  471. }
  472. global_state.init();
  473. // Make a context if there isn't one
  474. if (halide_opengl_create_context(user_context)) {
  475. error(user_context) << "Failed to make OpenGL context";
  476. return -1;
  477. }
  478. // Initialize pointers to core OpenGL functions.
  479. #define GLFUNC(TYPE, VAR) \
  480. if (load_gl_func(user_context, "gl" #VAR, (void**)&ST.VAR) < 0) { \
  481. return -1; \
  482. }
  483. USED_GL_FUNCTIONS;
  484. #undef GLFUNC
  485. const char *version = (const char *)ST.GetString(GL_VERSION);
  486. const char *gles_version = match_prefix(version, "OpenGL ES ");
  487. int major, minor;
  488. if (gles_version && parse_opengl_version(gles_version, &major, &minor)) {
  489. ST.profile = OpenGLES;
  490. ST.major_version = major;
  491. ST.minor_version = minor;
  492. } else if (parse_opengl_version(version, &major, &minor)) {
  493. ST.profile = OpenGL;
  494. ST.major_version = major;
  495. ST.minor_version = minor;
  496. } else {
  497. ST.profile = OpenGL;
  498. ST.major_version = 2;
  499. ST.minor_version = 0;
  500. }
  501. init_extensions(user_context);
  502. debug(user_context)
  503. << "Halide running on OpenGL "
  504. << ((ST.profile == OpenGL) ? "" : "ES ")
  505. << major << "." << minor << "\n"
  506. << " vertex_array_objects: "
  507. << (ST.have_vertex_array_objects ? "yes\n" : "no\n")
  508. << " texture_rg: "
  509. << (ST.have_texture_rg ? "yes\n" : "no\n")
  510. << " texture_float: "
  511. << (ST.have_texture_float ? "yes\n" : "no\n");
  512. // Initialize framebuffer.
  513. ST.GenFramebuffers(1, &ST.framebuffer_id);
  514. CHECK_GLERROR(1);
  515. // Initialize vertex and element buffers.
  516. GLuint buf[2];
  517. ST.GenBuffers(2, buf);
  518. ST.BindBuffer(GL_ARRAY_BUFFER, buf[0]);
  519. ST.BufferData(GL_ARRAY_BUFFER, sizeof(quad_vertices), quad_vertices, GL_STATIC_DRAW);
  520. ST.BindBuffer(GL_ARRAY_BUFFER, 0);
  521. ST.BindBuffer(GL_ELEMENT_ARRAY_BUFFER, buf[1]);
  522. ST.BufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(quad_indices), quad_indices, GL_STATIC_DRAW);
  523. ST.BindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0);
  524. ST.vertex_buffer = buf[0];
  525. ST.element_buffer = buf[1];
  526. if (ST.have_vertex_array_objects) {
  527. ST.GenVertexArrays(1, &ST.vertex_array_object);
  528. }
  529. CHECK_GLERROR(1);
  530. ST.initialized = true;
  531. return 0;
  532. }
  533. // Release all data allocated by the runtime.
  534. //
  535. // The OpenGL context itself is generally managed by the host application, so
  536. // we leave it untouched.
  537. WEAK void halide_opengl_release(void *user_context) {
  538. if (!ST.initialized) return;
  539. #ifdef DEBUG
  540. halide_printf(user_context, "halide_opengl_release\n");
  541. #endif
  542. ST.DeleteFramebuffers(1, &ST.framebuffer_id);
  543. ModuleState *mod = state_list;
  544. while (mod) {
  545. delete_kernel(user_context, mod->kernel);
  546. mod->kernel = NULL;
  547. ModuleState *next = mod->next;
  548. // do not call free(mod) to avoid dangling pointers: the module state
  549. // is still referenced in the code generated by Halide (see
  550. // CodeGen_GPU_Host::get_module_state).
  551. mod = next;
  552. }
  553. // Delete all textures that were allocated by us.
  554. TextureInfo *tex = ST.textures;
  555. int freed_textures = 0;
  556. while (tex) {
  557. TextureInfo *next = tex->next;
  558. if (tex->halide_allocated) {
  559. ST.DeleteTextures(1, &tex->id);
  560. CHECK_GLERROR();
  561. freed_textures++;
  562. }
  563. free(tex);
  564. tex = next;
  565. }
  566. if (freed_textures > 0) {
  567. debug(user_context) << "halide_opengl_release: deleted "
  568. << freed_textures << " dangling texture(s).\n";
  569. }
  570. ST.DeleteBuffers(1, &ST.vertex_buffer);
  571. ST.DeleteBuffers(1, &ST.element_buffer);
  572. if (ST.have_vertex_array_objects) {
  573. ST.DeleteVertexArrays(1, &ST.vertex_array_object);
  574. }
  575. ST = GlobalState();
  576. }
  577. // Determine OpenGL texture format and channel type for a given buffer_t.
  578. WEAK bool get_texture_format(void *user_context, buffer_t *buf,
  579. GLint *internal_format, GLint *format, GLint *type) {
  580. if (buf->elem_size == 1) {
  581. *type = GL_UNSIGNED_BYTE;
  582. } else if (buf->elem_size == 2) {
  583. *type = GL_UNSIGNED_SHORT;
  584. } else if (buf->elem_size == 4) {
  585. *type = GL_FLOAT;
  586. } else {
  587. error(user_context) << "GLSL: Only uint8, uint16, and float textures are supported.";
  588. return false;
  589. }
  590. const int channels = buf->extent[2];
  591. if (channels <= 2 && !ST.have_texture_rg) {
  592. error(user_context) << "GLSL: This version of OpenGL doesn't support <=2 channels.";
  593. return false;
  594. }
  595. if (channels == 1) {
  596. *format = GL_RED;
  597. } else if (channels == 2) {
  598. *format = GL_RG;
  599. } else if (channels == 3) {
  600. *format = GL_RGB;
  601. } else if (channels == 4) {
  602. *format = GL_RGBA;
  603. } else {
  604. error(user_context) << "GLSL: Only 3 or 4 color channels are supported.";
  605. return false;
  606. }
  607. switch (ST.profile) {
  608. case OpenGLES:
  609. // For OpenGL ES, the texture format has to match the pixel format
  610. // since there no conversion is performed during texture transfers.
  611. // See OES_texture_float.
  612. *internal_format = *format;
  613. break;
  614. case OpenGL:
  615. // For desktop OpenGL, the internal format specifiers include the
  616. // precise data type, see ARB_texture_float.
  617. if (*type == GL_FLOAT) {
  618. switch (*format) {
  619. case GL_RED: *internal_format = GL_R32F; break;
  620. case GL_RG: *internal_format = GL_RG32F; break;
  621. case GL_RGB: *internal_format = GL_RGB32F; break;
  622. case GL_RGBA: *internal_format = GL_RGBA32F; break;
  623. }
  624. } else {
  625. *internal_format = *format;
  626. }
  627. break;
  628. }
  629. return true;
  630. }
  631. WEAK TextureInfo *find_texture(GLuint tex) {
  632. TextureInfo *texinfo = ST.textures;
  633. while (texinfo && texinfo->id != tex) {
  634. texinfo = texinfo->next;
  635. }
  636. return texinfo;
  637. }
  638. // Allocate a new texture matching the dimension and color format of the
  639. // specified buffer.
  640. WEAK int halide_opengl_dev_malloc(void *user_context, buffer_t *buf) {
  641. if (int error = halide_opengl_init(user_context)) {
  642. return error;
  643. }
  644. if (!buf) {
  645. error(user_context) << "Invalid buffer";
  646. return 1;
  647. }
  648. // If the texture was already created by the host application, check that
  649. // it has the correct format. Otherwise, allocate and set up an
  650. // appropriate texture.
  651. GLuint tex = get_texture_id(buf);
  652. bool halide_allocated = false;
  653. GLint width, height;
  654. if (tex != 0) {
  655. #ifdef HAVE_GLES3
  656. ST.BindTexture(GL_TEXTURE_2D, tex);
  657. ST.GetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_WIDTH, &width);
  658. ST.GetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_HEIGHT, &height);
  659. CHECK_GLERROR(1);
  660. if (width < buf->extent[0] || height < buf->extent[1]) {
  661. error(user_context)
  662. << "Existing texture is smaller than buffer. "
  663. << "Texture size: " << width << "x" << height
  664. << ", buffer size: " << buf->extent[0] << "x" << buf->extent[1];
  665. return 1;
  666. }
  667. #endif
  668. } else {
  669. if (buf->extent[3] > 1) {
  670. error(user_context) << "3D textures are not supported";
  671. return 1;
  672. }
  673. // Generate texture ID
  674. ST.GenTextures(1, &tex);
  675. CHECK_GLERROR(1);
  676. // Set parameters for this texture: no interpolation and clamp to edges.
  677. ST.BindTexture(GL_TEXTURE_2D, tex);
  678. ST.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
  679. ST.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
  680. ST.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
  681. ST.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
  682. CHECK_GLERROR(1);
  683. // Create empty texture here and fill it with glTexSubImage2D later.
  684. GLint internal_format = 0;
  685. GLint format = 0;
  686. GLint type = GL_UNSIGNED_BYTE;
  687. if (!get_texture_format(user_context, buf, &internal_format, &format, &type)) {
  688. error(user_context) << "Invalid texture format";
  689. return 1;
  690. }
  691. width = buf->extent[0];
  692. height = buf->extent[1];
  693. ST.TexImage2D(GL_TEXTURE_2D, 0, internal_format,
  694. width, height, 0, format, type, NULL);
  695. CHECK_GLERROR(1);
  696. buf->dev = tex;
  697. halide_allocated = true;
  698. debug(user_context) << "Allocated texture " << tex
  699. << " of size " << width << " x " << height << "\n";
  700. ST.BindTexture(GL_TEXTURE_2D, 0);
  701. }
  702. // Record main information about texture and remember it for later. In
  703. // halide_opengl_dev_run we are only given the texture ID and not the full
  704. // buffer_t, so we copy the interesting information here. Note: there can
  705. // be multiple dev_malloc calls for the same buffer_t; only record texture
  706. // information once.
  707. if (!find_texture(tex)) {
  708. TextureInfo *texinfo = (TextureInfo*)malloc(sizeof(TextureInfo));
  709. texinfo->id = tex;
  710. for (int i=0; i<3; i++) {
  711. texinfo->min[i] = buf->min[i];
  712. texinfo->extent[i] = buf->extent[i];
  713. }
  714. texinfo->halide_allocated = halide_allocated;
  715. texinfo->next = ST.textures;
  716. ST.textures = texinfo;
  717. }
  718. return 0;
  719. }
  720. // Delete all texture information associated with a buffer. The OpenGL texture
  721. // itself is only deleted if it was actually allocated by Halide and not
  722. // provided by the host application.
  723. WEAK int halide_opengl_dev_free(void *user_context, buffer_t *buf) {
  724. CHECK_INITIALIZED(1);
  725. GLuint tex = get_texture_id(buf);
  726. if (tex == 0) {
  727. return 0;
  728. }
  729. // Look up corresponding TextureInfo and unlink it from the list.
  730. TextureInfo **ptr = &ST.textures;
  731. TextureInfo *texinfo = *ptr;
  732. for (; texinfo != NULL; ptr = &texinfo->next, texinfo = *ptr) {
  733. if (texinfo->id == tex) {
  734. *ptr = texinfo->next;
  735. texinfo->next = NULL;
  736. break;
  737. }
  738. }
  739. if (!texinfo) {
  740. error(user_context) << "Internal error: texture " << tex << " not found.";
  741. return 1;
  742. }
  743. // Delete texture if it was allocated by us.
  744. if (texinfo->halide_allocated) {
  745. debug(user_context) << "Deleting texture " << tex << "\n";
  746. ST.DeleteTextures(1, &tex);
  747. CHECK_GLERROR(1);
  748. buf->dev = 0;
  749. }
  750. free(texinfo);
  751. halide_release_jit_module();
  752. return 0;
  753. }
  754. // Called at the beginning of a code block generated by Halide. This function
  755. // is responsible for setting up the OpenGL environment and compiling the GLSL
  756. // code into a fragment shader.
  757. WEAK int halide_opengl_init_kernels(void *user_context, void **state_ptr,
  758. const char *src, int size) {
  759. if (int error = halide_opengl_init(user_context)) {
  760. return error;
  761. }
  762. ModuleState **state = (ModuleState **)state_ptr;
  763. ModuleState *module = *state;
  764. if (!module) {
  765. module = (ModuleState *)malloc(sizeof(ModuleState));
  766. module->kernel = NULL;
  767. module->next = state_list;
  768. state_list = module;
  769. *state = module;
  770. }
  771. KernelInfo *kernel = module->kernel;
  772. if (!kernel) {
  773. kernel = create_kernel(user_context, src, size);
  774. if (!kernel) {
  775. error(user_context) << "Invalid kernel: " << src;
  776. return -1;
  777. }
  778. module->kernel = kernel;
  779. }
  780. if (kernel->program_id == 0) {
  781. // Create the vertex shader the runtime will output boilerplate for the
  782. // vertex shader based on a fixed program plus arguments obtained from
  783. // the comment header passed in the fragment shader. Since there is a
  784. // relatively small number of vertices (i.e. usually only four) per
  785. // vertex expressions interpolated by varying attributes are evaluated
  786. // by host code on the CPU and passed to the GPU as values in the
  787. // vertex buffer.
  788. enum { PrinterLength = 1024*256 };
  789. Printer<StringStreamPrinter,PrinterLength> vertex_src(user_context);
  790. // Count the number of varying attributes, this is 2 for the spatial
  791. // x and y coordinates, plus the number of scalar varying attribute
  792. // expressions pulled out of the fragment shader.
  793. int num_varying_float = 2;
  794. for (Argument* arg = kernel->arguments; arg; arg=arg->next) {
  795. if (arg->kind == Argument::Varying)
  796. ++num_varying_float;
  797. }
  798. int num_packed_varying_float = ((num_varying_float + 3) & ~0x3) / 4;
  799. for (int i = 0; i != num_packed_varying_float; ++i) {
  800. vertex_src << "attribute vec4 _varyingf" << i << "_attrib;\n";
  801. vertex_src << "varying vec4 _varyingf" << i << ";\n";
  802. }
  803. vertex_src << "uniform ivec2 output_min;\n"
  804. << "uniform ivec2 output_extent;\n"
  805. << "void main() {\n"
  806. // Host codegen always passes the spatial vertex coordinates
  807. // in the first two elements of the _varyingf0_attrib
  808. << " vec2 position = vec2(_varyingf0_attrib[0], _varyingf0_attrib[1]);\n"
  809. << " gl_Position = vec4(position, 0.0, 1.0);\n"
  810. << " vec2 texcoord = 0.5 * position + 0.5;\n"
  811. << " vec2 pixcoord = texcoord * vec2(output_extent.xy) + vec2(output_min.xy);\n";
  812. // Copy through all of the varying attributes
  813. for (int i = 0; i != num_packed_varying_float; ++i) {
  814. vertex_src << " _varyingf" << i << " = _varyingf" << i << "_attrib;\n";
  815. }
  816. vertex_src << " _varyingf0.xy = pixcoord;\n";
  817. vertex_src << "}\n";
  818. // Check to see if there was sufficient storage for the vertex program.
  819. if (vertex_src.size() >= PrinterLength) {
  820. error(user_context) << "Vertex shader source truncated";
  821. return 1;
  822. }
  823. // Initialize vertex shader.
  824. GLuint vertex_shader_id = make_shader(user_context,
  825. GL_VERTEX_SHADER, vertex_src.buf, NULL);
  826. if (vertex_shader_id == 0) {
  827. halide_error(user_context, "Failed to create vertex shader");
  828. return 1;
  829. }
  830. // Create the fragment shader
  831. GLuint fragment_shader_id = make_shader(user_context, GL_FRAGMENT_SHADER,
  832. kernel->source, NULL);
  833. // Link GLSL program
  834. GLuint program = ST.CreateProgram();
  835. ST.AttachShader(program, vertex_shader_id);
  836. ST.AttachShader(program, fragment_shader_id);
  837. ST.LinkProgram(program);
  838. // Release the individual shaders
  839. ST.DeleteShader(vertex_shader_id);
  840. ST.DeleteShader(fragment_shader_id);
  841. GLint status;
  842. ST.GetProgramiv(program, GL_LINK_STATUS, &status);
  843. if (!status) {
  844. GLint log_len;
  845. ST.GetProgramiv(program, GL_INFO_LOG_LENGTH, &log_len);
  846. char *log = (char*) malloc(log_len);
  847. ST.GetProgramInfoLog(program, log_len, NULL, log);
  848. debug(user_context) << "Could not link GLSL program:\n"
  849. << log << "\n";
  850. free(log);
  851. ST.DeleteProgram(program);
  852. return -1;
  853. }
  854. kernel->program_id = program;
  855. }
  856. return 0;
  857. }
  858. WEAK int halide_opengl_dev_sync(void *user_context) {
  859. CHECK_INITIALIZED(1);
  860. // TODO: glFinish()
  861. return 0;
  862. }
  863. template <class T>
  864. WEAK void halide_to_interleaved(buffer_t *buf, T *dst, int width, int height, int channels) {
  865. T *src = reinterpret_cast<T *>(buf->host);
  866. for (int y = 0; y < height; y++) {
  867. int dstidx = y * width * channels;
  868. for (int x = 0; x < width; x++) {
  869. int srcidx = (y - buf->min[1]) * buf->stride[1] +
  870. (x - buf->min[0]) * buf->stride[0] +
  871. -buf->min[2] * buf->stride[2];
  872. for (int c = 0; c < channels; c++) {
  873. dst[dstidx] = src[srcidx];
  874. dstidx++;
  875. srcidx += buf->stride[2];
  876. }
  877. }
  878. }
  879. }
  880. template <class T>
  881. WEAK void interleaved_to_halide(buffer_t *buf, T *src, int width, int height, int channels) {
  882. T *dst = reinterpret_cast<T *>(buf->host);
  883. for (int y = 0; y < height; y++) {
  884. int srcidx = y * width * channels;
  885. for (int x = 0; x < width; x++) {
  886. int dstidx = (y - buf->min[1]) * buf->stride[1] +
  887. (x - buf->min[0]) * buf->stride[0] +
  888. -buf->min[2] * buf->stride[2];
  889. for (int c = 0; c < channels; c++) {
  890. dst[dstidx] = src[srcidx];
  891. srcidx++;
  892. dstidx += buf->stride[2];
  893. }
  894. }
  895. }
  896. }
  897. // Copy image data from host memory to texture.
  898. WEAK int halide_opengl_copy_to_dev(void *user_context, buffer_t *buf) {
  899. int err = halide_opengl_dev_malloc(user_context, buf);
  900. if (err) {
  901. return err;
  902. }
  903. CHECK_INITIALIZED(1);
  904. if (!buf->host_dirty) {
  905. return 0;
  906. }
  907. if (!buf->host || !buf->dev) {
  908. debug_buffer(user_context, buf);
  909. error(user_context) << "Invalid copy_to_dev operation: host or dev NULL";
  910. return 1;
  911. }
  912. GLuint tex = get_texture_id(buf);
  913. debug(user_context) << "halide_copy_to_dev: " << tex << "\n";
  914. ST.BindTexture(GL_TEXTURE_2D, tex);
  915. CHECK_GLERROR(1);
  916. GLint internal_format, format, type;
  917. if (!get_texture_format(user_context, buf, &internal_format, &format, &type)) {
  918. error(user_context) << "Invalid texture format";
  919. return 1;
  920. }
  921. GLint width = buf->extent[0];
  922. GLint height = buf->extent[1];
  923. // To use TexSubImage2D directly, the colors must be stored interleaved
  924. // and rows must be stored consecutively.
  925. bool is_interleaved = (buf->stride[2] == 1 && buf->stride[0] == buf->extent[2]);
  926. bool is_packed = (buf->stride[1] == buf->extent[0] * buf->stride[0]);
  927. if (is_interleaved && is_packed) {
  928. ST.PixelStorei(GL_UNPACK_ALIGNMENT, 1);
  929. uint8_t *host_ptr = buf->host + buf->elem_size *
  930. (buf->min[0] * buf->stride[0] +
  931. buf->min[1] * buf->stride[1] +
  932. buf->min[2] * buf->stride[2] +
  933. buf->min[3] * buf->stride[3]);
  934. ST.TexSubImage2D(GL_TEXTURE_2D, 0,
  935. 0, 0, width, height,
  936. format, type, host_ptr);
  937. CHECK_GLERROR(1);
  938. } else {
  939. debug(user_context)
  940. << "Warning: In copy_to_dev, host buffer is not interleaved. Doing slow interleave.\n";
  941. size_t size = width * height * buf->extent[2] * buf->elem_size;
  942. void *tmp = halide_malloc(user_context, size);
  943. switch (type) {
  944. case GL_UNSIGNED_BYTE:
  945. halide_to_interleaved<uint8_t>(buf, (uint8_t*)tmp, width, height, buf->extent[2]);
  946. break;
  947. case GL_UNSIGNED_SHORT:
  948. halide_to_interleaved<uint16_t>(buf, (uint16_t*)tmp, width, height, buf->extent[2]);
  949. break;
  950. case GL_FLOAT:
  951. halide_to_interleaved<float>(buf, (float*)tmp, width, height, buf->extent[2]);
  952. break;
  953. }
  954. ST.PixelStorei(GL_UNPACK_ALIGNMENT, 1);
  955. ST.TexSubImage2D(GL_TEXTURE_2D, 0,
  956. 0, 0, width, height,
  957. format, type, tmp);
  958. CHECK_GLERROR(1);
  959. halide_free(user_context, tmp);
  960. }
  961. ST.BindTexture(GL_TEXTURE_2D, 0);
  962. buf->host_dirty = false;
  963. halide_use_jit_module();
  964. return 0;
  965. }
  966. // Copy pixel data from a texture to a CPU buffer.
  967. WEAK int get_pixels(void *user_context, buffer_t *buf, GLint format, GLint type, void *dest) {
  968. GLuint tex = get_texture_id(buf);
  969. ST.BindFramebuffer(GL_FRAMEBUFFER, ST.framebuffer_id);
  970. ST.FramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
  971. GL_TEXTURE_2D, tex, 0);
  972. // Check that framebuffer is set up correctly
  973. GLenum status = ST.CheckFramebufferStatus(GL_FRAMEBUFFER);
  974. if (status != GL_FRAMEBUFFER_COMPLETE) {
  975. ST.BindFramebuffer(GL_FRAMEBUFFER, 0);
  976. error(user_context)
  977. << "Setting up GL framebuffer " << ST.framebuffer_id << " failed " << status;
  978. return 1;
  979. }
  980. ST.ReadPixels(0, 0, buf->extent[0], buf->extent[1], format, type, dest);
  981. ST.BindFramebuffer(GL_FRAMEBUFFER, 0);
  982. return 0;
  983. }
  984. // Copy image data from texture back to host memory.
  985. WEAK int halide_opengl_copy_to_host(void *user_context, buffer_t *buf) {
  986. CHECK_INITIALIZED(1);
  987. if (!buf->dev_dirty) {
  988. return 0;
  989. }
  990. if (!buf->host || !buf->dev) {
  991. debug_buffer(user_context, buf);
  992. error(user_context) << "Invalid copy_to_host operation: host or dev NULL";
  993. return 1;
  994. }
  995. #ifdef DEBUG_RUNTIME
  996. GLuint tex = get_texture_id(buf);
  997. debug(user_context) << "halide_copy_to_host: " << tex << "\n";
  998. #endif
  999. GLint internal_format, format, type;
  1000. if (!get_texture_format(user_context, buf, &internal_format, &format, &type)) {
  1001. error(user_context) << "Invalid texture format";
  1002. return 1;
  1003. }
  1004. GLint width = buf->extent[0], height = buf->extent[1];
  1005. // To download the texture directly, the colors must be stored interleaved
  1006. // and rows must be stored consecutively.
  1007. bool is_interleaved = (buf->stride[2] == 1 && buf->stride[0] == buf->extent[2]);
  1008. bool is_packed = (buf->stride[1] == buf->extent[0] * buf->stride[0]);
  1009. if (is_interleaved && is_packed) {
  1010. ST.PixelStorei(GL_PACK_ALIGNMENT, 1);
  1011. uint8_t *host_ptr = buf->host + buf->elem_size *
  1012. (buf->min[0] * buf->stride[0] +
  1013. buf->min[1] * buf->stride[1] +
  1014. buf->min[2] * buf->stride[2] +
  1015. buf->min[3] * buf->stride[3]);
  1016. get_pixels(user_context, buf, format, type, host_ptr);
  1017. } else {
  1018. debug(user_context)
  1019. << "Warning: In copy_to_host, host buffer is not interleaved. Doing slow deinterleave.\n";
  1020. size_t stride = width * buf->extent[2] * buf->elem_size;
  1021. size_t size = height * stride;
  1022. uint8_t *tmp = (uint8_t*)halide_malloc(user_context, size);
  1023. if (!tmp) {
  1024. error(user_context) << "halide_malloc failed inside copy_to_host";
  1025. return -1;
  1026. }
  1027. ST.PixelStorei(GL_PACK_ALIGNMENT, 1);
  1028. if (int err = get_pixels(user_context, buf, format, type, tmp)) {
  1029. halide_free(user_context, tmp);
  1030. return err;
  1031. }
  1032. switch (type) {
  1033. case GL_UNSIGNED_BYTE:
  1034. interleaved_to_halide<uint8_t>(buf, (uint8_t*)tmp, width, height, buf->extent[2]);
  1035. break;
  1036. case GL_UNSIGNED_SHORT:
  1037. interleaved_to_halide<uint16_t>(buf, (uint16_t*)tmp, width, height, buf->extent[2]);
  1038. break;
  1039. case GL_FLOAT:
  1040. interleaved_to_halide<float>(buf, (float*)tmp, width, height, buf->extent[2]);
  1041. break;
  1042. }
  1043. halide_free(user_context, tmp);
  1044. }
  1045. CHECK_GLERROR(1);
  1046. buf->dev_dirty = false;
  1047. return 0;
  1048. }
  1049. WEAK void set_int_param(void *user_context, const char *name,
  1050. GLint loc, GLint value) {
  1051. ST.Uniform1iv(loc, 1, &value);
  1052. }
  1053. WEAK void set_float_param(void *user_context, const char *name,
  1054. GLint loc, GLfloat value) {
  1055. ST.Uniform1fv(loc, 1, &value);
  1056. }
  1057. class IndexSorter {
  1058. public:
  1059. IndexSorter(float* values_) : values(values_) { }
  1060. bool operator()(int a, int b) { return values[a] < values[b]; }
  1061. float* values;
  1062. };
  1063. WEAK int halide_opengl_dev_run(
  1064. void *user_context,
  1065. void *state_ptr,
  1066. const char *entry_name,
  1067. int blocksX, int blocksY, int blocksZ,
  1068. int threadsX, int threadsY, int threadsZ,
  1069. int shared_mem_bytes,
  1070. size_t arg_sizes[],
  1071. void *args[],
  1072. int num_padded_attributes,
  1073. float* vertex_buffer,
  1074. int num_coords_dim0,
  1075. int num_coords_dim1) {
  1076. CHECK_INITIALIZED(1);
  1077. ModuleState *mod = (ModuleState *)state_ptr;
  1078. if (!mod) {
  1079. error(user_context) << "Internal error: module state is NULL";
  1080. return -1;
  1081. }
  1082. KernelInfo *kernel = mod->kernel;
  1083. if (!kernel) {
  1084. error(user_context) << "Internal error: unknown kernel named '" << entry_name << "'";
  1085. return 1;
  1086. }
  1087. ST.UseProgram(kernel->program_id);
  1088. CHECK_GLERROR(1);
  1089. // TODO(abstephensg) it would be great to codegen these vec4 uniform buffers
  1090. // directly, instead of passing an array of arguments and then copying them
  1091. // out at runtime.
  1092. // Determine the number of float and int uniform parameters. This code
  1093. // follows the argument packing convention in CodeGen_GPU_Host and
  1094. // CodeGen_OpenGL_Dev
  1095. int num_uniform_floats = 0;
  1096. int num_uniform_ints = 0;
  1097. Argument *kernel_arg = kernel->arguments;
  1098. for (int i = 0; args[i]; i++, kernel_arg = kernel_arg->next) {
  1099. // Check for a mismatch between the number of arguments declared in the
  1100. // fragment shader source header and the number passed to this function
  1101. if (!kernel_arg) {
  1102. error(user_context)
  1103. << "Too many arguments passed to halide_opengl_dev_run\n"
  1104. << "Argument " << i << ": size=" << i << " value=" << args[i];
  1105. return 1;
  1106. }
  1107. // Count the number of float and int uniform parameters.
  1108. if (kernel_arg->kind == Argument::Uniform) {
  1109. switch (kernel_arg->type) {
  1110. case Argument::Float:
  1111. // Integer parameters less than 32 bits wide are passed as
  1112. // normalized float values
  1113. case Argument::Int8:
  1114. case Argument::UInt8:
  1115. case Argument::Int16:
  1116. case Argument::UInt16:
  1117. ++num_uniform_floats;
  1118. break;
  1119. case Argument::Bool:
  1120. case Argument::Int32:
  1121. case Argument::UInt32:
  1122. ++num_uniform_ints;
  1123. break;
  1124. default:
  1125. error(user_context) << "GLSL: Encountered invalid kernel argument type";
  1126. return 1;
  1127. }
  1128. }
  1129. }
  1130. // Pad up to a multiple of four
  1131. int num_padded_uniform_floats = (num_uniform_floats + 0x3) & ~0x3;
  1132. int num_padded_uniform_ints = (num_uniform_ints + 0x3) & ~0x3;
  1133. // Allocate storage for the packed arguments
  1134. float uniform_float[num_padded_uniform_floats];
  1135. int uniform_int[num_padded_uniform_ints];
  1136. bool bind_render_targets = true;
  1137. // Copy input arguments to corresponding GLSL uniforms.
  1138. GLint num_active_textures = 0;
  1139. int uniform_float_idx = 0;
  1140. int uniform_int_idx = 0;
  1141. kernel_arg = kernel->arguments;
  1142. for (int i = 0; args[i]; i++, kernel_arg = kernel_arg->next) {
  1143. if (kernel_arg->kind == Argument::Outbuf) {
  1144. // Check if the output buffer will be bound by the client instead of
  1145. // the Halide runtime
  1146. GLuint tex = *((GLuint *)args[i]);
  1147. if (tex == (GLuint)HALIDE_GLSL_CLIENT_BOUND) {
  1148. bind_render_targets = false;
  1149. }
  1150. // Outbuf textures are handled explicitly below
  1151. continue;
  1152. } else if (kernel_arg->kind == Argument::Inbuf) {
  1153. GLint loc =
  1154. ST.GetUniformLocation(kernel->program_id, kernel_arg->name);
  1155. CHECK_GLERROR(1);
  1156. if (loc == -1) {
  1157. error(user_context) << "No sampler defined for input texture.";
  1158. return 1;
  1159. }
  1160. GLuint tex = *((GLuint *)args[i]);
  1161. ST.ActiveTexture(GL_TEXTURE0 + num_active_textures);
  1162. ST.BindTexture(GL_TEXTURE_2D, tex);
  1163. // Textures not created by the Halide runtime might not have
  1164. // parameters set, or might have had parameters set differently
  1165. ST.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
  1166. ST.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
  1167. ST.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
  1168. ST.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
  1169. ST.Uniform1iv(loc, 1, &num_active_textures);
  1170. num_active_textures++;
  1171. // TODO: check maximum number of active textures
  1172. } else if (kernel_arg->kind == Argument::Uniform) {
  1173. // Copy the uniform parameter into the packed scalar list
  1174. // corresponding to its type.
  1175. // Note: small integers are represented as floats in GLSL.
  1176. switch (kernel_arg->type) {
  1177. case Argument::Float:
  1178. uniform_float[uniform_float_idx++] = *(float*)args[i];
  1179. break;
  1180. case Argument::Bool:
  1181. uniform_int[uniform_int_idx++] = *((bool*)args[i]) ? 1 : 0;
  1182. break;
  1183. case Argument::Int8:
  1184. uniform_float[uniform_float_idx++] = *((int8_t*)args[i]);
  1185. break;
  1186. case Argument::UInt8:
  1187. uniform_float[uniform_float_idx++] = *((uint8_t*)args[i]);
  1188. break;
  1189. case Argument::Int16: {
  1190. uniform_float[uniform_float_idx++] = *((int16_t*)args[i]);
  1191. break;
  1192. }
  1193. case Argument::UInt16: {
  1194. uniform_float[uniform_float_idx++] = *((uint16_t*)args[i]);
  1195. break;
  1196. }
  1197. case Argument::Int32: {
  1198. uniform_int[uniform_int_idx++] = *((int32_t*)args[i]);
  1199. break;
  1200. }
  1201. case Argument::UInt32: {
  1202. uint32_t value = *((uint32_t*)args[i]);
  1203. if (value > 0x7fffffff) {
  1204. error(user_context)
  1205. << "GLSL: argument '" << kernel_arg->name << "' is too large for GLint";
  1206. return -1;
  1207. }
  1208. uniform_int[uniform_int_idx++] = static_cast<GLint>(value);
  1209. break;
  1210. }
  1211. case Argument::Void:
  1212. error(user_context) <<"GLSL: Encountered invalid kernel argument type";
  1213. return 1;
  1214. }
  1215. }
  1216. }
  1217. if (kernel_arg) {
  1218. halide_error(user_context, "Too few arguments passed to halide_opengl_dev_run");
  1219. return 1;
  1220. }
  1221. // Set the packed uniform int parameters
  1222. for (int idx = 0; idx != num_padded_uniform_ints; idx += 4) {
  1223. // Produce the uniform parameter name without using the std library.
  1224. Printer<StringStreamPrinter,16> name(user_context);
  1225. name << "_uniformi" << (idx/4);
  1226. GLint loc = ST.GetUniformLocation(kernel->program_id, name.str());
  1227. CHECK_GLERROR(1);
  1228. if (loc == -1) {
  1229. // Argument was probably optimized away by GLSL compiler.
  1230. continue;
  1231. }
  1232. ST.Uniform4iv(loc,1,&uniform_int[idx]);
  1233. }
  1234. // Set the packed uniform float parameters
  1235. for (int idx = 0; idx != num_padded_uniform_floats; idx += 4) {
  1236. // Produce the uniform parameter name without using the std library.
  1237. Printer<StringStreamPrinter,16> name(user_context);
  1238. name << "_uniformf" << (idx/4);
  1239. GLint loc = ST.GetUniformLocation(kernel->program_id, name.str());
  1240. CHECK_GLERROR(1);
  1241. if (loc == -1) {
  1242. // Argument was probably optimized away by GLSL compiler.
  1243. continue;
  1244. }
  1245. ST.Uniform4fv(loc,1,&uniform_float[idx]);
  1246. }
  1247. // Prepare framebuffer for rendering to output textures.
  1248. GLint output_min[2] = { 0, 0 };
  1249. GLint output_extent[2] = { 0, 0 };
  1250. if (bind_render_targets) {
  1251. ST.BindFramebuffer(GL_FRAMEBUFFER, ST.framebuffer_id);
  1252. }
  1253. ST.Disable(GL_CULL_FACE);
  1254. ST.Disable(GL_DEPTH_TEST);
  1255. GLint num_output_textures = 0;
  1256. kernel_arg = kernel->arguments;
  1257. for (int i = 0; args[i]; i++, kernel_arg = kernel_arg->next) {
  1258. if (kernel_arg->kind != Argument::Outbuf) continue;
  1259. // TODO: GL_MAX_COLOR_ATTACHMENTS
  1260. if (num_output_textures >= 1) {
  1261. error(user_context)
  1262. << "OpenGL ES 2.0 only supports one single output texture";
  1263. return 1;
  1264. }
  1265. GLuint tex = *((GLuint*)args[i]);
  1266. // Check to see if the object name is actually a FBO
  1267. if (bind_render_targets) {
  1268. debug(user_context)
  1269. << "Output texture " << num_output_textures << ": " << tex << "\n";
  1270. ST.FramebufferTexture2D(GL_FRAMEBUFFER,
  1271. GL_COLOR_ATTACHMENT0 + num_output_textures,
  1272. GL_TEXTURE_2D, tex, 0);
  1273. CHECK_GLERROR(1);
  1274. }
  1275. TextureInfo *texinfo = find_texture(tex);
  1276. if (!texinfo) {
  1277. error(user_context) << "Undefined output texture";
  1278. return 1;
  1279. }
  1280. output_min[0] = texinfo->min[0];
  1281. output_min[1] = texinfo->min[1];
  1282. output_extent[0] = texinfo->extent[0];
  1283. output_extent[1] = texinfo->extent[1];
  1284. num_output_textures++;
  1285. }
  1286. // TODO: GL_MAX_DRAW_BUFFERS
  1287. if (num_output_textures == 0) {
  1288. error(user_context) << "kernel has no output";
  1289. // TODO: cleanup
  1290. return 1;
  1291. } else {
  1292. GLenum *draw_buffers = (GLenum*)
  1293. malloc(num_output_textures * sizeof(GLenum));
  1294. for (int i=0; i<num_output_textures; i++)
  1295. draw_buffers[i] = GL_COLOR_ATTACHMENT0 + i;
  1296. // TODO: disabled for now, since OpenGL ES 2 doesn't support multiple render
  1297. // targets.
  1298. // ST.DrawBuffers(num_output_textures, draw_buffers);
  1299. free(draw_buffers);
  1300. CHECK_GLERROR(1);
  1301. }
  1302. if (bind_render_targets) {
  1303. // Check that framebuffer is set up correctly
  1304. GLenum status = ST.CheckFramebufferStatus(GL_FRAMEBUFFER);
  1305. CHECK_GLERROR(1);
  1306. if (status != GL_FRAMEBUFFER_COMPLETE) {
  1307. error(user_context)
  1308. << "Setting up GL framebuffer " << ST.framebuffer_id
  1309. << " failed (" << status << ")";
  1310. // TODO: cleanup
  1311. return 1;
  1312. }
  1313. }
  1314. // Set vertex attributes
  1315. GLint loc = ST.GetUniformLocation(kernel->program_id, "output_extent");
  1316. ST.Uniform2iv(loc, 1, output_extent);
  1317. CHECK_GLERROR(1);
  1318. loc = ST.GetUniformLocation(kernel->program_id, "output_min");
  1319. ST.Uniform2iv(loc, 1, output_min);
  1320. CHECK_GLERROR(1);
  1321. #if 0 // DEBUG_RUNTIME
  1322. debug(user_context) << "output_extent: " << output_extent[0] << "," << output_extent[1] << "\n";
  1323. debug(user_context) << "output_min: " << output_min[0] << "," << output_min[1] << "\n";
  1324. #endif
  1325. // TODO(abestephensg): Sort coordinate dimensions when the linear solver is integrated
  1326. // Sort the coordinates
  1327. // Construct an element buffer using the sorted vertex order
  1328. int width = num_coords_dim0;
  1329. int height = num_coords_dim1;
  1330. int vertex_buffer_size = width*height*num_padded_attributes;
  1331. int element_buffer_size = (width-1)*(height-1)*6;
  1332. int element_buffer[element_buffer_size];
  1333. int idx = 0;
  1334. for (int h=0;h!=(height-1);++h) {
  1335. for (int w=0;w!=(width-1);++w) {
  1336. // TODO(abestephensg): Use sorted coordinates when integrated
  1337. int v = w+h*width;
  1338. element_buffer[idx++] = v;
  1339. element_buffer[idx++] = v+1;
  1340. element_buffer[idx++] = v+width+1;
  1341. element_buffer[idx++] = v+width+1;
  1342. element_buffer[idx++] = v+width;
  1343. element_buffer[idx++] = v;
  1344. }
  1345. }
  1346. #if 0 // DEBUG_RUNTIME
  1347. debug(user_context) << "Vertex buffer:";
  1348. for (int i=0;i!=vertex_buffer_size;++i) {
  1349. if (!(i%num_padded_attributes)) {
  1350. debug(user_context) << "\n";
  1351. }
  1352. debug(user_context) << vertex_buffer[i] << " ";
  1353. }
  1354. debug(user_context) << "\n";
  1355. debug(user_context) << "\n";
  1356. debug(user_context) << "Element buffer:";
  1357. for (int i=0;i!=element_buffer_size;++i) {
  1358. if (!(i%3)) {
  1359. debug(user_context) << "\n";
  1360. }
  1361. debug(user_context) << element_buffer[i] << " ";
  1362. }
  1363. debug(user_context) << "\n";
  1364. #endif
  1365. // Setup viewport
  1366. ST.Viewport(0, 0, output_extent[0], output_extent[1]);
  1367. // Setup the vertex and element buffers
  1368. GLuint vertex_array_object = 0;
  1369. if (ST.have_vertex_array_objects) {
  1370. ST.GenVertexArrays(1,&vertex_array_object);
  1371. ST.BindVertexArray(vertex_array_object);
  1372. }
  1373. GLuint vertex_buffer_id;
  1374. ST.GenBuffers(1,&vertex_buffer_id);
  1375. ST.BindBuffer(GL_ARRAY_BUFFER, vertex_buffer_id);
  1376. ST.BufferData(GL_ARRAY_BUFFER, sizeof(float)*vertex_buffer_size, vertex_buffer, GL_STATIC_DRAW);
  1377. CHECK_GLERROR(1);
  1378. GLuint element_buffer_id;
  1379. ST.GenBuffers(1,&element_buffer_id);
  1380. ST.BindBuffer(GL_ELEMENT_ARRAY_BUFFER, element_buffer_id);
  1381. ST.BufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(float)*element_buffer_size, element_buffer, GL_STATIC_DRAW);
  1382. CHECK_GLERROR(1);
  1383. // The num_padded_attributes argument is the number of vertex attributes,
  1384. // including the spatial x and y coordinates, padded up to a multiple of
  1385. // four so that the attributes may be packed into vec4 slots.
  1386. int num_packed_attributes = num_padded_attributes/4;
  1387. // Set up the per vertex attributes
  1388. GLint attrib_ids[num_packed_attributes];
  1389. for (int i=0;i!=num_packed_attributes;i++) {
  1390. // The attribute names can synthesized by the runtime based on the
  1391. // number of packed varying attributes
  1392. Printer<StringStreamPrinter> attribute_name(user_context);
  1393. attribute_name << "_varyingf" << i << "_attrib";
  1394. // TODO(abstephensg): Switch to glBindAttribLocation
  1395. GLint attrib_id = ST.GetAttribLocation(kernel->program_id, attribute_name.buf);
  1396. attrib_ids[i] = attrib_id;
  1397. // Check to see if the varying attribute was simplified out of the
  1398. // program by the GLSL compiler.
  1399. if (attrib_id == -1) {
  1400. continue;
  1401. }
  1402. ST.VertexAttribPointer(attrib_id, 4, GL_FLOAT, GL_FALSE /* Normalized */, sizeof(GLfloat)*num_padded_attributes, (void*)(i*sizeof(GLfloat)*4));
  1403. CHECK_GLERROR(1);
  1404. ST.EnableVertexAttribArray(attrib_id);
  1405. CHECK_GLERROR(1);
  1406. }
  1407. // Draw the scene
  1408. ST.DrawElements(GL_TRIANGLES, element_buffer_size, GL_UNSIGNED_INT, NULL);
  1409. CHECK_GLERROR(1);
  1410. for (int i=0;i!=num_packed_attributes;++i) {
  1411. if (attrib_ids[i] != -1)
  1412. ST.DisableVertexAttribArray(attrib_ids[i]);
  1413. }
  1414. // Cleanup
  1415. for (int i = 0; i < num_active_textures; i++) {
  1416. ST.ActiveTexture(GL_TEXTURE0 + i);
  1417. ST.BindTexture(GL_TEXTURE_2D, 0);
  1418. }
  1419. if (bind_render_targets) {
  1420. ST.BindFramebuffer(GL_FRAMEBUFFER, 0);
  1421. }
  1422. ST.BindBuffer(GL_ARRAY_BUFFER, 0);
  1423. ST.BindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0);
  1424. if (ST.have_vertex_array_objects) {
  1425. ST.BindVertexArray(0);
  1426. ST.DeleteVertexArrays(1,&vertex_array_object);
  1427. }
  1428. ST.DeleteBuffers(1,&vertex_buffer_id);
  1429. ST.DeleteBuffers(1,&element_buffer_id);
  1430. return 0;
  1431. }
  1432. }}} // namespace Halide::Runtime::Internal
  1433. extern "C" {
  1434. WEAK void halide_opengl_context_lost(void *user_context) {
  1435. if (!ST.initialized) return;
  1436. debug(user_context) << "halide_opengl_context_lost\n";
  1437. for (ModuleState *mod = state_list; mod; mod = mod->next) {
  1438. // Reset program handle to force recompilation.
  1439. mod->kernel->program_id = 0;
  1440. }
  1441. TextureInfo *tex = ST.textures;
  1442. while (tex) {
  1443. TextureInfo *next = tex->next;
  1444. free(tex);
  1445. tex = next;
  1446. }
  1447. global_state.init();
  1448. return;
  1449. }
  1450. WEAK uint64_t halide_opengl_output_client_bound() {
  1451. return HALIDE_GLSL_CLIENT_BOUND;
  1452. }
  1453. // Create wrappers that satisfy old naming conventions
  1454. WEAK void halide_release(void *user_context) {
  1455. halide_opengl_release(user_context);
  1456. }
  1457. namespace {
  1458. __attribute__((destructor))
  1459. WEAK void halide_opengl_cleanup() {
  1460. halide_release(NULL);
  1461. }
  1462. }
  1463. WEAK int halide_dev_malloc(void *user_context, buffer_t *buf) {
  1464. return halide_opengl_dev_malloc(user_context, buf);
  1465. }
  1466. WEAK int halide_dev_free(void *user_context, buffer_t *buf) {
  1467. return halide_opengl_dev_free(user_context, buf);
  1468. }
  1469. WEAK int halide_copy_to_host(void *user_context, buffer_t *buf) {
  1470. return halide_opengl_copy_to_host(user_context, buf);
  1471. }
  1472. WEAK int halide_copy_to_dev(void *user_context, buffer_t *buf) {
  1473. return halide_opengl_copy_to_dev(user_context, buf);
  1474. }
  1475. WEAK int halide_dev_run(void *user_context,
  1476. void *state_ptr,
  1477. const char *entry_name,
  1478. int blocksX, int blocksY, int blocksZ,
  1479. int threadsX, int threadsY, int threadsZ,
  1480. int shared_mem_bytes,
  1481. size_t arg_sizes[], void *args[],
  1482. int num_padded_attributes,
  1483. float* vertex_buffer,
  1484. int num_coords_dim0,
  1485. int num_coords_dim1) {
  1486. return halide_opengl_dev_run(user_context, state_ptr,
  1487. entry_name,
  1488. blocksX, blocksY, blocksZ,
  1489. threadsX, threadsY, threadsY,
  1490. shared_mem_bytes,
  1491. arg_sizes, args,
  1492. num_padded_attributes,
  1493. vertex_buffer,
  1494. num_coords_dim0,
  1495. num_coords_dim1);
  1496. }
  1497. WEAK int halide_dev_sync(void *user_context) {
  1498. return halide_opengl_dev_sync(user_context);
  1499. }
  1500. WEAK int halide_init_kernels(void *user_context, void **state_ptr,
  1501. const char *src, int size) {
  1502. return halide_opengl_init_kernels(user_context, state_ptr, src, size);
  1503. }
  1504. }