/opengles/src/arm/CodeGenerator.cpp

http://ftk.googlecode.com/ · C++ · 501 lines · 316 code · 132 blank · 53 comment · 16 complexity · 5a8cc9516310fb537d34093b287c2b63 MD5 · raw file

  1. // ==========================================================================
  2. //
  3. // CodeGenerator.cpp JIT Class for 3D Rendering Library
  4. //
  5. // This file contains the rasterizer functions that
  6. // implement the runtime code generation support
  7. // for optimized scan line rasterization routines.
  8. //
  9. // --------------------------------------------------------------------------
  10. //
  11. // 12-29-2003 Hans-Martin Will initial version
  12. //
  13. // --------------------------------------------------------------------------
  14. //
  15. // Copyright (c) 2004, Hans-Martin Will. All rights reserved.
  16. //
  17. // Redistribution and use in source and binary forms, with or without
  18. // modification, are permitted provided that the following conditions are
  19. // met:
  20. //
  21. // * Redistributions of source code must retain the above copyright
  22. // notice, this list of conditions and the following disclaimer.
  23. // * Redistributions in binary form must reproduce the above copyright
  24. // notice, this list of conditions and the following disclaimer in the
  25. // documentation and/or other materials provided with the distribution.
  26. //
  27. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  28. // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  29. // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  30. // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  31. // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
  32. // OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  33. // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  34. // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  35. // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  36. // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  37. // THE POSSIBILITY OF SUCH DAMAGE.
  38. //
  39. // ==========================================================================
  40. #include "stdafx.h"
  41. #include "CodeGenerator.h"
  42. #include "Rasterizer.h"
  43. #include "FunctionCache.h"
  44. #include "Surface.h"
  45. #include "Texture.h"
  46. #include "codegen.h"
  47. #include "instruction.h"
  48. #include "emit.h"
  49. #include "arm-dis.h"
  #ifdef EGL_ON_WINCE
  // --------------------------------------------------------------------------
  // These declarations for coredll are extracted from platform builder
  // source code
  //
  // They are declared locally rather than pulled in from a header.
  // Compile() calls CacheSync() after copying freshly generated code into its
  // target buffer.
  // --------------------------------------------------------------------------
  /* Flags for CacheSync/CacheRangeFlush */
  #define CACHE_SYNC_DISCARD 0x001 /* write back & discard all cached data */
  #define CACHE_SYNC_INSTRUCTIONS 0x002 /* discard all cached instructions */
  #define CACHE_SYNC_WRITEBACK 0x004 /* write back but don't discard data cache*/
  #define CACHE_SYNC_FLUSH_I_TLB 0x008 /* flush I-TLB */
  #define CACHE_SYNC_FLUSH_D_TLB 0x010 /* flush D-TLB */
  #define CACHE_SYNC_FLUSH_TLB (CACHE_SYNC_FLUSH_I_TLB|CACHE_SYNC_FLUSH_D_TLB) /* flush all TLB */
  #define CACHE_SYNC_L2_WRITEBACK 0x020 /* write-back L2 Cache */
  #define CACHE_SYNC_L2_DISCARD 0x040 /* discard L2 Cache */
  #define CACHE_SYNC_ALL 0x07F /* sync and discard everything in Cache/TLB */
  // C linkage: these are declared as plain C symbols (see the coredll note above).
  extern "C" {
  void CacheSync(int flags);
  void CacheRangeFlush (LPVOID pAddr, DWORD dwLength, DWORD dwFlags);
  }
  #endif
  70. using namespace EGL;
  // --------------------------------------------------------------------------
  // Helpers for creating virtual registers.
  //
  // Every macro below expands to a call of
  // cg_virtual_reg_create(procedure, ...), so a variable named `procedure`
  // must be in scope wherever one of them is used.
  // --------------------------------------------------------------------------
  // Assign a newly created general register to the existing variable `reg`.
  #define ALLOC_REG(reg) reg = cg_virtual_reg_create(procedure, cg_reg_type_general)
  // Assign a newly created flags register to the existing variable `reg`.
  #define ALLOC_FLAGS(reg) reg = cg_virtual_reg_create(procedure, cg_reg_type_flags)
  // Declare local `reg` and initialize it with a newly created general register.
  #define DECL_REG(reg) cg_virtual_reg_t * reg = cg_virtual_reg_create(procedure, cg_reg_type_general)
  // Declare local `reg` and initialize it with a newly created flags register.
  #define DECL_FLAGS(reg) cg_virtual_reg_t * reg = cg_virtual_reg_create(procedure, cg_reg_type_flags)
  // Declare local `reg` as a general register and then apply LDI(reg, value).
  // NOTE: this expands to TWO statements (declaration; LDI), so it must not be
  // used as the sole body of an unbraced if/else/loop.
  #define DECL_CONST_REG(reg, value) cg_virtual_reg_t * reg = cg_virtual_reg_create(procedure, cg_reg_type_general); LDI(reg, value)
  76. namespace {
  77. void Dump(const char * filename, cg_module_t * module)
  78. {
  79. FILE * fp = fopen(filename, "w");
  80. cg_module_dump(module, fp);
  81. fclose(fp);
  82. }
  83. }
  84. void CodeGenerator :: Compile(FunctionCache * target, FunctionCache::FunctionType type,
  85. void (CodeGenerator::*function)()) {
  86. cg_heap_t * heap = cg_heap_create(4096);
  87. cg_module_t * module = cg_module_create(heap);
  88. m_Module = module;
  89. (this->*function)();
  90. #ifdef DEBUG
  91. Dump("dump1.txt", m_Module);
  92. #endif
  93. cg_module_inst_def(m_Module);
  94. cg_module_amode(m_Module);
  95. #ifdef DEBUG
  96. Dump("dump2.txt", m_Module);
  97. #endif
  98. cg_module_eliminate_dead_code(m_Module);
  99. #ifdef DEBUG
  100. Dump("dump3.txt", m_Module);
  101. #endif
  102. cg_module_unify_registers(m_Module);
  103. cg_module_allocate_variables(m_Module);
  104. cg_module_inst_use_chains(m_Module);
  105. //cg_module_reorder_instructions(m_Module);
  106. #ifdef DEBUG
  107. Dump("dump35.txt", m_Module);
  108. #endif
  109. cg_module_dataflow(m_Module);
  110. cg_module_interferences(m_Module);
  111. #ifdef DEBUG
  112. Dump("dump4.txt", m_Module);
  113. #endif
  114. cg_runtime_info_t runtime;
  115. memset(&runtime, 0, sizeof runtime);
  116. runtime.div = div;
  117. runtime.div_HP_16_32s = EGL_Div;
  118. runtime.div_LP_16_32s = EGL_Div;
  119. runtime.inv_HP_16_32s = EGL_Inverse;
  120. runtime.inv_LP_16_32s = EGL_Inverse;
  121. runtime.inv_sqrt_HP_16_32s = EGL_InvSqrt;
  122. runtime.inv_sqrt_LP_16_32s = EGL_InvSqrt;
  123. runtime.sqrt_HP_16_32s = EGL_Sqrt;
  124. runtime.sqrt_LP_16_32s = EGL_Sqrt;
  125. cg_processor_info_t processor;
  126. #ifdef EGL_XSCALE
  127. processor.useV5 = 1;
  128. #else
  129. processor.useV5 = 0;
  130. #endif
  131. cg_codegen_t * codegen = cg_codegen_create(heap, &runtime, &processor);
  132. cg_codegen_emit_module(codegen, m_Module);
  133. cg_codegen_fix_refs(codegen);
  134. cg_segment_t * cseg = cg_codegen_segment(codegen);
  135. #ifdef DEBUG
  136. ARMDis dis;
  137. armdis_init(&dis);
  138. armdis_dump(&dis, "dump5.txt", cseg);
  139. #endif
  140. void * targetBuffer =
  141. target->AddFunction(type,
  142. *m_State, cg_segment_size(cseg));
  143. cg_segment_get_block(cseg, 0, targetBuffer, cg_segment_size(cseg));
  144. #if defined(EGL_ON_WINCE) && (defined(ARM) || defined(_ARM_))
  145. // flush data cache and clear instruction cache to make new code visible to execution unit
  146. CacheSync(CACHE_SYNC_INSTRUCTIONS | CACHE_SYNC_WRITEBACK);
  147. #endif
  148. cg_codegen_destroy(codegen);
  149. cg_heap_destroy(module->heap);
  150. }
  151. cg_virtual_reg_t * CodeGenerator :: Mul255(cg_block_t * block, cg_virtual_reg_t * first, cg_virtual_reg_t * second) {
  152. cg_proc_t * procedure = block->proc;
  153. DECL_REG (regProduct);
  154. DECL_CONST_REG (constant8, 8);
  155. DECL_REG (regShifted);
  156. DECL_REG (regAdjusted);
  157. DECL_REG (regFinal);
  158. MUL (regProduct, first, second);
  159. ASR (regShifted, regProduct, constant8);
  160. ADD (regAdjusted, regProduct, regShifted);
  161. ASR (regFinal, regAdjusted, constant8);
  162. return regFinal;
  163. }
  164. cg_virtual_reg_t * CodeGenerator :: AddSaturate255(cg_block_t * block, cg_virtual_reg_t * first, cg_virtual_reg_t * second) {
  165. cg_proc_t * procedure = block->proc;
  166. DECL_REG (regSum);
  167. DECL_CONST_REG (constant255, 0xff);
  168. DECL_REG (regResult);
  169. ADD (regSum, first, second);
  170. MIN (regResult, regSum, constant255);
  171. return regResult;
  172. }
  173. cg_virtual_reg_t * CodeGenerator :: ClampTo255(cg_block_t * block, cg_virtual_reg_t * value) {
  174. cg_proc_t * procedure = block->proc;
  175. DECL_CONST_REG (constant0, 0);
  176. DECL_CONST_REG (constant17, 17);
  177. DECL_CONST_REG (constant1, 0x10000);
  178. DECL_CONST_REG (constantFactor, 0x1ff);
  179. DECL_REG (regClamped0);
  180. DECL_REG (regClamped1);
  181. DECL_REG (regAdjusted);
  182. DECL_REG (regResult);
  183. MAX (regClamped0, value, constant0);
  184. MIN (regClamped1, regClamped0, constant1);
  185. MUL (regAdjusted, regClamped1, constantFactor);
  186. LSR (regResult, regAdjusted, constant17);
  187. return regResult;
  188. }
  189. cg_virtual_reg_t * CodeGenerator :: AddSigned(cg_block_t * block, cg_virtual_reg_t * first, cg_virtual_reg_t * second) {
  190. cg_proc_t * procedure = block->proc;
  191. DECL_REG (regResult);
  192. DECL_REG (regSum);
  193. DECL_REG (regAdjusted);
  194. DECL_CONST_REG (constantHalf, 0x80);
  195. DECL_CONST_REG (constant0, 0);
  196. ADD (regSum, first, second);
  197. SUB (regAdjusted, regSum, constantHalf);
  198. MAX (regResult, regAdjusted, constant0);
  199. return regResult;
  200. }
  201. cg_virtual_reg_t * CodeGenerator :: Add(cg_block_t * block, cg_virtual_reg_t * first, cg_virtual_reg_t * second) {
  202. cg_proc_t * procedure = block->proc;
  203. DECL_REG (regResult);
  204. DECL_REG (regSum);
  205. ADD (regResult, first, second);
  206. return regResult;
  207. }
  208. cg_virtual_reg_t * CodeGenerator :: Sub(cg_block_t * block, cg_virtual_reg_t * first, cg_virtual_reg_t * second) {
  209. cg_proc_t * procedure = block->proc;
  210. DECL_REG (regResult);
  211. SUB (regResult, first, second);
  212. return regResult;
  213. }
  214. cg_virtual_reg_t * CodeGenerator :: ExtractBitFieldTo255(cg_block_t * block, cg_virtual_reg_t * value, size_t low, size_t high) {
  215. cg_proc_t * procedure = block->proc;
  216. if (high == low) {
  217. if (high < 8) {
  218. DECL_REG (regShifted);
  219. DECL_CONST_REG (constantShift, 8 - high);
  220. LSL (regShifted, value, constantShift);
  221. value = regShifted;
  222. } else if (high > 8) {
  223. DECL_REG (regShifted);
  224. DECL_CONST_REG (constantShift, high - 8);
  225. LSR (regShifted, value, constantShift);
  226. value = regShifted;
  227. }
  228. DECL_CONST_REG (constantMask, 0x100);
  229. DECL_REG (regMasked);
  230. AND (regMasked, value, constantMask);
  231. DECL_CONST_REG (constant8, 8);
  232. DECL_REG (regShifted);
  233. DECL_REG (regAdjusted);
  234. LSR (regShifted, value, constant8);
  235. SUB (regAdjusted, value, regShifted);
  236. return regAdjusted;
  237. }
  238. if (high < 7) {
  239. DECL_REG (regShifted);
  240. DECL_CONST_REG (constantShift, 7 - high);
  241. LSL (regShifted, value, constantShift);
  242. value = regShifted;
  243. } else if (high > 7) {
  244. DECL_REG (regShifted);
  245. DECL_CONST_REG (constantShift, high - 7);
  246. LSR (regShifted, value, constantShift);
  247. value = regShifted;
  248. }
  249. size_t bits = high - low + 1;
  250. static const U8 mask[9] = { 0, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, 0xff };
  251. DECL_CONST_REG (constantMask, mask[bits]);
  252. DECL_REG (regMasked);
  253. AND (regMasked, value, constantMask);
  254. value = regMasked;
  255. while (bits < 8) {
  256. DECL_CONST_REG (constantShift, bits);
  257. DECL_REG (regShifted);
  258. DECL_REG (regOred);
  259. LSR (regShifted, value, constantShift);
  260. OR (regOred, value, regShifted);
  261. value = regOred;
  262. bits += 2;
  263. }
  264. return value;
  265. }
  266. cg_virtual_reg_t * CodeGenerator :: BitFieldFrom255(cg_block_t * block, cg_virtual_reg_t * value, size_t low, size_t high) {
  267. cg_proc_t * procedure = block->proc;
  268. size_t bits = high - low + 1;
  269. assert(bits <= 8);
  270. size_t lowBit = 8 - bits;
  271. if (bits != 8) {
  272. static const U8 mask[9] = { 0, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, 0xff };
  273. DECL_CONST_REG (constantMask, mask[bits]);
  274. DECL_REG (regMasked);
  275. AND (regMasked, value, constantMask);
  276. value = regMasked;
  277. }
  278. if (low > lowBit) {
  279. DECL_CONST_REG (constantShift, low - lowBit);
  280. DECL_REG (regShifted);
  281. LSL (regShifted, value, constantShift);
  282. value = regShifted;
  283. } else if (low < lowBit) {
  284. DECL_CONST_REG (constantShift, lowBit - low);
  285. DECL_REG (regShifted);
  286. LSR (regShifted, value, constantShift);
  287. value = regShifted;
  288. }
  289. return value;
  290. }
  291. // ----------------------------------------------------------------------
  292. // Emit code to convert a representation of a color as individual
  293. // R, G and B components into a 16-bit 565 representation
  294. //
  295. // R, G B are within the range 0..0xff
  296. // ----------------------------------------------------------------------
  297. void CodeGenerator :: Color565FromRGB(cg_block_t * block, cg_virtual_reg_t * regRGB,
  298. cg_virtual_reg_t * r, cg_virtual_reg_t * g, cg_virtual_reg_t * b) {
  299. cg_proc_t * procedure = block->proc;
  300. cg_virtual_reg_t * regFieldR = BitFieldFrom255(block, r, 11, 15);
  301. cg_virtual_reg_t * regFieldG = BitFieldFrom255(block, g, 5, 10);
  302. cg_virtual_reg_t * regFieldB = BitFieldFrom255(block, b, 0, 4);
  303. DECL_REG (regBG);
  304. OR (regBG, regFieldB, regFieldG);
  305. OR (regRGB, regBG, regFieldR);
  306. }
  307. cg_virtual_reg_t * CodeGenerator :: Color565FromRGB(cg_block_t * block,
  308. cg_virtual_reg_t * r, cg_virtual_reg_t * g, cg_virtual_reg_t * b) {
  309. cg_proc_t * procedure = block->proc;
  310. DECL_REG (regResult);
  311. Color565FromRGB(block, regResult, r, g, b);
  312. return regResult;
  313. }
  314. cg_virtual_reg_t * CodeGenerator :: Blend255(cg_block_t * block, cg_virtual_reg_t * first, cg_virtual_reg_t * second,
  315. cg_virtual_reg_t * alpha) {
  316. cg_proc_t * procedure = block->proc;
  317. DECL_REG (regDiff);
  318. SUB (regDiff, second, first); // diff = (second - first)
  319. cg_virtual_reg_t * regProd = Mul255(block, regDiff, alpha); // alpha * (second - first)
  320. return Add(block, first, regProd); // first + alpha * (second - first)
  321. }
  322. cg_virtual_reg_t * CodeGenerator :: Blend255(cg_block_t * block, U8 constant, cg_virtual_reg_t * second,
  323. cg_virtual_reg_t * alpha) {
  324. cg_proc_t * procedure = block->proc;
  325. DECL_CONST_REG (regConst, constant);
  326. return Blend255(block, regConst, second, alpha);
  327. }
  328. cg_virtual_reg_t * CodeGenerator :: SignedVal(cg_block_t * block, cg_virtual_reg_t * value) {
  329. cg_proc_t * procedure = block->proc;
  330. DECL_REG (regShifted);
  331. DECL_CONST_REG (constantShift, 7);
  332. DECL_REG (regExpanded);
  333. DECL_CONST_REG (c128, 128);
  334. DECL_REG (regResult);
  335. // expand 0..255 -> 0..256
  336. LSR (regShifted, value, constantShift);
  337. ADD (regExpanded, value, regShifted);
  338. SUB (regResult, regExpanded, c128);
  339. return regResult;
  340. }
  341. cg_virtual_reg_t * CodeGenerator :: Dot3(cg_block_t * block,
  342. cg_virtual_reg_t * r[], cg_virtual_reg_t * g[], cg_virtual_reg_t * b[]) {
  343. cg_proc_t * procedure = block->proc;
  344. DECL_REG (regProdR);
  345. DECL_REG (regProdG);
  346. DECL_REG (regProdB);
  347. DECL_REG (regSumRG);
  348. DECL_REG (regSumRGB);
  349. MUL (regProdR, SignedVal(block, r[0]), SignedVal(block, r[1]));
  350. MUL (regProdG, SignedVal(block, g[0]), SignedVal(block, g[1]));
  351. ADD (regSumRG, regProdR, regProdG);
  352. MUL (regProdB, SignedVal(block, b[0]), SignedVal(block, b[1]));
  353. ADD (regSumRGB, regSumRG, regProdB);
  354. DECL_CONST_REG (constant6, 6);
  355. DECL_CONST_REG (constant7, 7);
  356. DECL_REG (regShifted6);
  357. DECL_REG (regShifted13);
  358. DECL_REG (regAdjusted);
  359. ASR (regShifted6, regSumRGB, constant6);
  360. ASR (regShifted13, regShifted6, constant7);
  361. SUB (regAdjusted, regShifted6, regShifted13);
  362. DECL_REG (regClamped0);
  363. DECL_REG (regClamped255);
  364. DECL_CONST_REG (constant0, 0);
  365. DECL_CONST_REG (constant255, 255);
  366. MAX (regClamped0, regAdjusted, constant0);
  367. MIN (regClamped255, regClamped0, constant255);
  368. return regClamped255;
  369. }