PageRenderTime 156ms CodeModel.GetById 80ms app.highlight 51ms RepoModel.GetById 20ms app.codeStats 0ms

/opengles/src/arm/CodeGenerator.cpp

http://ftk.googlecode.com/
C++ | 501 lines | 316 code | 132 blank | 53 comment | 16 complexity | 5a8cc9516310fb537d34093b287c2b63 MD5 | raw file
  1// ==========================================================================
  2//
  3// CodeGenerator.cpp	JIT Class for 3D Rendering Library
  4//
  5//						This file contains the rasterizer functions that
  6//						implement the runtime code generation support
  7//						for optimized scan line rasterization routines.
  8//
  9// --------------------------------------------------------------------------
 10//
 11// 12-29-2003		Hans-Martin Will	initial version
 12//
 13// --------------------------------------------------------------------------
 14//
 15// Copyright (c) 2004, Hans-Martin Will. All rights reserved.
 16// 
 17// Redistribution and use in source and binary forms, with or without 
 18// modification, are permitted provided that the following conditions are 
 19// met:
 20// 
 21//	 *  Redistributions of source code must retain the above copyright
 22// 		notice, this list of conditions and the following disclaimer. 
 23//   *	Redistributions in binary form must reproduce the above copyright
 24// 		notice, this list of conditions and the following disclaimer in the 
 25// 		documentation and/or other materials provided with the distribution. 
 26// 
 27// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 28// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
 29// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
 30// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
 31// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, 
 32// OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
 33// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 34// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
 35// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
 36// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 
 37// THE POSSIBILITY OF SUCH DAMAGE.
 38//
 39// ==========================================================================
 40
 41
 42#include "stdafx.h"
 43#include "CodeGenerator.h"
 44#include "Rasterizer.h"
 45#include "FunctionCache.h"
 46#include "Surface.h"
 47#include "Texture.h"
 48#include "codegen.h"
 49#include "instruction.h"
 50#include "emit.h"
 51#include "arm-dis.h"
 52
 53
 54#ifdef EGL_ON_WINCE
 55
 56// --------------------------------------------------------------------------
 57// These declarations for coredll are extracted from platform builder
 58// source code
 59// --------------------------------------------------------------------------
 60
 61/* Flags for CacheSync/CacheRangeFlush */
 62#define CACHE_SYNC_DISCARD      0x001   /* write back & discard all cached data */
 63#define CACHE_SYNC_INSTRUCTIONS 0x002   /* discard all cached instructions */
 64#define CACHE_SYNC_WRITEBACK    0x004   /* write back but don't discard data cache*/
 65#define CACHE_SYNC_FLUSH_I_TLB  0x008   /* flush I-TLB */
 66#define CACHE_SYNC_FLUSH_D_TLB  0x010   /* flush D-TLB */
 67#define CACHE_SYNC_FLUSH_TLB    (CACHE_SYNC_FLUSH_I_TLB|CACHE_SYNC_FLUSH_D_TLB)    /* flush all TLB */
 68#define CACHE_SYNC_L2_WRITEBACK 0x020   /* write-back L2 Cache */
 69#define CACHE_SYNC_L2_DISCARD   0x040   /* discard L2 Cache */
 70
 71#define CACHE_SYNC_ALL          0x07F   /* sync and discard everything in Cache/TLB */
 72
 73extern "C" {
 74	void CacheSync(int flags);
 75	void CacheRangeFlush (LPVOID pAddr, DWORD dwLength, DWORD dwFlags);
 76}
 77
 78#endif
 79
 80using namespace EGL;
 81
 82
 83#define ALLOC_REG(reg) reg = cg_virtual_reg_create(procedure, cg_reg_type_general)
 84#define ALLOC_FLAGS(reg) reg = cg_virtual_reg_create(procedure, cg_reg_type_flags)
 85#define DECL_REG(reg) cg_virtual_reg_t * reg = cg_virtual_reg_create(procedure, cg_reg_type_general)
 86#define DECL_FLAGS(reg) cg_virtual_reg_t * reg = cg_virtual_reg_create(procedure, cg_reg_type_flags)
 87#define DECL_CONST_REG(reg, value) cg_virtual_reg_t * reg = cg_virtual_reg_create(procedure, cg_reg_type_general); LDI(reg, value)
 88
 89
 90namespace {
 91
 92	void Dump(const char * filename, cg_module_t * module)
 93	{
 94		FILE * fp = fopen(filename, "w");
 95		cg_module_dump(module, fp);
 96		fclose(fp);
 97	}
 98
 99}
100
101void CodeGenerator :: Compile(FunctionCache * target, FunctionCache::FunctionType type,
102	void (CodeGenerator::*function)()) {
103
104	cg_heap_t * heap = cg_heap_create(4096);
105	cg_module_t * module = cg_module_create(heap);
106
107	m_Module = module;
108
109	(this->*function)();
110
111#ifdef DEBUG
112	Dump("dump1.txt", m_Module);
113#endif
114
115	cg_module_inst_def(m_Module);
116	cg_module_amode(m_Module);
117
118#ifdef DEBUG
119	Dump("dump2.txt", m_Module);
120#endif
121
122	cg_module_eliminate_dead_code(m_Module);
123
124#ifdef DEBUG
125	Dump("dump3.txt", m_Module);
126#endif
127
128	cg_module_unify_registers(m_Module);
129	cg_module_allocate_variables(m_Module);
130	cg_module_inst_use_chains(m_Module);
131	//cg_module_reorder_instructions(m_Module);
132
133#ifdef DEBUG
134	Dump("dump35.txt", m_Module);
135#endif
136
137	cg_module_dataflow(m_Module);
138	cg_module_interferences(m_Module);
139
140#ifdef DEBUG
141	Dump("dump4.txt", m_Module);
142#endif
143
144	cg_runtime_info_t runtime; 
145	memset(&runtime, 0, sizeof runtime);
146
147	runtime.div = div;
148
149	runtime.div_HP_16_32s = EGL_Div;
150	runtime.div_LP_16_32s = EGL_Div;
151	runtime.inv_HP_16_32s = EGL_Inverse;
152	runtime.inv_LP_16_32s = EGL_Inverse;
153	runtime.inv_sqrt_HP_16_32s = EGL_InvSqrt;
154	runtime.inv_sqrt_LP_16_32s = EGL_InvSqrt;
155	runtime.sqrt_HP_16_32s = EGL_Sqrt;
156	runtime.sqrt_LP_16_32s = EGL_Sqrt;
157
158	cg_processor_info_t processor;
159
160#ifdef EGL_XSCALE
161	processor.useV5 = 1;
162#else
163	processor.useV5 = 0;
164#endif
165
166	cg_codegen_t * codegen = cg_codegen_create(heap, &runtime, &processor);
167	cg_codegen_emit_module(codegen, m_Module);
168	cg_codegen_fix_refs(codegen);
169
170	cg_segment_t * cseg = cg_codegen_segment(codegen);
171
172#ifdef DEBUG
173	ARMDis dis;
174	armdis_init(&dis);
175	armdis_dump(&dis, "dump5.txt", cseg);
176#endif
177
178	void * targetBuffer = 
179		target->AddFunction(type, 
180							*m_State, cg_segment_size(cseg));
181
182	cg_segment_get_block(cseg, 0, targetBuffer, cg_segment_size(cseg));
183
184#if defined(EGL_ON_WINCE) && (defined(ARM) || defined(_ARM_))
185	// flush data cache and clear instruction cache to make new code visible to execution unit
186	CacheSync(CACHE_SYNC_INSTRUCTIONS | CACHE_SYNC_WRITEBACK);		
187#endif
188
189	cg_codegen_destroy(codegen);
190	cg_heap_destroy(module->heap);
191}
192
193
194cg_virtual_reg_t * CodeGenerator :: Mul255(cg_block_t * block, cg_virtual_reg_t * first, cg_virtual_reg_t * second) {
195	cg_proc_t * procedure = block->proc;
196
197	DECL_REG		(regProduct);
198	DECL_CONST_REG	(constant8, 8);
199	DECL_REG		(regShifted);
200	DECL_REG		(regAdjusted);
201	DECL_REG		(regFinal);
202	
203	MUL			(regProduct,	first, second);
204	ASR			(regShifted,	regProduct, constant8);
205	ADD			(regAdjusted,	regProduct, regShifted);
206	ASR			(regFinal,		regAdjusted, constant8);
207
208	return regFinal;
209}
210
211cg_virtual_reg_t * CodeGenerator :: AddSaturate255(cg_block_t * block, cg_virtual_reg_t * first, cg_virtual_reg_t * second) {
212	cg_proc_t * procedure = block->proc;
213
214	DECL_REG		(regSum);
215	DECL_CONST_REG	(constant255, 0xff);
216	DECL_REG		(regResult);
217
218	ADD				(regSum, first, second);
219	MIN				(regResult, regSum, constant255);
220
221	return regResult;
222}
223
224cg_virtual_reg_t * CodeGenerator :: ClampTo255(cg_block_t * block, cg_virtual_reg_t * value) {
225	cg_proc_t * procedure = block->proc;
226
227	DECL_CONST_REG	(constant0, 0);
228	DECL_CONST_REG	(constant17, 17);
229	DECL_CONST_REG	(constant1, 0x10000);
230	DECL_CONST_REG	(constantFactor, 0x1ff);
231
232	DECL_REG	(regClamped0);
233	DECL_REG	(regClamped1);
234	DECL_REG	(regAdjusted);
235	DECL_REG	(regResult);
236
237	MAX		(regClamped0, value, constant0);
238	MIN		(regClamped1, regClamped0, constant1);
239	MUL		(regAdjusted, regClamped1, constantFactor);
240	LSR		(regResult, regAdjusted, constant17);
241
242	return regResult;
243}
244
245cg_virtual_reg_t * CodeGenerator :: AddSigned(cg_block_t * block, cg_virtual_reg_t * first, cg_virtual_reg_t * second) {
246	cg_proc_t * procedure = block->proc;
247
248	DECL_REG		(regResult);
249	DECL_REG		(regSum);
250	DECL_REG		(regAdjusted);
251	DECL_CONST_REG	(constantHalf, 0x80);
252	DECL_CONST_REG	(constant0, 0);
253
254	ADD				(regSum, first, second);
255	SUB				(regAdjusted, regSum, constantHalf);
256	MAX				(regResult, regAdjusted, constant0);
257
258	return regResult;
259}
260
261cg_virtual_reg_t * CodeGenerator :: Add(cg_block_t * block, cg_virtual_reg_t * first, cg_virtual_reg_t * second) {
262	cg_proc_t * procedure = block->proc;
263
264	DECL_REG		(regResult);
265	DECL_REG		(regSum);
266
267	ADD				(regResult, first, second);
268
269	return regResult;
270}
271
272cg_virtual_reg_t * CodeGenerator :: Sub(cg_block_t * block, cg_virtual_reg_t * first, cg_virtual_reg_t * second) {
273	cg_proc_t * procedure = block->proc;
274
275	DECL_REG		(regResult);
276
277	SUB				(regResult, first, second);
278
279	return regResult;
280}
281
282cg_virtual_reg_t * CodeGenerator :: ExtractBitFieldTo255(cg_block_t * block, cg_virtual_reg_t * value, size_t low, size_t high) {
283	cg_proc_t * procedure = block->proc;
284
285	if (high == low) {
286		if (high < 8) {
287			DECL_REG		(regShifted);
288			DECL_CONST_REG	(constantShift, 8 - high);
289
290			LSL				(regShifted, value, constantShift);
291
292			value = regShifted;
293		} else if (high > 8) {
294			DECL_REG		(regShifted);
295			DECL_CONST_REG	(constantShift, high - 8);
296
297			LSR				(regShifted, value, constantShift);
298
299			value = regShifted;
300		}
301
302		DECL_CONST_REG	(constantMask,	0x100);
303		DECL_REG		(regMasked);
304
305		AND				(regMasked,		value, constantMask);
306
307		DECL_CONST_REG	(constant8,		8);
308		DECL_REG		(regShifted);
309		DECL_REG		(regAdjusted);
310
311		LSR				(regShifted,	value, constant8);
312		SUB				(regAdjusted,	value, regShifted);
313
314		return regAdjusted;
315	}
316
317	if (high < 7) {
318		DECL_REG		(regShifted);
319		DECL_CONST_REG	(constantShift, 7 - high);
320
321		LSL				(regShifted, value, constantShift);
322
323		value = regShifted;
324	} else if (high > 7) {
325		DECL_REG		(regShifted);
326		DECL_CONST_REG	(constantShift, high - 7);
327
328		LSR				(regShifted, value, constantShift);
329
330		value = regShifted;
331	}
332
333	size_t bits = high - low + 1;
334	static const U8 mask[9] = { 0, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, 0xff };
335
336	DECL_CONST_REG		(constantMask,	mask[bits]);
337	DECL_REG			(regMasked);
338
339	AND					(regMasked,		value, constantMask);
340	value = regMasked;
341
342	while (bits < 8) {
343		DECL_CONST_REG	(constantShift,	bits);
344		DECL_REG		(regShifted);
345		DECL_REG		(regOred);
346
347		LSR				(regShifted,	value, constantShift);
348		OR				(regOred,		value, regShifted);
349
350		value = regOred;
351		bits += 2;
352	}
353
354	return value;
355}
356
357cg_virtual_reg_t * CodeGenerator :: BitFieldFrom255(cg_block_t * block, cg_virtual_reg_t * value, size_t low, size_t high) {
358	cg_proc_t * procedure = block->proc;
359
360	size_t bits = high - low + 1;
361	assert(bits <= 8);
362	size_t lowBit = 8 - bits;
363
364	if (bits != 8) {
365		static const U8 mask[9] = { 0, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, 0xff };
366
367		DECL_CONST_REG		(constantMask,	mask[bits]);
368		DECL_REG			(regMasked);
369
370		AND					(regMasked,		value, constantMask);
371		value = regMasked;
372	}
373	
374	if (low > lowBit) {
375		DECL_CONST_REG		(constantShift,	low - lowBit);
376		DECL_REG			(regShifted);
377
378		LSL					(regShifted,	value, constantShift);
379		value = regShifted;
380	} else if (low < lowBit) {
381		DECL_CONST_REG		(constantShift,	lowBit - low);
382		DECL_REG			(regShifted);
383
384		LSR					(regShifted,	value, constantShift);
385		value = regShifted;
386	}
387
388	return value;
389}
390
391// ----------------------------------------------------------------------
392// Emit code to convert a representation of a color as individual
393// R, G and B components into a 16-bit 565 representation
394//
395// R, G B are within the range 0..0xff
396// ----------------------------------------------------------------------
397void CodeGenerator :: Color565FromRGB(cg_block_t * block, cg_virtual_reg_t * regRGB,
398	cg_virtual_reg_t * r, cg_virtual_reg_t * g, cg_virtual_reg_t * b) {
399	cg_proc_t * procedure = block->proc;
400
401	cg_virtual_reg_t *	regFieldR = BitFieldFrom255(block, r, 11, 15);
402	cg_virtual_reg_t *	regFieldG = BitFieldFrom255(block, g, 5, 10);
403	cg_virtual_reg_t *	regFieldB = BitFieldFrom255(block, b, 0, 4);
404
405	DECL_REG	(regBG);
406
407	OR			(regBG,		regFieldB, regFieldG);
408	OR			(regRGB,	regBG, regFieldR);
409}
410
411
412cg_virtual_reg_t * CodeGenerator :: Color565FromRGB(cg_block_t * block,
413													cg_virtual_reg_t * r, cg_virtual_reg_t * g, cg_virtual_reg_t * b) {
414	cg_proc_t * procedure = block->proc;
415
416	DECL_REG	(regResult);
417
418	Color565FromRGB(block, regResult, r, g, b);
419
420	return regResult;
421}
422
423
424cg_virtual_reg_t * CodeGenerator :: Blend255(cg_block_t * block, cg_virtual_reg_t * first, cg_virtual_reg_t * second,
425											 cg_virtual_reg_t * alpha) {
426
427	cg_proc_t * procedure = block->proc;
428
429	DECL_REG		(regDiff);
430
431	SUB				(regDiff,		second, first);		// diff = (second - first)
432
433	cg_virtual_reg_t *	regProd = Mul255(block, regDiff, alpha);	//	alpha * (second - first)
434
435	return Add(block, first, regProd);					// first + alpha * (second - first)
436}
437
438
439cg_virtual_reg_t * CodeGenerator :: Blend255(cg_block_t * block, U8 constant, cg_virtual_reg_t * second,
440											 cg_virtual_reg_t * alpha) {
441	cg_proc_t * procedure = block->proc;
442
443	DECL_CONST_REG	(regConst,	constant);
444
445	return Blend255(block, regConst, second, alpha);
446}
447
448cg_virtual_reg_t * CodeGenerator :: SignedVal(cg_block_t * block, cg_virtual_reg_t * value) {
449	cg_proc_t * procedure = block->proc;
450
451	DECL_REG		(regShifted);
452	DECL_CONST_REG	(constantShift, 7);
453	DECL_REG		(regExpanded);
454	DECL_CONST_REG	(c128, 128);
455	DECL_REG		(regResult);
456
457	// expand 0..255 -> 0..256
458	LSR				(regShifted, value, constantShift);
459	ADD				(regExpanded, value, regShifted);
460	SUB				(regResult, regExpanded, c128);
461
462	return regResult;
463}
464
465cg_virtual_reg_t * CodeGenerator :: Dot3(cg_block_t * block, 
466										 cg_virtual_reg_t * r[], cg_virtual_reg_t * g[], cg_virtual_reg_t * b[]) {
467	cg_proc_t * procedure = block->proc;
468
469	DECL_REG		(regProdR);
470	DECL_REG		(regProdG);
471	DECL_REG		(regProdB);
472	DECL_REG		(regSumRG);
473	DECL_REG		(regSumRGB);
474
475	MUL				(regProdR, SignedVal(block, r[0]), SignedVal(block, r[1]));
476	MUL				(regProdG, SignedVal(block, g[0]), SignedVal(block, g[1]));
477	ADD				(regSumRG, regProdR, regProdG);
478	MUL				(regProdB, SignedVal(block, b[0]), SignedVal(block, b[1]));
479	ADD				(regSumRGB, regSumRG, regProdB);
480
481	DECL_CONST_REG	(constant6, 6);
482	DECL_CONST_REG	(constant7, 7);
483	DECL_REG		(regShifted6);
484	DECL_REG		(regShifted13);
485	DECL_REG		(regAdjusted);
486
487	ASR				(regShifted6, regSumRGB, constant6);
488	ASR				(regShifted13, regShifted6, constant7);
489	SUB				(regAdjusted, regShifted6, regShifted13);
490
491	DECL_REG		(regClamped0);
492	DECL_REG		(regClamped255);
493
494	DECL_CONST_REG	(constant0, 0);
495	DECL_CONST_REG	(constant255, 255);
496
497	MAX				(regClamped0, regAdjusted, constant0);
498	MIN				(regClamped255, regClamped0, constant255);
499
500	return regClamped255;
501}