PageRenderTime 37ms CodeModel.GetById 17ms app.highlight 16ms RepoModel.GetById 2ms app.codeStats 0ms

/xbmc/visualizations/Goom/goom2k4-0/src/xmmx.c

http://github.com/xbmc/xbmc
C | 390 lines | 244 code | 59 blank | 87 comment | 36 complexity | 0b35e33c4a9cf5dc7777a2b85c4589d4 MD5 | raw file
  1
  2#ifdef HAVE_MMX
  3
/* Define this to get exactly the same result as the C function
 * (slightly slower), although the difference is barely noticeable.
 */
  7// #define STRICT_COMPAT
  8
  9#define BUFFPOINTNB 16
 10#define BUFFPOINTMASK 0xffff
 11#define BUFFINCR 0xff
 12
 13#define sqrtperte 16
/* use: a % sqrtperte  <=>  a & PERTEMASK */
 15#define PERTEMASK 0xf
/* use: a / sqrtperte  <=>  a >> PERTEDEC */
 17#define PERTEDEC 4
 18
 19
 20/*#define MMX_TRACE*/
 21#include "mmx.h"
 22/*#include "xmmx.h"*/
 23#include "goom_graphic.h"
 24
 25int xmmx_supported (void) {
 26	return (mm_support()&0x8)>>3;
 27}
 28
 29void zoom_filter_xmmx (int prevX, int prevY,
 30                       Pixel *expix1, Pixel *expix2,
 31                       int *lbruS, int *lbruD, int buffratio,
 32                       int precalCoef[16][16])
 33{
 34	int bufsize = prevX * prevY; /* taille du buffer */
 35	volatile int loop;                    /* variable de boucle */
 36
 37	mmx_t *brutS = (mmx_t*)lbruS; /* buffer de transformation source */
 38	mmx_t *brutD = (mmx_t*)lbruD; /* buffer de transformation dest */
 39
 40	volatile mmx_t prevXY;
 41	volatile mmx_t ratiox;
 42	/*	volatile mmx_t interpix; */
 43
 44	expix1[0].val=expix1[prevX-1].val=expix1[prevX*prevY-1].val=expix1[prevX*prevY-prevX].val=0;
 45
 46	prevXY.ud[0] = (prevX-1)<<PERTEDEC;
 47	prevXY.ud[1] = (prevY-1)<<PERTEDEC;
 48
 49	ratiox.d[0] = buffratio;
 50	ratiox.d[1] = buffratio;
 51
 52  asm volatile
 53    ("\n\t movq  %[ratio], %%mm6"
 54     "\n\t pslld $16,      %%mm6" /* mm6 = [rat16=buffratio<<16 | rat16=buffratio<<16] */
 55     "\n\t pxor  %%mm7,    %%mm7" /* mm7 = 0 */
 56     ::[ratio]"m"(ratiox));
 57
 58	loop=0;
 59
 60	/*
 61	 * NOTE : mm6 et mm7 ne sont pas modifies dans la boucle.
 62	 */
 63	while (loop < bufsize)
 64	{
 65		/* Thread #1
 66		 * pre :  mm6 = [rat16|rat16]
 67		 * post : mm0 = S + ((D-S)*rat16 format [X|Y]
 68		 * modified = mm0,mm1,mm2
 69		 */
 70
 71    asm volatile ("#1 \n\t movq 0(%[brutS]), %%mm0"
 72       "#1 \n\t movq 0(%[brutD]), %%mm1"
 73       "#1 \n\t psubd   %%mm0, %%mm1"  /* mm1 = D - S */
 74       "#1 \n\t movq    %%mm1, %%mm2" /* mm2 = D - S */
 75       "#1 \n\t pslld     $16, %%mm1"
 76       "#1 \n\t pmullw  %%mm6, %%mm2"
 77       "#1 \n\t pmulhuw %%mm6, %%mm1"
 78       "#1 \n\t pslld   $16,   %%mm0"
 79       "#1 \n\t paddd   %%mm2, %%mm1"  /* mm1 = (D - S) * buffratio >> 16 */
 80
 81       "#1 \n\t paddd   %%mm1, %%mm0"  /* mm0 = S + mm1 */
 82       "#1 \n\t psrld   $16,   %%mm0"
 83       :
 84       :[brutS] "r" (&brutS[loop]) ,[brutD] "r" (&brutD[loop])
 85         );                      /* mm0 = S */
 86
 87		/*
 88		 * pre : mm0 : position vector on screen
 89		 *       prevXY : coordinate of the lower-right point on screen
 90		 * post : clipped mm0
 91		 * modified : mm0,mm1,mm2
 92		 */
 93    asm volatile
 94      ("#1 \n\t movq %[prevXY], %%mm1"
 95       "#1 \n\t pcmpgtd %%mm0,  %%mm1"
 96       /* mm0 en X contient (idem pour Y) :
 97        *   1111 si prevXY > px
 98        *   0000 si prevXY <= px */
 99#ifdef STRICT_COMPAT
100       "#1 \n\t movq      %%mm1, %%mm2"
101       "#1 \n\t punpckhdq %%mm2, %%mm2"
102       "#1 \n\t punpckldq %%mm1, %%mm1"
103       "#1 \n\t pand      %%mm2, %%mm0"
104#endif
105
106       "#1 \n\t pand %%mm1, %%mm0" /* on met a zero la partie qui deborde */
107        ::[prevXY]"m"(prevXY));
108
109		/* Thread #2
110		 * pre :  mm0 : clipped position on screen
111		 *
112		 * post : mm3 : coefs for this position
113		 *        mm1 : X vector [0|X]
114		 *
115		 * modif : eax,esi
116		 */
117		__asm__ __volatile__ (
118			"#2 \n\t movd %%mm0,%%esi"
119			"#2 \n\t movq %%mm0,%%mm1"
120
121			"#2 \n\t andl $15,%%esi"
122			"#2 \n\t psrlq $32,%%mm1"
123
124			"#2 \n\t shll $6,%%esi"
125			"#2 \n\t movd %%mm1,%%eax"
126
127			"#2 \n\t addl %[precalCoef],%%esi"
128			"#2 \n\t andl $15,%%eax"
129
130			"#2 \n\t movd (%%esi,%%eax,4),%%mm3"
131			::[precalCoef]"g"(precalCoef):"eax","esi");
132
133		/*
134		 * extraction des coefficients... (Thread #3)
135		 *
136		 * pre : coef dans mm3
137		 *
138		 * post : coef extraits dans mm3 (c1 & c2)
139		 *                        et mm4 (c3 & c4)
140		 *
141		 * modif : mm5
142		 */
143
144		/* (Thread #4)
145		 * pre : mm0 : Y pos [*|Y]
146		 *       mm1 : X pos [*|X]
147		 *
148		 * post : mm0 : expix1[position]
149		 *        mm2 : expix1[position+largeur]
150		 *
151		 * modif : eax, esi
152		 */
153		__asm__ __volatile__ (
154      "#2 \n\t psrld $4, %%mm0"
155      "#2 \n\t psrld $4, %%mm1"      /* PERTEDEC = $4 */
156
157      "#4 \n\t movd %%mm1,%%eax"
158			"#3 \n\t movq %%mm3,%%mm5" 
159
160			"#4 \n\t mull %[prevX]"
161			"#4 \n\t movd %%mm0,%%esi"
162
163      "#3 \n\t punpcklbw %%mm5, %%mm3"
164			"#4 \n\t addl %%esi, %%eax"
165
166      "#3 \n\t movq %%mm3, %%mm4"     
167      "#3 \n\t movq %%mm3, %%mm5"     
168
169      "#4 \n\t movl %[expix1], %%esi"
170      "#3 \n\t punpcklbw %%mm5, %%mm3"
171
172      "#4 \n\t movq (%%esi,%%eax,4),%%mm0"
173      "#3 \n\t punpckhbw %%mm5, %%mm4"
174
175      "#4 \n\t addl %[prevX],%%eax"
176      "#4 \n\t movq (%%esi,%%eax,4),%%mm2"
177
178			:
179      : [expix1] "g"(expix1)
180      , [prevX]  "g"(prevX)
181      :"eax","esi"
182		);
183
184		/*
185		 * pre :       mm0 : expix1[position]
186		 *             mm2 : expix1[position+largeur]
187		 *       mm3 & mm4 : coefs
188		 */
189
190		/* recopie des deux premiers pixels dans mm0 et mm1 */
191		movq_r2r (mm0, mm1);            /* b1-v1-r1-a1-b2-v2-r2-a2 */
192
193		/* depackage du premier pixel */
194		punpcklbw_r2r (mm7, mm0);       /* 00-b2-00-v2-00-r2-00-a2 */
195
196		/* extraction des coefficients... */
197
198		movq_r2r (mm3, mm5);      /* c2-c2-c2-c2-c1-c1-c1-c1 */
199
200		/*^en parrallele^*/ /* depackage du 2ieme pixel */
201		/*^*/ punpckhbw_r2r (mm7, mm1); /* 00-b1-00-v1-00-r1-00-a1 */
202
203		punpcklbw_r2r (mm7, mm5);	/* 00-c1-00-c1-00-c1-00-c1 */
204		punpckhbw_r2r (mm7, mm3);	/* 00-c2-00-c2-00-c2-00-c2 */
205
206		/* multiplication des pixels par les coefficients */
207		pmullw_r2r (mm5, mm0);		/* c1*b2-c1*v2-c1*r2-c1*a2 */
208		pmullw_r2r (mm3, mm1);		/* c2*b1-c2*v1-c2*r1-c2*a1 */
209		paddw_r2r (mm1, mm0);
210
211		/* ...extraction des 2 derniers coefficients */
212		movq_r2r (mm4, mm5);			/* c4-c4-c4-c4-c3-c3-c3-c3 */
213		punpcklbw_r2r (mm7, mm4);	/* 00-c3-00-c3-00-c3-00-c3 */
214		punpckhbw_r2r (mm7, mm5);	/* 00-c4-00-c4-00-c4-00-c4 */
215
216		/* recuperation des 2 derniers pixels */
217		movq_r2r (mm2, mm1);
218
219		/* depackage des pixels */
220		punpcklbw_r2r (mm7, mm1);
221		punpckhbw_r2r (mm7, mm2);
222
223		/* multiplication pas les coeffs */
224		pmullw_r2r (mm4, mm1);
225		pmullw_r2r (mm5, mm2);
226
227		/* ajout des valeurs obtenues ? la valeur finale */
228		paddw_r2r (mm1, mm0);
229		paddw_r2r (mm2, mm0);
230
231		/* division par 256 = 16+16+16+16, puis repackage du pixel final */
232		psrlw_i2r (8, mm0);
233		packuswb_r2r (mm7, mm0);
234
235		movd_r2m (mm0,expix2[loop]);
236
237		++loop;
238	}
239	__asm__ __volatile__ ("emms\n");
240}
241
/*
 * DRAWMETHOD_PLUS_XMMX(_out, _backbuf, _col)
 *   Loads the pixel _backbuf into mm0, adds _col to it with per-byte
 *   unsigned saturation (paddusb) and stores the low 32 bits into _out.
 *   Clobbers mm0. Wrapped in do { } while (0) so that the expansion is a
 *   single statement and can safely be followed by ';' in any context.
 *
 * NOTE(review): if paddusb_m2r hands _col to the instruction as a 64-bit
 * memory operand (mmx.h is not visible here), an `int` _col is narrower than
 * the load; only the low 32 bits of the result are stored, but confirm.
 */
#define DRAWMETHOD_PLUS_XMMX(_out,_backbuf,_col) \
do { \
	movd_m2r(_backbuf, mm0); \
	paddusb_m2r(_col, mm0); \
	movd_r2m(mm0, _out); \
} while (0)

/* Plot operation used by draw_line_xmmx: saturating add of `col` into *p. */
#define DRAWMETHOD DRAWMETHOD_PLUS_XMMX(*p,*p,col)
250
251void draw_line_xmmx (Pixel *data, int x1, int y1, int x2, int y2, int col, int screenx, int screeny)
252{
253	int x, y, dx, dy, yy, xx;
254	Pixel *p;
255
256	if ((y1 < 0) || (y2 < 0) || (x1 < 0) || (x2 < 0) || (y1 >= screeny) || (y2 >= screeny) || (x1 >= screenx) || (x2 >= screenx))
257		goto end_of_line;
258
259	dx = x2 - x1;
260	dy = y2 - y1;
261	if (x1 >= x2) {
262		int tmp;
263
264		tmp = x1;
265		x1 = x2;
266		x2 = tmp;
267		tmp = y1;
268		y1 = y2;
269		y2 = tmp;
270		dx = x2 - x1;
271		dy = y2 - y1;
272	}
273
274	/* vertical line */
275	if (dx == 0) {
276		if (y1 < y2) {
277			p = &(data[(screenx * y1) + x1]);
278			for (y = y1; y <= y2; y++) {
279				DRAWMETHOD;
280				p += screenx;
281			}
282		}
283		else {
284			p = &(data[(screenx * y2) + x1]);
285			for (y = y2; y <= y1; y++) {
286				DRAWMETHOD;
287				p += screenx;
288			}
289		}
290		goto end_of_line;
291	}
292	/* horizontal line */
293	if (dy == 0) {
294		if (x1 < x2) {
295			p = &(data[(screenx * y1) + x1]);
296			for (x = x1; x <= x2; x++) {
297				DRAWMETHOD;
298				p++;
299			}
300			goto end_of_line;
301		}
302		else {
303			p = &(data[(screenx * y1) + x2]);
304			for (x = x2; x <= x1; x++) {
305				DRAWMETHOD;
306				p++;
307			}
308			goto end_of_line;
309		}
310	}
311	/* 1    */
312	/*  \   */
313	/*   \  */
314	/*    2 */
315	if (y2 > y1) {
316		/* steep */
317		if (dy > dx) {
318			dx = ((dx << 16) / dy);
319			x = x1 << 16;
320			for (y = y1; y <= y2; y++) {
321				xx = x >> 16;
322				p = &(data[(screenx * y) + xx]);
323				DRAWMETHOD;
324				if (xx < (screenx - 1)) {
325					p++;
326					/* DRAWMETHOD; */
327				}
328				x += dx;
329			}
330			goto end_of_line;
331		}
332		/* shallow */
333		else {
334			dy = ((dy << 16) / dx);
335			y = y1 << 16;
336			for (x = x1; x <= x2; x++) {
337				yy = y >> 16;
338				p = &(data[(screenx * yy) + x]);
339				DRAWMETHOD;
340				if (yy < (screeny - 1)) {
341					p += screeny;
342					/* DRAWMETHOD; */
343				}
344				y += dy;
345			}
346		}
347	}
348	/*    2 */
349	/*   /  */
350	/*  /   */
351	/* 1    */
352	else {
353		/* steep */
354		if (-dy > dx) {
355			dx = ((dx << 16) / -dy);
356			x = (x1 + 1) << 16;
357			for (y = y1; y >= y2; y--) {
358				xx = x >> 16;
359				p = &(data[(screenx * y) + xx]);
360				DRAWMETHOD;
361				if (xx < (screenx - 1)) {
362					p--;
363					/* DRAWMETHOD; */
364				}
365				x += dx;
366			}
367			goto end_of_line;
368		}
369		/* shallow */
370		else {
371			dy = ((dy << 16) / dx);
372			y = y1 << 16;
373			for (x = x1; x <= x2; x++) {
374				yy = y >> 16;
375				p = &(data[(screenx * yy) + x]);
376				DRAWMETHOD;
377				if (yy < (screeny - 1)) {
378					p += screeny;
379					/* DRAWMETHOD; */
380				}
381				y += dy;
382			}
383			goto end_of_line;
384		}
385	}
386end_of_line:
387	__asm__ __volatile__ ("emms\n"); 
388}
389
390#endif