/project/jni/sdl_gfx/SDL_imageFilter.c
https://github.com/aichunyu/FFPlayer · C · 7556 lines · 6038 code · 274 blank · 1244 comment · 520 complexity · d43bab1767b6590f1804da8a41766b90 MD5 · raw file
Large files are truncated click here to view the full file
/*

SDL_imageFilter - bytes-image "filter" routines.
(Uses inline x86 MMX or ASM optimizations if available and enabled.)

LGPL (c) A. Schiffler

Note: Most of the MMX code is based on published routines
by Vladimir Kravtchenko at vk@cs.ubc.ca - credits go to
him for his work.

*/
-
- #include <stdio.h>
- #include <stdlib.h>
- #include <string.h>
-
- #include "SDL_imageFilter.h"
-
/*!
\brief Swaps the byte order in a 32bit integer (LSB becomes MSB, etc.).

The argument is converted to unsigned int and every byte is masked before
it is shifted, so signed or wider arguments cannot sign-extend or overflow
during the shifts. The argument is evaluated more than once; do not pass
expressions with side effects.
*/
#define SWAP_32(x) \
    ((((unsigned int) (x) & 0xff000000u) >> 24) | \
     (((unsigned int) (x) & 0x00ff0000u) >> 8)  | \
     (((unsigned int) (x) & 0x0000ff00u) << 8)  | \
     (((unsigned int) (x) & 0x000000ffu) << 24))
-
/* ------ File-scope state and compiler detection ----- */

/*!
\brief Override switch for the MMX code paths: non-zero allows them, zero forces the C fallbacks. Starts out enabled.
*/
static int SDL_imageFilterUseMMX = 1;

/* GCC__ marks compilers that define __GNUC__; it selects the AT&T-syntax asm blocks below */
#ifdef __GNUC__
#define GCC__
#endif
-
/*!
\brief Internal function returning the CPU flags.

Executes CPUID with EAX=1 and returns the value left in EDX. When the file
is built without USE_MMX no instruction is executed and 0 is returned.

\returns Flags of system CPU, or 0 if USE_MMX is not defined.
*/
unsigned int _cpuFlags(void)
{
    /* Same type as the return value so no sign conversion takes place. */
    unsigned int flags = 0;

#ifdef USE_MMX
#if !defined(GCC__)
    __asm
    {
        pusha
        mov eax, 1
        cpuid               /* get CPU ID flag */
        mov flags,edx       /* move result to mmx_bit */
        popa
    }
#else
    /* NOTE(review): %0 is a memory operand that is written after pusha has
       moved the stack pointer; this presumably relies on the operand not
       being addressed relative to esp - confirm for the build flags used. */
    asm volatile
        ("pusha \n\t"
         "mov %1, %%eax \n\t"   /* request feature flag */
         "cpuid \n\t"           /* get CPU ID flag */
         "mov %%edx, %0 \n\t"   /* move result to mmx_bit */
         "popa \n\t"
         :"=m" (flags)          /* %0 */
         :"i"(0x00000001)       /* %1 */
        );
#endif
#endif

    return (flags);
}
-
- /*!
- \brief MMX detection routine (with override flag).
-
- \returns 1 of MMX was detected, 0 otherwise.
- */
- int SDL_imageFilterMMXdetect(void)
- {
- unsigned int mmx_bit;
-
- /* Check override flag */
- if (SDL_imageFilterUseMMX == 0) {
- return (0);
- }
-
- mmx_bit = _cpuFlags();
- mmx_bit &= 0x00800000;
- mmx_bit = (mmx_bit && 0x00800000);
-
- return (mmx_bit);
- }
-
- /*!
- \brief Disable MMX check for filter functions and and force to use non-MMX C based code.
- */
- void SDL_imageFilterMMXoff()
- {
- SDL_imageFilterUseMMX = 0;
- }
-
- /*!
- \brief Enable MMX check for filter functions and use MMX code if available.
- */
- void SDL_imageFilterMMXon()
- {
- SDL_imageFilterUseMMX = 1;
- }
-
- /* ------------------------------------------------------------------------------------ */
-
/*!
\brief Internal MMX Filter using Add: D = saturation255(S1 + S2)

Only the SrcLength/8 complete groups of 8 bytes are processed; the remaining
(SrcLength & 7) bytes are not touched and must be handled by the caller.

\param Src1 Pointer to the start of the first source byte array (S1).
\param Src2 Pointer to the start of the second source byte array (S2).
\param Dest Pointer to the start of the destination byte array (D).
\param SrcLength The number of bytes in the source arrays.

\return Returns 0 for success (including SrcLength < 8, where there is nothing
to do) or -1 for error (NULL pointer, or built without USE_MMX).
*/
int SDL_imageFilterAddMMX(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength)
{
#ifdef USE_MMX
    /* The assembly dereferences all three pointers unconditionally. */
    if ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
        return (-1);
    /* With fewer than 8 bytes the loop counter would start at 0 and the
       "dec ecx / jnz" pair would wrap around instead of terminating. */
    if (SrcLength < 8)
        return (0);
#if !defined(GCC__)
    __asm
    {
        pusha
        mov eax, Src1           /* load Src1 address into eax */
        mov ebx, Src2           /* load Src2 address into ebx */
        mov edi, Dest           /* load Dest address into edi */
        mov ecx, SrcLength      /* load loop counter (SIZE) into ecx */
        shr ecx, 3              /* counter/8 (MMX loads 8 bytes at a time) */
        align 16                /* 16 byte alignment of the loop entry */
    L1010:
        movq mm1, [eax]         /* load 8 bytes from Src1 into mm1 */
        paddusb mm1, [ebx]      /* mm1=Src1+Src2 (add 8 bytes with saturation) */
        movq [edi], mm1         /* store result in Dest */
        add eax, 8              /* increase Src1, Src2 and Dest */
        add ebx, 8              /* register pointers by 8 */
        add edi, 8
        dec ecx                 /* decrease loop counter */
        jnz L1010               /* check loop termination, proceed if required */
        emms                    /* exit MMX state */
        popa
    }
#else
    /* NOTE(review): the "m" operands are read after pusha has moved esp; this
       presumably relies on them not being addressed relative to esp - confirm. */
    asm volatile
        ("pusha \n\t"
         "mov %2, %%eax \n\t"           /* load Src1 address into eax */
         "mov %1, %%ebx \n\t"           /* load Src2 address into ebx */
         "mov %0, %%edi \n\t"           /* load Dest address into edi */
         "mov %3, %%ecx \n\t"           /* load loop counter (SIZE) into ecx */
         "shr $3, %%ecx \n\t"           /* counter/8 (MMX loads 8 bytes at a time) */
         ".align 16 \n\t"               /* 16 byte alignment of the loop entry */
         "1: movq (%%eax), %%mm1 \n\t"  /* load 8 bytes from Src1 into mm1 */
         "paddusb (%%ebx), %%mm1 \n\t"  /* mm1=Src1+Src2 (add 8 bytes with saturation) */
         "movq %%mm1, (%%edi) \n\t"     /* store result in Dest */
         "add $8, %%eax \n\t"           /* increase Src1, Src2 and Dest */
         "add $8, %%ebx \n\t"           /* register pointers by 8 */
         "add $8, %%edi \n\t"
         "dec %%ecx \n\t"               /* decrease loop counter */
         "jnz 1b \n\t"                  /* check loop termination, proceed if required */
         "emms \n\t"                    /* exit MMX state */
         "popa \n\t"
         :                              /* no outputs: the Dest variable itself is only read */
         :"m"(Dest),                    /* %0 */
          "m"(Src2),                    /* %1 */
          "m"(Src1),                    /* %2 */
          "m"(SrcLength)                /* %3 */
         :"memory"                      /* the loop stores through the Dest pointer */
        );
#endif
    return (0);
#else
    return (-1);
#endif
}
-
/*!
\brief Filter using Add: D = saturation255(S1 + S2)

When MMX is detected and at least 8 bytes are given, the MMX routine handles
the complete 8-byte groups and the C loop only finishes the remaining bytes;
otherwise the C loop processes the whole array.

\param Src1 Pointer to the start of the first source byte array (S1).
\param Src2 Pointer to the start of the second source byte array (S2).
\param Dest Pointer to the start of the destination byte array (D).
\param length The number of bytes in the source arrays.

\return Returns 0 for success or -1 for error.
*/
int SDL_imageFilterAdd(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)
{
    /* Index of the first byte that still has to be processed in C */
    unsigned int pos = 0;

    /* Validate input parameters */
    if (Src1 == NULL || Src2 == NULL || Dest == NULL) {
        return (-1);
    }
    if (length == 0) {
        return (0);
    }

    if (SDL_imageFilterMMXdetect() && length >= 8) {
        /* MMX covers every complete group of 8 bytes */
        SDL_imageFilterAddMMX(Src1, Src2, Dest, length);
        pos = length & ~7u;
    }

    /* C routine for the bytes MMX did not process (possibly none) */
    for (; pos < length; pos++) {
        int sum = (int) Src1[pos] + (int) Src2[pos];
        Dest[pos] = (unsigned char) ((sum > 255) ? 255 : sum);
    }

    return (0);
}
-
/*!
\brief Internal MMX Filter using Mean: D = S1/2 + S2/2

Only the SrcLength/8 complete groups of 8 bytes are processed; the remaining
(SrcLength & 7) bytes are not touched and must be handled by the caller.

\param Src1 Pointer to the start of the first source byte array (S1).
\param Src2 Pointer to the start of the second source byte array (S2).
\param Dest Pointer to the start of the destination byte array (D).
\param SrcLength The number of bytes in the source arrays.
\param Mask Mask array containing 8 bytes with 0x7F value.

\return Returns 0 for success (including SrcLength < 8, where there is nothing
to do) or -1 for error (NULL pointer, or built without USE_MMX).
*/
int SDL_imageFilterMeanMMX(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength,
                           unsigned char *Mask)
{
#ifdef USE_MMX
    /* The assembly dereferences all four pointers unconditionally. */
    if ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL) || (Mask == NULL))
        return (-1);
    /* With fewer than 8 bytes the loop counter would start at 0 and the
       "dec ecx / jnz" pair would wrap around instead of terminating. */
    if (SrcLength < 8)
        return (0);
#if !defined(GCC__)
    __asm
    {
        pusha
        mov edx, Mask           /* load Mask address into edx */
        movq mm0, [edx]         /* load Mask into mm0 */
        mov eax, Src1           /* load Src1 address into eax */
        mov ebx, Src2           /* load Src2 address into ebx */
        mov edi, Dest           /* load Dest address into edi */
        mov ecx, SrcLength      /* load loop counter (SIZE) into ecx */
        shr ecx, 3              /* counter/8 (MMX loads 8 bytes at a time) */
        align 16                /* 16 byte alignment of the loop entry */
    L21011:
        movq mm1, [eax]         /* load 8 bytes from Src1 into mm1 */
        movq mm2, [ebx]         /* load 8 bytes from Src2 into mm2 */
        /* --- Byte shift via Word shift --- */
        psrlw mm1, 1            /* shift 4 WORDS of mm1 1 bit to the right */
        psrlw mm2, 1            /* shift 4 WORDS of mm2 1 bit to the right */
        pand mm1, mm0           /* apply Mask to 8 BYTES of mm1 */
        /* byte 0x0f, 0xdb, 0xc8 */
        pand mm2, mm0           /* apply Mask to 8 BYTES of mm2 */
        /* byte 0x0f, 0xdb, 0xd0 */
        paddusb mm1, mm2        /* mm1=mm1+mm2 (add 8 bytes with saturation) */
        movq [edi], mm1         /* store result in Dest */
        add eax, 8              /* increase Src1, Src2 and Dest */
        add ebx, 8              /* register pointers by 8 */
        add edi, 8
        dec ecx                 /* decrease loop counter */
        jnz L21011              /* check loop termination, proceed if required */
        emms                    /* exit MMX state */
        popa
    }
#else
    /* NOTE(review): the "m" operands are read after pusha has moved esp; this
       presumably relies on them not being addressed relative to esp - confirm. */
    asm volatile
        ("pusha \n\t"
         "movl %4, %%edx \n\t"          /* load Mask address into edx */
         "movq (%%edx), %%mm0 \n\t"     /* load Mask into mm0 */
         "mov %2, %%eax \n\t"           /* load Src1 address into eax */
         "mov %1, %%ebx \n\t"           /* load Src2 address into ebx */
         "mov %0, %%edi \n\t"           /* load Dest address into edi */
         "mov %3, %%ecx \n\t"           /* load loop counter (SIZE) into ecx */
         "shr $3, %%ecx \n\t"           /* counter/8 (MMX loads 8 bytes at a time) */
         ".align 16 \n\t"               /* 16 byte alignment of the loop entry */
         "1: \n\t"
         "movq (%%eax), %%mm1 \n\t"     /* load 8 bytes from Src1 into mm1 */
         "movq (%%ebx), %%mm2 \n\t"     /* load 8 bytes from Src2 into mm2 */
         /* --- Byte shift via Word shift --- */
         "psrlw $1, %%mm1 \n\t"         /* shift 4 WORDS of mm1 1 bit to the right */
         "psrlw $1, %%mm2 \n\t"         /* shift 4 WORDS of mm2 1 bit to the right */
         /* hand-encoded "pand %%mm0, %%mm1": apply Mask to 8 BYTES of mm1 */
         ".byte 0x0f, 0xdb, 0xc8 \n\t"
         /* hand-encoded "pand %%mm0, %%mm2": apply Mask to 8 BYTES of mm2 */
         ".byte 0x0f, 0xdb, 0xd0 \n\t"
         "paddusb %%mm2, %%mm1 \n\t"    /* mm1=mm1+mm2 (add 8 bytes with saturation) */
         "movq %%mm1, (%%edi) \n\t"     /* store result in Dest */
         "add $8, %%eax \n\t"           /* increase Src1, Src2 and Dest */
         "add $8, %%ebx \n\t"           /* register pointers by 8 */
         "add $8, %%edi \n\t"
         "dec %%ecx \n\t"               /* decrease loop counter */
         "jnz 1b \n\t"                  /* check loop termination, proceed if required */
         "emms \n\t"                    /* exit MMX state */
         "popa \n\t"
         :                              /* no outputs: the Dest variable itself is only read */
         :"m"(Dest),                    /* %0 */
          "m"(Src2),                    /* %1 */
          "m"(Src1),                    /* %2 */
          "m"(SrcLength),               /* %3 */
          "m"(Mask)                     /* %4 */
         :"memory"                      /* the loop stores through the Dest pointer */
        );
#endif
    return (0);
#else
    return (-1);
#endif
}
-
/*!
\brief Filter using Mean: D = S1/2 + S2/2

Each source byte is halved (rounding down) before the two halves are added.
When MMX is detected and at least 8 bytes are given, the MMX routine handles
the complete 8-byte groups and the C loop only finishes the remaining bytes.

\param Src1 Pointer to the start of the first source byte array (S1).
\param Src2 Pointer to the start of the second source byte array (S2).
\param Dest Pointer to the start of the destination byte array (D).
\param length The number of bytes in the source arrays.

\return Returns 0 for success or -1 for error.
*/
int SDL_imageFilterMean(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)
{
    /* Eight 0x7F bytes handed to the MMX routine as its Mask argument */
    static unsigned char Mask[8] = { 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F };
    /* Index of the first byte that still has to be processed in C */
    unsigned int pos = 0;

    /* Validate input parameters */
    if (Src1 == NULL || Src2 == NULL || Dest == NULL) {
        return (-1);
    }
    if (length == 0) {
        return (0);
    }

    if (SDL_imageFilterMMXdetect() && length >= 8) {
        /* MMX covers every complete group of 8 bytes */
        SDL_imageFilterMeanMMX(Src1, Src2, Dest, length, Mask);
        pos = length & ~7u;
    }

    /* C routine for the bytes MMX did not process (possibly none) */
    for (; pos < length; pos++) {
        int half1 = (int) Src1[pos] >> 1;
        int half2 = (int) Src2[pos] >> 1;
        Dest[pos] = (unsigned char) (half1 + half2);
    }

    return (0);
}
-
/*!
\brief Internal MMX Filter using Sub: D = saturation0(S1 - S2)

Only the SrcLength/8 complete groups of 8 bytes are processed; the remaining
(SrcLength & 7) bytes are not touched and must be handled by the caller.

\param Src1 Pointer to the start of the first source byte array (S1).
\param Src2 Pointer to the start of the second source byte array (S2).
\param Dest Pointer to the start of the destination byte array (D).
\param SrcLength The number of bytes in the source arrays.

\return Returns 0 for success (including SrcLength < 8, where there is nothing
to do) or -1 for error (NULL pointer, or built without USE_MMX).
*/
int SDL_imageFilterSubMMX(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength)
{
#ifdef USE_MMX
    /* The assembly dereferences all three pointers unconditionally. */
    if ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
        return (-1);
    /* With fewer than 8 bytes the loop counter would start at 0 and the
       "dec ecx / jnz" pair would wrap around instead of terminating. */
    if (SrcLength < 8)
        return (0);
#if !defined(GCC__)
    __asm
    {
        pusha
        mov eax, Src1           /* load Src1 address into eax */
        mov ebx, Src2           /* load Src2 address into ebx */
        mov edi, Dest           /* load Dest address into edi */
        mov ecx, SrcLength      /* load loop counter (SIZE) into ecx */
        shr ecx, 3              /* counter/8 (MMX loads 8 bytes at a time) */
        align 16                /* 16 byte alignment of the loop entry */
    L1012:
        movq mm1, [eax]         /* load 8 bytes from Src1 into mm1 */
        psubusb mm1, [ebx]      /* mm1=Src1-Src2 (sub 8 bytes with saturation) */
        movq [edi], mm1         /* store result in Dest */
        add eax, 8              /* increase Src1, Src2 and Dest */
        add ebx, 8              /* register pointers by 8 */
        add edi, 8
        dec ecx                 /* decrease loop counter */
        jnz L1012               /* check loop termination, proceed if required */
        emms                    /* exit MMX state */
        popa
    }
#else
    /* NOTE(review): the "m" operands are read after pusha has moved esp; this
       presumably relies on them not being addressed relative to esp - confirm. */
    asm volatile
        ("pusha \n\t"
         "mov %2, %%eax \n\t"           /* load Src1 address into eax */
         "mov %1, %%ebx \n\t"           /* load Src2 address into ebx */
         "mov %0, %%edi \n\t"           /* load Dest address into edi */
         "mov %3, %%ecx \n\t"           /* load loop counter (SIZE) into ecx */
         "shr $3, %%ecx \n\t"           /* counter/8 (MMX loads 8 bytes at a time) */
         ".align 16 \n\t"               /* 16 byte alignment of the loop entry */
         "1: movq (%%eax), %%mm1 \n\t"  /* load 8 bytes from Src1 into mm1 */
         "psubusb (%%ebx), %%mm1 \n\t"  /* mm1=Src1-Src2 (sub 8 bytes with saturation) */
         "movq %%mm1, (%%edi) \n\t"     /* store result in Dest */
         "add $8, %%eax \n\t"           /* increase Src1, Src2 and Dest */
         "add $8, %%ebx \n\t"           /* register pointers by 8 */
         "add $8, %%edi \n\t"
         "dec %%ecx \n\t"               /* decrease loop counter */
         "jnz 1b \n\t"                  /* check loop termination, proceed if required */
         "emms \n\t"                    /* exit MMX state */
         "popa \n\t"
         :                              /* no outputs: the Dest variable itself is only read */
         :"m"(Dest),                    /* %0 */
          "m"(Src2),                    /* %1 */
          "m"(Src1),                    /* %2 */
          "m"(SrcLength)                /* %3 */
         :"memory"                      /* the loop stores through the Dest pointer */
        );
#endif
    return (0);
#else
    return (-1);
#endif
}
-
/*!
\brief Filter using Sub: D = saturation0(S1 - S2)

When MMX is detected and at least 8 bytes are given, the MMX routine handles
the complete 8-byte groups and the C loop only finishes the remaining bytes;
otherwise the C loop processes the whole array.

\param Src1 Pointer to the start of the first source byte array (S1).
\param Src2 Pointer to the start of the second source byte array (S2).
\param Dest Pointer to the start of the destination byte array (D).
\param length The number of bytes in the source arrays.

\return Returns 0 for success or -1 for error.
*/
int SDL_imageFilterSub(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)
{
    /* Index of the first byte that still has to be processed in C */
    unsigned int pos = 0;

    /* Validate input parameters */
    if (Src1 == NULL || Src2 == NULL || Dest == NULL) {
        return (-1);
    }
    if (length == 0) {
        return (0);
    }

    if (SDL_imageFilterMMXdetect() && length >= 8) {
        /* MMX covers every complete group of 8 bytes */
        SDL_imageFilterSubMMX(Src1, Src2, Dest, length);
        pos = length & ~7u;
    }

    /* C routine for the bytes MMX did not process (possibly none) */
    for (; pos < length; pos++) {
        int diff = (int) Src1[pos] - (int) Src2[pos];
        Dest[pos] = (unsigned char) ((diff < 0) ? 0 : diff);
    }

    return (0);
}
-
/*!
\brief Internal MMX Filter using AbsDiff: D = | S1 - S2 |

Both saturated differences (S1-S2 and S2-S1) are computed and OR-ed together.
Only the SrcLength/8 complete groups of 8 bytes are processed; the remaining
(SrcLength & 7) bytes are not touched and must be handled by the caller.

\param Src1 Pointer to the start of the first source byte array (S1).
\param Src2 Pointer to the start of the second source byte array (S2).
\param Dest Pointer to the start of the destination byte array (D).
\param SrcLength The number of bytes in the source arrays.

\return Returns 0 for success (including SrcLength < 8, where there is nothing
to do) or -1 for error (NULL pointer, or built without USE_MMX).
*/
int SDL_imageFilterAbsDiffMMX(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength)
{
#ifdef USE_MMX
    /* The assembly dereferences all three pointers unconditionally. */
    if ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
        return (-1);
    /* With fewer than 8 bytes the loop counter would start at 0 and the
       "dec ecx / jnz" pair would wrap around instead of terminating. */
    if (SrcLength < 8)
        return (0);
#if !defined(GCC__)
    __asm
    {
        pusha
        mov eax, Src1           /* load Src1 address into eax */
        mov ebx, Src2           /* load Src2 address into ebx */
        mov edi, Dest           /* load Dest address into edi */
        mov ecx, SrcLength      /* load loop counter (SIZE) into ecx */
        shr ecx, 3              /* counter/8 (MMX loads 8 bytes at a time) */
        align 16                /* 16 byte alignment of the loop entry */
    L1013:
        movq mm1, [eax]         /* load 8 bytes from Src1 into mm1 */
        movq mm2, [ebx]         /* load 8 bytes from Src2 into mm2 */
        psubusb mm1, [ebx]      /* mm1=Src1-Src2 (sub 8 bytes with saturation) */
        psubusb mm2, [eax]      /* mm2=Src2-Src1 (sub 8 bytes with saturation) */
        por mm1, mm2            /* combine both mm2 and mm1 results */
        movq [edi], mm1         /* store result in Dest */
        add eax, 8              /* increase Src1, Src2 and Dest */
        add ebx, 8              /* register pointers by 8 */
        add edi, 8
        dec ecx                 /* decrease loop counter */
        jnz L1013               /* check loop termination, proceed if required */
        emms                    /* exit MMX state */
        popa
    }
#else
    /* NOTE(review): the "m" operands are read after pusha has moved esp; this
       presumably relies on them not being addressed relative to esp - confirm. */
    asm volatile
        ("pusha \n\t"
         "mov %2, %%eax \n\t"           /* load Src1 address into eax */
         "mov %1, %%ebx \n\t"           /* load Src2 address into ebx */
         "mov %0, %%edi \n\t"           /* load Dest address into edi */
         "mov %3, %%ecx \n\t"           /* load loop counter (SIZE) into ecx */
         "shr $3, %%ecx \n\t"           /* counter/8 (MMX loads 8 bytes at a time) */
         ".align 16 \n\t"               /* 16 byte alignment of the loop entry */
         "1: movq (%%eax), %%mm1 \n\t"  /* load 8 bytes from Src1 into mm1 */
         "movq (%%ebx), %%mm2 \n\t"     /* load 8 bytes from Src2 into mm2 */
         "psubusb (%%ebx), %%mm1 \n\t"  /* mm1=Src1-Src2 (sub 8 bytes with saturation) */
         "psubusb (%%eax), %%mm2 \n\t"  /* mm2=Src2-Src1 (sub 8 bytes with saturation) */
         "por %%mm2, %%mm1 \n\t"        /* combine both mm2 and mm1 results */
         "movq %%mm1, (%%edi) \n\t"     /* store result in Dest */
         "add $8, %%eax \n\t"           /* increase Src1, Src2 and Dest */
         "add $8, %%ebx \n\t"           /* register pointers by 8 */
         "add $8, %%edi \n\t"
         "dec %%ecx \n\t"               /* decrease loop counter */
         "jnz 1b \n\t"                  /* check loop termination, proceed if required */
         "emms \n\t"                    /* exit MMX state */
         "popa \n\t"
         :                              /* no outputs: the Dest variable itself is only read */
         :"m"(Dest),                    /* %0 */
          "m"(Src2),                    /* %1 */
          "m"(Src1),                    /* %2 */
          "m"(SrcLength)                /* %3 */
         :"memory"                      /* the loop stores through the Dest pointer */
        );
#endif
    return (0);
#else
    return (-1);
#endif
}
-
/*!
\brief Filter using AbsDiff: D = | S1 - S2 |

When MMX is detected and at least 8 bytes are given, the MMX routine handles
the complete 8-byte groups and the C loop only finishes the remaining bytes;
otherwise the C loop processes the whole array.

\param Src1 Pointer to the start of the first source byte array (S1).
\param Src2 Pointer to the start of the second source byte array (S2).
\param Dest Pointer to the start of the destination byte array (D).
\param length The number of bytes in the source arrays.

\return Returns 0 for success or -1 for error.
*/
int SDL_imageFilterAbsDiff(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)
{
    /* Index of the first byte that still has to be processed in C */
    unsigned int pos = 0;

    /* Validate input parameters */
    if (Src1 == NULL || Src2 == NULL || Dest == NULL) {
        return (-1);
    }
    if (length == 0) {
        return (0);
    }

    if (SDL_imageFilterMMXdetect() && length >= 8) {
        /* MMX covers every complete group of 8 bytes */
        SDL_imageFilterAbsDiffMMX(Src1, Src2, Dest, length);
        pos = length & ~7u;
    }

    /* C routine for the bytes MMX did not process (possibly none) */
    for (; pos < length; pos++) {
        int first = (int) Src1[pos];
        int second = (int) Src2[pos];
        /* Subtract the smaller value from the larger one */
        Dest[pos] = (unsigned char) ((first > second) ? (first - second) : (second - first));
    }

    return (0);
}
-
/*!
\brief Internal MMX Filter using Mult: D = saturation255(S1 * S2)

Bytes are unpacked to words, multiplied, and packed back to bytes with
saturation. Only the SrcLength/8 complete groups of 8 bytes are processed;
the remaining (SrcLength & 7) bytes are not touched and must be handled by
the caller.

\param Src1 Pointer to the start of the first source byte array (S1).
\param Src2 Pointer to the start of the second source byte array (S2).
\param Dest Pointer to the start of the destination byte array (D).
\param SrcLength The number of bytes in the source arrays.

\return Returns 0 for success (including SrcLength < 8, where there is nothing
to do) or -1 for error (NULL pointer, or built without USE_MMX).
*/
int SDL_imageFilterMultMMX(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength)
{
#ifdef USE_MMX
    /* The assembly dereferences all three pointers unconditionally. */
    if ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
        return (-1);
    /* With fewer than 8 bytes the loop counter would start at 0 and the
       "dec ecx / jnz" pair would wrap around instead of terminating. */
    if (SrcLength < 8)
        return (0);
#if !defined(GCC__)
    __asm
    {
        pusha
        mov eax, Src1           /* load Src1 address into eax */
        mov ebx, Src2           /* load Src2 address into ebx */
        mov edi, Dest           /* load Dest address into edi */
        mov ecx, SrcLength      /* load loop counter (SIZE) into ecx */
        shr ecx, 3              /* counter/8 (MMX loads 8 bytes at a time) */
        pxor mm0, mm0           /* zero mm0 register */
        align 16                /* 16 byte alignment of the loop entry */
    L1014:
        movq mm1, [eax]         /* load 8 bytes from Src1 into mm1 */
        movq mm3, [ebx]         /* load 8 bytes from Src2 into mm3 */
        movq mm2, mm1           /* copy mm1 into mm2 */
        movq mm4, mm3           /* copy mm3 into mm4 */
        punpcklbw mm1, mm0      /* unpack low bytes of Src1 into words */
        punpckhbw mm2, mm0      /* unpack high bytes of Src1 into words */
        punpcklbw mm3, mm0      /* unpack low bytes of Src2 into words */
        punpckhbw mm4, mm0      /* unpack high bytes of Src2 into words */
        pmullw mm1, mm3         /* mul low bytes of Src1 and Src2 */
        pmullw mm2, mm4         /* mul high bytes of Src1 and Src2 */
        /* Take abs value of the results (signed words) */
        movq mm5, mm1           /* copy mm1 into mm5 */
        movq mm6, mm2           /* copy mm2 into mm6 */
        psraw mm5, 15           /* fill mm5 words with word sign bit */
        psraw mm6, 15           /* fill mm6 words with word sign bit */
        pxor mm1, mm5           /* take 1's compliment of only neg. words */
        pxor mm2, mm6           /* take 1's compliment of only neg. words */
        psubsw mm1, mm5         /* add 1 to only neg. words, W-(-1) or W-0 */
        psubsw mm2, mm6         /* add 1 to only neg. words, W-(-1) or W-0 */
        packuswb mm1, mm2       /* pack words back into bytes with saturation */
        movq [edi], mm1         /* store result in Dest */
        add eax, 8              /* increase Src1, Src2 and Dest */
        add ebx, 8              /* register pointers by 8 */
        add edi, 8
        dec ecx                 /* decrease loop counter */
        jnz L1014               /* check loop termination, proceed if required */
        emms                    /* exit MMX state */
        popa
    }
#else
    /* NOTE(review): the "m" operands are read after pusha has moved esp; this
       presumably relies on them not being addressed relative to esp - confirm. */
    asm volatile
        ("pusha \n\t"
         "mov %2, %%eax \n\t"           /* load Src1 address into eax */
         "mov %1, %%ebx \n\t"           /* load Src2 address into ebx */
         "mov %0, %%edi \n\t"           /* load Dest address into edi */
         "mov %3, %%ecx \n\t"           /* load loop counter (SIZE) into ecx */
         "shr $3, %%ecx \n\t"           /* counter/8 (MMX loads 8 bytes at a time) */
         "pxor %%mm0, %%mm0 \n\t"       /* zero mm0 register */
         ".align 16 \n\t"               /* 16 byte alignment of the loop entry */
         "1: movq (%%eax), %%mm1 \n\t"  /* load 8 bytes from Src1 into mm1 */
         "movq (%%ebx), %%mm3 \n\t"     /* load 8 bytes from Src2 into mm3 */
         "movq %%mm1, %%mm2 \n\t"       /* copy mm1 into mm2 */
         "movq %%mm3, %%mm4 \n\t"       /* copy mm3 into mm4 */
         "punpcklbw %%mm0, %%mm1 \n\t"  /* unpack low bytes of Src1 into words */
         "punpckhbw %%mm0, %%mm2 \n\t"  /* unpack high bytes of Src1 into words */
         "punpcklbw %%mm0, %%mm3 \n\t"  /* unpack low bytes of Src2 into words */
         "punpckhbw %%mm0, %%mm4 \n\t"  /* unpack high bytes of Src2 into words */
         "pmullw %%mm3, %%mm1 \n\t"     /* mul low bytes of Src1 and Src2 */
         "pmullw %%mm4, %%mm2 \n\t"     /* mul high bytes of Src1 and Src2 */
         /* Take abs value of the results (signed words) */
         "movq %%mm1, %%mm5 \n\t"       /* copy mm1 into mm5 */
         "movq %%mm2, %%mm6 \n\t"       /* copy mm2 into mm6 */
         "psraw $15, %%mm5 \n\t"        /* fill mm5 words with word sign bit */
         "psraw $15, %%mm6 \n\t"        /* fill mm6 words with word sign bit */
         "pxor %%mm5, %%mm1 \n\t"       /* take 1's compliment of only neg. words */
         "pxor %%mm6, %%mm2 \n\t"       /* take 1's compliment of only neg. words */
         "psubsw %%mm5, %%mm1 \n\t"     /* add 1 to only neg. words, W-(-1) or W-0 */
         "psubsw %%mm6, %%mm2 \n\t"     /* add 1 to only neg. words, W-(-1) or W-0 */
         "packuswb %%mm2, %%mm1 \n\t"   /* pack words back into bytes with saturation */
         "movq %%mm1, (%%edi) \n\t"     /* store result in Dest */
         "add $8, %%eax \n\t"           /* increase Src1, Src2 and Dest */
         "add $8, %%ebx \n\t"           /* register pointers by 8 */
         "add $8, %%edi \n\t"
         "dec %%ecx \n\t"               /* decrease loop counter */
         "jnz 1b \n\t"                  /* check loop termination, proceed if required */
         "emms \n\t"                    /* exit MMX state */
         "popa \n\t"
         :                              /* no outputs: the Dest variable itself is only read */
         :"m"(Dest),                    /* %0 */
          "m"(Src2),                    /* %1 */
          "m"(Src1),                    /* %2 */
          "m"(SrcLength)                /* %3 */
         :"memory"                      /* the loop stores through the Dest pointer */
        );
#endif
    return (0);
#else
    return (-1);
#endif
}
-
/*!
\brief Filter using Mult: D = saturation255(S1 * S2)

When MMX is detected and at least 8 bytes are given, the MMX routine handles
the complete 8-byte groups and the C loop only finishes the remaining bytes;
otherwise the C loop processes the whole array.

\param Src1 Pointer to the start of the first source byte array (S1).
\param Src2 Pointer to the start of the second source byte array (S2).
\param Dest Pointer to the start of the destination byte array (D).
\param length The number of bytes in the source arrays.

\return Returns 0 for success or -1 for error.
*/
int SDL_imageFilterMult(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)
{
    /* Index of the first byte that still has to be processed in C */
    unsigned int pos = 0;

    /* Validate input parameters */
    if (Src1 == NULL || Src2 == NULL || Dest == NULL) {
        return (-1);
    }
    if (length == 0) {
        return (0);
    }

    if (SDL_imageFilterMMXdetect() && length >= 8) {
        /* MMX covers every complete group of 8 bytes */
        SDL_imageFilterMultMMX(Src1, Src2, Dest, length);
        pos = length & ~7u;
    }

    /* C routine for the bytes MMX did not process (possibly none).
       NOTE: the original author was not sure that this matches what the
       MMX code does. */
    for (; pos < length; pos++) {
        int product = (int) Src1[pos] * (int) Src2[pos];
        Dest[pos] = (unsigned char) ((product > 255) ? 255 : product);
    }

    return (0);
}
-
/*!
\brief Internal ASM Filter using MultNor: D = S1 * S2

Works one byte at a time and processes all SrcLength bytes; only the low
byte of each product is stored.

\param Src1 Pointer to the start of the first source byte array (S1).
\param Src2 Pointer to the start of the second source byte array (S2).
\param Dest Pointer to the start of the destination byte array (D).
\param SrcLength The number of bytes in the source arrays.

\return Returns 0 for success (including SrcLength == 0, where there is nothing
to do) or -1 for error (NULL pointer, or built without USE_MMX).
*/
int SDL_imageFilterMultNorASM(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength)
{
#ifdef USE_MMX
    /* The assembly dereferences all three pointers unconditionally. */
    if ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
        return (-1);
    /* A zero counter would make the "dec ecx / jnz" pair wrap around
       instead of terminating. */
    if (SrcLength == 0)
        return (0);
#if !defined(GCC__)
    __asm
    {
        pusha
        mov edx, Src1           /* load Src1 address into edx */
        mov esi, Src2           /* load Src2 address into esi */
        mov edi, Dest           /* load Dest address into edi */
        mov ecx, SrcLength      /* load loop counter (SIZE) into ecx */
        align 16                /* 16 byte alignment of the loop entry */
    L10141:
        mov al, [edx]           /* load a byte from Src1 */
        mul byte ptr [esi]      /* mul with a byte from Src2 (size explicit, like mulb below) */
        mov [edi], al           /* move a byte result to Dest */
        inc edx                 /* increment Src1, Src2, Dest */
        inc esi                 /* pointer registers by one */
        inc edi
        dec ecx                 /* decrease loop counter */
        jnz L10141              /* check loop termination, proceed if required */
        popa
    }
#else
    /* NOTE(review): the "m" operands are read after pusha has moved esp; this
       presumably relies on them not being addressed relative to esp - confirm. */
    asm volatile
        ("pusha \n\t"
         "mov %2, %%edx \n\t"           /* load Src1 address into edx */
         "mov %1, %%esi \n\t"           /* load Src2 address into esi */
         "mov %0, %%edi \n\t"           /* load Dest address into edi */
         "mov %3, %%ecx \n\t"           /* load loop counter (SIZE) into ecx */
         ".align 16 \n\t"               /* 16 byte alignment of the loop entry */
         "1: mov (%%edx), %%al \n\t"    /* load a byte from Src1 */
         "mulb (%%esi) \n\t"            /* mul with a byte from Src2 */
         "mov %%al, (%%edi) \n\t"       /* move a byte result to Dest */
         "inc %%edx \n\t"               /* increment Src1, Src2, Dest */
         "inc %%esi \n\t"               /* pointer registers by one */
         "inc %%edi \n\t"
         "dec %%ecx \n\t"               /* decrease loop counter */
         "jnz 1b \n\t"                  /* check loop termination, proceed if required */
         "popa \n\t"
         :                              /* no outputs: the Dest variable itself is only read */
         :"m"(Dest),                    /* %0 */
          "m"(Src2),                    /* %1 */
          "m"(Src1),                    /* %2 */
          "m"(SrcLength)                /* %3 */
         :"memory"                      /* the loop stores through the Dest pointer */
        );
#endif
    return (0);
#else
    return (-1);
#endif
}
-
/*!
\brief Filter using MultNor: D = S1 * S2

Only the low 8 bits of each product are stored (no saturation). When MMX is
detected the byte-wise ASM routine processes the whole array; otherwise the
C loop does.

\param Src1 Pointer to the start of the first source byte array (S1).
\param Src2 Pointer to the start of the second source byte array (S2).
\param Dest Pointer to the start of the destination byte array (D).
\param length The number of bytes in the source arrays.

\return Returns 0 for success or -1 for error.
*/
int SDL_imageFilterMultNor(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)
{
    unsigned int i;

    /* Validate input parameters */
    if ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
        return (-1);
    if (length == 0)
        return (0);

    /* The ASM routine works byte by byte and already covers all `length`
       bytes. Running the C loop over the last (length & 7) bytes afterwards,
       as was done before, multiplied them a second time whenever Dest
       aliased one of the sources. */
    if (SDL_imageFilterMMXdetect() &&
        (SDL_imageFilterMultNorASM(Src1, Src2, Dest, length) == 0)) {
        return (0);
    }

    /* C routine to process the whole image */
    for (i = 0; i < length; i++) {
        /* Conversion to unsigned char keeps the low byte of the product */
        Dest[i] = (unsigned char) ((unsigned int) Src1[i] * (unsigned int) Src2[i]);
    }

    return (0);
}
-
/*!
\brief Internal MMX Filter using MultDivby2: D = saturation255(S1/2 * S2)

Bytes are unpacked to words, the Src1 words are halved, multiplied with the
Src2 words, and packed back to bytes with saturation. Only the SrcLength/8
complete groups of 8 bytes are processed; the remaining (SrcLength & 7)
bytes are not touched and must be handled by the caller.

\param Src1 Pointer to the start of the first source byte array (S1).
\param Src2 Pointer to the start of the second source byte array (S2).
\param Dest Pointer to the start of the destination byte array (D).
\param SrcLength The number of bytes in the source arrays.

\return Returns 0 for success (including SrcLength < 8, where there is nothing
to do) or -1 for error (NULL pointer, or built without USE_MMX).
*/
int SDL_imageFilterMultDivby2MMX(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength)
{
#ifdef USE_MMX
    /* The assembly dereferences all three pointers unconditionally. */
    if ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
        return (-1);
    /* With fewer than 8 bytes the loop counter would start at 0 and the
       "dec ecx / jnz" pair would wrap around instead of terminating. */
    if (SrcLength < 8)
        return (0);
#if !defined(GCC__)
    __asm
    {
        pusha
        mov eax, Src1           /* load Src1 address into eax */
        mov ebx, Src2           /* load Src2 address into ebx */
        mov edi, Dest           /* load Dest address into edi */
        mov ecx, SrcLength      /* load loop counter (SIZE) into ecx */
        shr ecx, 3              /* counter/8 (MMX loads 8 bytes at a time) */
        pxor mm0, mm0           /* zero mm0 register */
        align 16                /* 16 byte alignment of the loop entry */
    L1015:
        movq mm1, [eax]         /* load 8 bytes from Src1 into mm1 */
        movq mm3, [ebx]         /* load 8 bytes from Src2 into mm3 */
        movq mm2, mm1           /* copy mm1 into mm2 */
        movq mm4, mm3           /* copy mm3 into mm4 */
        punpcklbw mm1, mm0      /* unpack low bytes of Src1 into words */
        punpckhbw mm2, mm0      /* unpack high bytes of Src1 into words */
        punpcklbw mm3, mm0      /* unpack low bytes of Src2 into words */
        punpckhbw mm4, mm0      /* unpack high bytes of Src2 into words */
        psrlw mm1, 1            /* divide mm1 words by 2, Src1 low bytes */
        psrlw mm2, 1            /* divide mm2 words by 2, Src1 high bytes */
        pmullw mm1, mm3         /* mul low bytes of Src1 and Src2 */
        pmullw mm2, mm4         /* mul high bytes of Src1 and Src2 */
        packuswb mm1, mm2       /* pack words back into bytes with saturation */
        movq [edi], mm1         /* store result in Dest */
        add eax, 8              /* increase Src1, Src2 and Dest */
        add ebx, 8              /* register pointers by 8 */
        add edi, 8
        dec ecx                 /* decrease loop counter */
        jnz L1015               /* check loop termination, proceed if required */
        emms                    /* exit MMX state */
        popa
    }
#else
    /* NOTE(review): the "m" operands are read after pusha has moved esp; this
       presumably relies on them not being addressed relative to esp - confirm. */
    asm volatile
        ("pusha \n\t"
         "mov %2, %%eax \n\t"           /* load Src1 address into eax */
         "mov %1, %%ebx \n\t"           /* load Src2 address into ebx */
         "mov %0, %%edi \n\t"           /* load Dest address into edi */
         "mov %3, %%ecx \n\t"           /* load loop counter (SIZE) into ecx */
         "shr $3, %%ecx \n\t"           /* counter/8 (MMX loads 8 bytes at a time) */
         "pxor %%mm0, %%mm0 \n\t"       /* zero mm0 register */
         ".align 16 \n\t"               /* 16 byte alignment of the loop entry */
         "1: movq (%%eax), %%mm1 \n\t"  /* load 8 bytes from Src1 into mm1 */
         "movq (%%ebx), %%mm3 \n\t"     /* load 8 bytes from Src2 into mm3 */
         "movq %%mm1, %%mm2 \n\t"       /* copy mm1 into mm2 */
         "movq %%mm3, %%mm4 \n\t"       /* copy mm3 into mm4 */
         "punpcklbw %%mm0, %%mm1 \n\t"  /* unpack low bytes of Src1 into words */
         "punpckhbw %%mm0, %%mm2 \n\t"  /* unpack high bytes of Src1 into words */
         "punpcklbw %%mm0, %%mm3 \n\t"  /* unpack low bytes of Src2 into words */
         "punpckhbw %%mm0, %%mm4 \n\t"  /* unpack high bytes of Src2 into words */
         "psrlw $1, %%mm1 \n\t"         /* divide mm1 words by 2, Src1 low bytes */
         "psrlw $1, %%mm2 \n\t"         /* divide mm2 words by 2, Src1 high bytes */
         "pmullw %%mm3, %%mm1 \n\t"     /* mul low bytes of Src1 and Src2 */
         "pmullw %%mm4, %%mm2 \n\t"     /* mul high bytes of Src1 and Src2 */
         "packuswb %%mm2, %%mm1 \n\t"   /* pack words back into bytes with saturation */
         "movq %%mm1, (%%edi) \n\t"     /* store result in Dest */
         "add $8, %%eax \n\t"           /* increase Src1, Src2 and Dest */
         "add $8, %%ebx \n\t"           /* register pointers by 8 */
         "add $8, %%edi \n\t"
         "dec %%ecx \n\t"               /* decrease loop counter */
         "jnz 1b \n\t"                  /* check loop termination, proceed if required */
         "emms \n\t"                    /* exit MMX state */
         "popa \n\t"
         :                              /* no outputs: the Dest variable itself is only read */
         :"m"(Dest),                    /* %0 */
          "m"(Src2),                    /* %1 */
          "m"(Src1),                    /* %2 */
          "m"(SrcLength)                /* %3 */
         :"memory"                      /* the loop stores through the Dest pointer */
        );
#endif
    return (0);
#else
    return (-1);
#endif
}
-
/*!
\brief Filter using MultDivby2: D = saturation255(S1/2 * S2)

Each destination byte is the integer product of half the first source byte
(rounded down) and the second source byte, clamped to 255. When MMX is
detected and at least 8 bytes are supplied, the complete 8-byte blocks are
handed to the MMX routine and only the remaining tail bytes are processed in
C. If the MMX routine reports an error, the whole buffer is processed in C.

\param Src1 Pointer to the start of the first source byte array (S1).
\param Src2 Pointer to the start of the second source byte array (S2).
\param Dest Pointer to the start of the destination byte array (D).
\param length The number of bytes in the source arrays.

\return Returns 0 for success (including length 0) or -1 if any pointer is NULL.
*/
int SDL_imageFilterMultDivby2(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)
{
	unsigned int i, istart;
	int result;

	/* Validate input parameters */
	if ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
		return (-1);
	if (length == 0)
		return (0);

	/* By default the C loop handles the whole image */
	istart = 0;

	if ((SDL_imageFilterMMXdetect()) && (length > 7)) {
		/*
		 * Skip the bytes covered by complete 8-byte blocks only if the MMX
		 * routine really processed them; otherwise fall through and let
		 * the C loop do all of the work.
		 */
		if (SDL_imageFilterMultDivby2MMX(Src1, Src2, Dest, length) == 0) {
			istart = length & ~7u;
		}
	}

	/* C routine: whole image, or just the bytes left over after the MMX pass */
	for (i = istart; i < length; i++) {
		result = ((int) Src1[i] / 2) * (int) Src2[i];
		Dest[i] = (unsigned char) ((result > 255) ? 255 : result);
	}

	return (0);
}
-
/*!
\brief Internal MMX Filter using MultDivby4: D = saturation255(S1/2 * S2/2)

Processes the complete 8-byte blocks of the arrays only; a trailing
SrcLength % 8 bytes are not touched and must be handled by the caller.

\param Src1 Pointer to the start of the first source byte array (S1).
\param Src2 Pointer to the start of the second source byte array (S2).
\param Dest Pointer to the start of the destination byte array (D).
\param SrcLength The number of bytes in the source arrays.

\return Returns 0 for success (also when SrcLength holds no complete 8-byte
block), or -1 if a pointer is NULL or the file was built without USE_MMX.
*/
int SDL_imageFilterMultDivby4MMX(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength)
{
#ifdef USE_MMX
	/* Number of complete 8-byte blocks handled by the loop */
	unsigned int blocks = SrcLength >> 3;

	if ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
		return (-1);

	/*
	 * The loops below test the counter only after the first pass, so entering
	 * them with a zero count would access 8 bytes of a shorter buffer and then
	 * wrap the counter around. With no complete block there is nothing to do.
	 */
	if (blocks == 0)
		return (0);

#if !defined(GCC__)
	__asm
	{
		pusha
		mov eax, Src1		/* load Src1 address into eax */
		mov ebx, Src2		/* load Src2 address into ebx */
		mov edi, Dest		/* load Dest address into edi */
		mov ecx, SrcLength	/* load loop counter (SIZE) into ecx */
		shr ecx, 3		/* counter/8 (MMX loads 8 bytes at a time) */
		pxor mm0, mm0		/* zero mm0 register */
		align 16		/* 16 byte alignment of the loop entry */
L1016:
		movq mm1, [eax]		/* load 8 bytes from Src1 into mm1 */
		movq mm3, [ebx]		/* load 8 bytes from Src2 into mm3 */
		movq mm2, mm1		/* copy mm1 into mm2 */
		movq mm4, mm3		/* copy mm3 into mm4 */
		punpcklbw mm1, mm0	/* unpack low bytes of Src1 into words */
		punpckhbw mm2, mm0	/* unpack high bytes of Src1 into words */
		punpcklbw mm3, mm0	/* unpack low bytes of Src2 into words */
		punpckhbw mm4, mm0	/* unpack high bytes of Src2 into words */
		psrlw mm1, 1		/* divide mm1 words by 2, Src1 low bytes */
		psrlw mm2, 1		/* divide mm2 words by 2, Src1 high bytes */
		psrlw mm3, 1		/* divide mm3 words by 2, Src2 low bytes */
		psrlw mm4, 1		/* divide mm4 words by 2, Src2 high bytes */
		pmullw mm1, mm3		/* mul low bytes of Src1 and Src2 */
		pmullw mm2, mm4		/* mul high bytes of Src1 and Src2 */
		packuswb mm1, mm2	/* pack words back into bytes with saturation */
		movq [edi], mm1		/* store result in Dest */
		add eax, 8		/* increase Src1, Src2 and Dest */
		add ebx, 8		/* register pointers by 8 */
		add edi, 8
		dec ecx			/* decrease loop counter */
		jnz L1016		/* check loop termination, proceed if required */
		emms			/* exit MMX state */
		popa
	}
#else
	/*
	 * Pointers and counter are handed over in registers chosen by the
	 * compiler and declared read-write, so no pusha/popa is needed and no
	 * stack-relative operand is read after the stack pointer has moved.
	 * The "memory" clobber tells the compiler that Dest[] is written.
	 *
	 * Largest word product is 127 * 127 = 16129, which fits a signed 16-bit
	 * word, so packuswb clamps every result correctly to 0..255.
	 */
	__asm__ __volatile__
	    ("pxor %%mm0, %%mm0 \n\t"		/* zero mm0 register */
	     ".align 16 \n\t"			/* 16 byte alignment of the loop entry */
	     "1: movq (%0), %%mm1 \n\t"		/* load 8 bytes from Src1 into mm1 */
	     "movq (%1), %%mm3 \n\t"		/* load 8 bytes from Src2 into mm3 */
	     "movq %%mm1, %%mm2 \n\t"		/* copy mm1 into mm2 */
	     "movq %%mm3, %%mm4 \n\t"		/* copy mm3 into mm4 */
	     "punpcklbw %%mm0, %%mm1 \n\t"	/* unpack low bytes of Src1 into words */
	     "punpckhbw %%mm0, %%mm2 \n\t"	/* unpack high bytes of Src1 into words */
	     "punpcklbw %%mm0, %%mm3 \n\t"	/* unpack low bytes of Src2 into words */
	     "punpckhbw %%mm0, %%mm4 \n\t"	/* unpack high bytes of Src2 into words */
	     "psrlw $1, %%mm1 \n\t"		/* divide mm1 words by 2, Src1 low bytes */
	     "psrlw $1, %%mm2 \n\t"		/* divide mm2 words by 2, Src1 high bytes */
	     "psrlw $1, %%mm3 \n\t"		/* divide mm3 words by 2, Src2 low bytes */
	     "psrlw $1, %%mm4 \n\t"		/* divide mm4 words by 2, Src2 high bytes */
	     "pmullw %%mm3, %%mm1 \n\t"		/* mul low bytes of Src1 and Src2 */
	     "pmullw %%mm4, %%mm2 \n\t"		/* mul high bytes of Src1 and Src2 */
	     "packuswb %%mm2, %%mm1 \n\t"	/* pack words back into bytes with saturation */
	     "movq %%mm1, (%2) \n\t"		/* store result in Dest */
	     "add $8, %0 \n\t"			/* advance Src1, Src2 and Dest */
	     "add $8, %1 \n\t"			/* by one 8-byte block */
	     "add $8, %2 \n\t"
	     "dec %3 \n\t"			/* decrease loop counter */
	     "jnz 1b \n\t"			/* check loop termination, proceed if required */
	     "emms \n\t"			/* exit MMX state */
	     : "+r" (Src1),			/* %0 */
	       "+r" (Src2),			/* %1 */
	       "+r" (Dest),			/* %2 */
	       "+r" (blocks)			/* %3 */
	     :
	     : "memory", "cc", "mm0", "mm1", "mm2", "mm3", "mm4");
#endif
	return (0);
#else
	/* Built without USE_MMX: no MMX implementation is available. */
	return (-1);
#endif
}
-
/*!
\brief Filter using MultDivby4: D = saturation255(S1/2 * S2/2)

Each destination byte is the integer product of half of each source byte
(both rounded down), clamped to 255. When MMX is detected and at least 8
bytes are supplied, the complete 8-byte blocks are handed to the MMX routine
and only the remaining tail bytes are processed in C. If the MMX routine
reports an error, the whole buffer is processed in C.

\param Src1 Pointer to the start of the first source byte array (S1).
\param Src2 Pointer to the start of the second source byte array (S2).
\param Dest Pointer to the start of the destination byte array (D).
\param length The number of bytes in the source arrays.

\return Returns 0 for success (including length 0) or -1 if any pointer is NULL.
*/
int SDL_imageFilterMultDivby4(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)
{
	unsigned int i, istart;
	int result;

	/* Validate input parameters */
	if ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
		return (-1);
	if (length == 0)
		return (0);

	/* By default the C loop handles the whole image */
	istart = 0;

	if ((SDL_imageFilterMMXdetect()) && (length > 7)) {
		/*
		 * Skip the bytes covered by complete 8-byte blocks only if the MMX
		 * routine really processed them; otherwise fall through and let
		 * the C loop do all of the work.
		 */
		if (SDL_imageFilterMultDivby4MMX(Src1, Src2, Dest, length) == 0) {
			istart = length & ~7u;
		}
	}

	/* C routine: whole image, or just the bytes left over after the MMX pass */
	for (i = istart; i < length; i++) {
		result = ((int) Src1[i] / 2) * ((int) Src2[i] / 2);
		Dest[i] = (unsigned char) ((result > 255) ? 255 : result);
	}

	return (0);
}
-
/*!
\brief Internal MMX Filter using BitAnd: D = S1 & S2

Processes the complete 8-byte blocks of the arrays only; a trailing
SrcLength % 8 bytes are not touched and must be handled by the caller.

\param Src1 Pointer to the start of the first source byte array (S1).
\param Src2 Pointer to the start of the second source byte array (S2).
\param Dest Pointer to the start of the destination byte array (D).
\param SrcLength The number of bytes in the source arrays.

\return Returns 0 for success (also when SrcLength holds no complete 8-byte
block), or -1 if a pointer is NULL or the file was built without USE_MMX.
*/
int SDL_imageFilterBitAndMMX(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength)
{
#ifdef USE_MMX
	/* Number of complete 8-byte blocks handled by the loop */
	unsigned int blocks = SrcLength >> 3;

	if ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
		return (-1);

	/*
	 * The loops below test the counter only after the first pass, so entering
	 * them with a zero count would access 8 bytes of a shorter buffer and then
	 * wrap the counter around. With no complete block there is nothing to do.
	 */
	if (blocks == 0)
		return (0);

#if !defined(GCC__)
	__asm
	{
		pusha
		mov eax, Src1		/* load Src1 address into eax */
		mov ebx, Src2		/* load Src2 address into ebx */
		mov edi, Dest		/* load Dest address into edi */
		mov ecx, SrcLength	/* load loop counter (SIZE) into ecx */
		shr ecx, 3		/* counter/8 (MMX loads 8 bytes at a time) */
		align 16		/* 16 byte alignment of the loop entry */
L1017:
		movq mm1, [eax]		/* load 8 bytes from Src1 into mm1 */
		pand mm1, [ebx]		/* mm1=Src1&Src2 */
		movq [edi], mm1		/* store result in Dest */
		add eax, 8		/* increase Src1, Src2 and Dest */
		add ebx, 8		/* register pointers by 8 */
		add edi, 8
		dec ecx			/* decrease loop counter */
		jnz L1017		/* check loop termination, proceed if required */
		emms			/* exit MMX state */
		popa
	}
#else
	/*
	 * Pointers and counter are handed over in registers chosen by the
	 * compiler and declared read-write, so no pusha/popa is needed and no
	 * stack-relative operand is read after the stack pointer has moved.
	 * The "memory" clobber tells the compiler that Dest[] is written.
	 */
	__asm__ __volatile__
	    (".align 16 \n\t"			/* 16 byte alignment of the loop entry */
	     "1: movq (%0), %%mm1 \n\t"		/* load 8 bytes from Src1 into mm1 */
	     "pand (%1), %%mm1 \n\t"		/* mm1=Src1&Src2 */
	     "movq %%mm1, (%2) \n\t"		/* store result in Dest */
	     "add $8, %0 \n\t"			/* advance Src1, Src2 and Dest */
	     "add $8, %1 \n\t"			/* by one 8-byte block */
	     "add $8, %2 \n\t"
	     "dec %3 \n\t"			/* decrease loop counter */
	     "jnz 1b \n\t"			/* check loop termination, proceed if required */
	     "emms \n\t"			/* exit MMX state */
	     : "+r" (Src1),			/* %0 */
	       "+r" (Src2),			/* %1 */
	       "+r" (Dest),			/* %2 */
	       "+r" (blocks)			/* %3 */
	     :
	     : "memory", "cc", "mm1");
#endif
	return (0);
#else
	/* Built without USE_MMX: no MMX implementation is available. */
	return (-1);
#endif
}
-
/*!
\brief Filter using BitAnd: D = S1 & S2

Each destination byte is the bitwise AND of the two source bytes. When MMX is
detected and at least 8 bytes are supplied, the complete 8-byte blocks are
handed to the MMX routine and only the remaining tail bytes are processed in
C. If the MMX routine reports an error, the whole buffer is processed in C.

\param Src1 Pointer to the start of the first source byte array (S1).
\param Src2 Pointer to the start of the second source byte array (S2).
\param Dest Pointer to the start of the destination byte array (D).
\param length The number of bytes in the source arrays.

\return Returns 0 for success (including length 0) or -1 if any pointer is NULL.
*/
int SDL_imageFilterBitAnd(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)
{
	unsigned int i, istart;

	/* Validate input parameters */
	if ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
		return (-1);
	if (length == 0)
		return (0);

	/* By default the C loop handles the whole image */
	istart = 0;

	if ((SDL_imageFilterMMXdetect() > 0) && (length > 7)) {
		/*
		 * Skip the bytes covered by complete 8-byte blocks only if the MMX
		 * routine really processed them; otherwise fall through and let
		 * the C loop do all of the work.
		 */
		if (SDL_imageFilterBitAndMMX(Src1, Src2, Dest, length) == 0) {
			istart = length & ~7u;
		}
	}

	/* C routine: whole image, or just the bytes left over after the MMX pass */
	for (i = istart; i < length; i++) {
		Dest[i] = Src1[i] & Src2[i];
	}

	return (0);
}
-
/*!
\brief Internal MMX Filter using BitOr: D = S1 | S2

Processes the complete 8-byte blocks of the arrays only; a trailing
SrcLength % 8 bytes are not touched and must be handled by the caller.

\param Src1 Pointer to the start of the first source byte array (S1).
\param Src2 Pointer to the start of the second source byte array (S2).
\param Dest Pointer to the start of the destination byte array (D).
\param SrcLength The number of bytes in the source arrays.

\return Returns 0 for success (also when SrcLength holds no complete 8-byte
block), or -1 if a pointer is NULL or the file was built without USE_MMX.
*/
int SDL_imageFilterBitOrMMX(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength)
{
#ifdef USE_MMX
	/* Number of complete 8-byte blocks handled by the loop */
	unsigned int blocks = SrcLength >> 3;

	if ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
		return (-1);

	/*
	 * The loops below test the counter only after the first pass, so entering
	 * them with a zero count would access 8 bytes of a shorter buffer and then
	 * wrap the counter around. With no complete block there is nothing to do.
	 */
	if (blocks == 0)
		return (0);

#if !defined(GCC__)
	__asm
	{
		pusha
		mov eax, Src1		/* load Src1 address into eax */
		mov ebx, Src2		/* load Src2 address into ebx */
		mov edi, Dest		/* load Dest address into edi */
		mov ecx, SrcLength	/* load loop counter (SIZE) into ecx */
		shr ecx, 3		/* counter/8 (MMX loads 8 bytes at a time) */
		align 16		/* 16 byte alignment of the loop entry */
L91017:
		movq mm1, [eax]		/* load 8 bytes from Src1 into mm1 */
		por mm1, [ebx]		/* mm1=Src1|Src2 */
		movq [edi], mm1		/* store result in Dest */
		add eax, 8		/* increase Src1, Src2 and Dest */
		add ebx, 8		/* register pointers by 8 */
		add edi, 8
		dec ecx			/* decrease loop counter */
		jnz L91017		/* check loop termination, proceed if required */
		emms			/* exit MMX state */
		popa
	}
#else
	/*
	 * Pointers and counter are handed over in registers chosen by the
	 * compiler and declared read-write, so no pusha/popa is needed and no
	 * stack-relative operand is read after the stack pointer has moved.
	 * The "memory" clobber tells the compiler that Dest[] is written.
	 */
	__asm__ __volatile__
	    (".align 16 \n\t"			/* 16 byte alignment of the loop entry */
	     "1: movq (%0), %%mm1 \n\t"		/* load 8 bytes from Src1 into mm1 */
	     "por (%1), %%mm1 \n\t"		/* mm1=Src1|Src2 */
	     "movq %%mm1, (%2) \n\t"		/* store result in Dest */
	     "add $8, %0 \n\t"			/* advance Src1, Src2 and Dest */
	     "add $8, %1 \n\t"			/* by one 8-byte block */
	     "add $8, %2 \n\t"
	     "dec %3 \n\t"			/* decrease loop counter */
	     "jnz 1b \n\t"			/* check loop termination, proceed if required */
	     "emms \n\t"			/* exit MMX state */
	     : "+r" (Src1),			/* %0 */
	       "+r" (Src2),			/* %1 */
	       "+r" (Dest),			/* %2 */
	       "+r" (blocks)			/* %3 */
	     :
	     : "memory", "cc", "mm1");
#endif
	return (0);
#else
	/* Built without USE_MMX: no MMX implementation is available. */
	return (-1);
#endif
}
-
- /*!
- \brief Filter using BitOr: D = S1 | S2
-
- \param Src1 Pointer to the start of the first source byte array (S1).
- \param Src2 Pointer to the start of the second source byte array (S2).
- \param Dest Pointer to the start of the destination byte array (D).
- \param length The number of bytes in the source arrays.
-
- \return Returns 0 for success or -1 for error.
- */
- int SDL_imageFilterBitOr(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)
- {
- unsigned int i, istart;
- unsigned char *cursrc1, *cursrc2, *curdst;
-
- /* Validate input parameters */
- if ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
- return(-1);
- if (length == 0)
- return(0);
-
- if ((SDL_imageFilterMMXdetect()) && (length > 7)) {
-
- /* MMX routine */
- SDL_imageFilterBitOrMMX(Src1, Src2, Dest, length);
-
- /* Check for unaligned bytes */
- if ((length & 7) > 0) {
- /* Setup to process unaligned bytes */
- istart = length & 0xfffffff8;
- cursrc1 = &Src1[istart];
- cursrc2 = &Src2[istart];
- curdst = &Dest[istart];
- } else {
- /*…