PageRenderTime 88ms CodeModel.GetById 12ms app.highlight 65ms RepoModel.GetById 1ms app.codeStats 0ms

/project/jni/sdl-1.3/src/video/SDL_blit_N.c

https://github.com/aichunyu/FFPlayer
C | 2514 lines | 2155 code | 165 blank | 194 comment | 208 complexity | d4b695a71205fc371eeb8f9b6ca443d1 MD5 | raw file

Large files are truncated, but you can click here to view the full file

   1/*
   2  Simple DirectMedia Layer
   3  Copyright (C) 1997-2012 Sam Lantinga <slouken@libsdl.org>
   4
   5  This software is provided 'as-is', without any express or implied
   6  warranty.  In no event will the authors be held liable for any damages
   7  arising from the use of this software.
   8
   9  Permission is granted to anyone to use this software for any purpose,
  10  including commercial applications, and to alter it and redistribute it
  11  freely, subject to the following restrictions:
  12
  13  1. The origin of this software must not be misrepresented; you must not
  14     claim that you wrote the original software. If you use this software
  15     in a product, an acknowledgment in the product documentation would be
  16     appreciated but is not required.
  17  2. Altered source versions must be plainly marked as such, and must not be
  18     misrepresented as being the original software.
  19  3. This notice may not be removed or altered from any source distribution.
  20*/
  21#include "SDL_config.h"
  22
  23#include "SDL_video.h"
  24#include "SDL_endian.h"
  25#include "SDL_cpuinfo.h"
  26#include "SDL_blit.h"
  27
  28#include "SDL_assert.h"
  29
  30/* Functions to blit from N-bit surfaces to other surfaces */
  31
  32#if SDL_ALTIVEC_BLITTERS
  33#ifdef HAVE_ALTIVEC_H
  34#include <altivec.h>
  35#endif
  36#ifdef __MACOSX__
  37#include <sys/sysctl.h>
  38static size_t
  39GetL3CacheSize(void)
  40{
  41    const char key[] = "hw.l3cachesize";
  42    u_int64_t result = 0;
  43    size_t typeSize = sizeof(result);
  44
  45
  46    int err = sysctlbyname(key, &result, &typeSize, NULL, 0);
  47    if (0 != err)
  48        return 0;
  49
  50    return result;
  51}
  52#else
  53static size_t
  54GetL3CacheSize(void)
  55{
  56    /* XXX: Just guess G4 */
  57    return 2097152;
  58}
  59#endif /* __MACOSX__ */
  60
  61#if (defined(__MACOSX__) && (__GNUC__ < 4))
  62#define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
  63        (vector unsigned char) ( a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p )
  64#define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
  65        (vector unsigned short) ( a,b,c,d,e,f,g,h )
  66#else
  67#define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
  68        (vector unsigned char) { a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p }
  69#define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
  70        (vector unsigned short) { a,b,c,d,e,f,g,h }
  71#endif
  72
  73#define UNALIGNED_PTR(x) (((size_t) x) & 0x0000000F)
  74#define VSWIZZLE32(a,b,c,d) (vector unsigned char) \
  75                               ( 0x00+a, 0x00+b, 0x00+c, 0x00+d, \
  76                                 0x04+a, 0x04+b, 0x04+c, 0x04+d, \
  77                                 0x08+a, 0x08+b, 0x08+c, 0x08+d, \
  78                                 0x0C+a, 0x0C+b, 0x0C+c, 0x0C+d )
  79
  80#define MAKE8888(dstfmt, r, g, b, a)  \
  81    ( ((r<<dstfmt->Rshift)&dstfmt->Rmask) | \
  82      ((g<<dstfmt->Gshift)&dstfmt->Gmask) | \
  83      ((b<<dstfmt->Bshift)&dstfmt->Bmask) | \
  84      ((a<<dstfmt->Ashift)&dstfmt->Amask) )
  85
  86/*
  87 * Data Stream Touch...Altivec cache prefetching.
  88 *
  89 *  Don't use this on a G5...however, the speed boost is very significant
  90 *   on a G4.
  91 */
  92#define DST_CHAN_SRC 1
  93#define DST_CHAN_DEST 2
  94
  95/* macro to set DST control word value... */
  96#define DST_CTRL(size, count, stride) \
  97    (((size) << 24) | ((count) << 16) | (stride))
  98
  99#define VEC_ALIGNER(src) ((UNALIGNED_PTR(src)) \
 100    ? vec_lvsl(0, src) \
 101    : vec_add(vec_lvsl(8, src), vec_splat_u8(8)))
 102
 103/* Calculate the permute vector used for 32->32 swizzling */
 104static vector unsigned char
 105calc_swizzle32(const SDL_PixelFormat * srcfmt, const SDL_PixelFormat * dstfmt)
 106{
 107    /*
 108     * We have to assume that the bits that aren't used by other
 109     *  colors is alpha, and it's one complete byte, since some formats
 110     *  leave alpha with a zero mask, but we should still swizzle the bits.
 111     */
 112    /* ARGB */
 113    const static const struct SDL_PixelFormat default_pixel_format = {
 114        0, NULL, 0, 0,
 115        {0, 0},
 116        0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000,
 117        0, 0, 0, 0,
 118        16, 8, 0, 24,
 119        0, NULL
 120    };
 121    if (!srcfmt) {
 122        srcfmt = &default_pixel_format;
 123    }
 124    if (!dstfmt) {
 125        dstfmt = &default_pixel_format;
 126    }
 127    const vector unsigned char plus = VECUINT8_LITERAL(0x00, 0x00, 0x00, 0x00,
 128                                                       0x04, 0x04, 0x04, 0x04,
 129                                                       0x08, 0x08, 0x08, 0x08,
 130                                                       0x0C, 0x0C, 0x0C,
 131                                                       0x0C);
 132    vector unsigned char vswiz;
 133    vector unsigned int srcvec;
 134#define RESHIFT(X) (3 - ((X) >> 3))
 135    Uint32 rmask = RESHIFT(srcfmt->Rshift) << (dstfmt->Rshift);
 136    Uint32 gmask = RESHIFT(srcfmt->Gshift) << (dstfmt->Gshift);
 137    Uint32 bmask = RESHIFT(srcfmt->Bshift) << (dstfmt->Bshift);
 138    Uint32 amask;
 139    /* Use zero for alpha if either surface doesn't have alpha */
 140    if (dstfmt->Amask) {
 141        amask =
 142            ((srcfmt->Amask) ? RESHIFT(srcfmt->
 143                                       Ashift) : 0x10) << (dstfmt->Ashift);
 144    } else {
 145        amask =
 146            0x10101010 & ((dstfmt->Rmask | dstfmt->Gmask | dstfmt->Bmask) ^
 147                          0xFFFFFFFF);
 148    }
 149#undef RESHIFT
 150    ((unsigned int *) (char *) &srcvec)[0] = (rmask | gmask | bmask | amask);
 151    vswiz = vec_add(plus, (vector unsigned char) vec_splat(srcvec, 0));
 152    return (vswiz);
 153}
 154
 155static void Blit_RGB888_RGB565(SDL_BlitInfo * info);
 156static void
 157Blit_RGB888_RGB565Altivec(SDL_BlitInfo * info)
 158{
 159    int height = info->dst_h;
 160    Uint8 *src = (Uint8 *) info->src;
 161    int srcskip = info->src_skip;
 162    Uint8 *dst = (Uint8 *) info->dst;
 163    int dstskip = info->dst_skip;
 164    SDL_PixelFormat *srcfmt = info->src_fmt;
 165    vector unsigned char valpha = vec_splat_u8(0);
 166    vector unsigned char vpermute = calc_swizzle32(srcfmt, NULL);
 167    vector unsigned char vgmerge = VECUINT8_LITERAL(0x00, 0x02, 0x00, 0x06,
 168                                                    0x00, 0x0a, 0x00, 0x0e,
 169                                                    0x00, 0x12, 0x00, 0x16,
 170                                                    0x00, 0x1a, 0x00, 0x1e);
 171    vector unsigned short v1 = vec_splat_u16(1);
 172    vector unsigned short v3 = vec_splat_u16(3);
 173    vector unsigned short v3f =
 174        VECUINT16_LITERAL(0x003f, 0x003f, 0x003f, 0x003f,
 175                          0x003f, 0x003f, 0x003f, 0x003f);
 176    vector unsigned short vfc =
 177        VECUINT16_LITERAL(0x00fc, 0x00fc, 0x00fc, 0x00fc,
 178                          0x00fc, 0x00fc, 0x00fc, 0x00fc);
 179    vector unsigned short vf800 = (vector unsigned short) vec_splat_u8(-7);
 180    vf800 = vec_sl(vf800, vec_splat_u16(8));
 181
 182    while (height--) {
 183        vector unsigned char valigner;
 184        vector unsigned char voverflow;
 185        vector unsigned char vsrc;
 186
 187        int width = info->dst_w;
 188        int extrawidth;
 189
 190        /* do scalar until we can align... */
 191#define ONE_PIXEL_BLEND(condition, widthvar) \
 192        while (condition) { \
 193            Uint32 Pixel; \
 194            unsigned sR, sG, sB, sA; \
 195            DISEMBLE_RGBA((Uint8 *)src, 4, srcfmt, Pixel, \
 196                          sR, sG, sB, sA); \
 197            *(Uint16 *)(dst) = (((sR << 8) & 0x0000F800) | \
 198                                ((sG << 3) & 0x000007E0) | \
 199                                ((sB >> 3) & 0x0000001F)); \
 200            dst += 2; \
 201            src += 4; \
 202            widthvar--; \
 203        }
 204
 205        ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
 206
 207        /* After all that work, here's the vector part! */
 208        extrawidth = (width % 8);       /* trailing unaligned stores */
 209        width -= extrawidth;
 210        vsrc = vec_ld(0, src);
 211        valigner = VEC_ALIGNER(src);
 212
 213        while (width) {
 214            vector unsigned short vpixel, vrpixel, vgpixel, vbpixel;
 215            vector unsigned int vsrc1, vsrc2;
 216            vector unsigned char vdst;
 217
 218            voverflow = vec_ld(15, src);
 219            vsrc = vec_perm(vsrc, voverflow, valigner);
 220            vsrc1 = (vector unsigned int) vec_perm(vsrc, valpha, vpermute);
 221            src += 16;
 222            vsrc = voverflow;
 223            voverflow = vec_ld(15, src);
 224            vsrc = vec_perm(vsrc, voverflow, valigner);
 225            vsrc2 = (vector unsigned int) vec_perm(vsrc, valpha, vpermute);
 226            /* 1555 */
 227            vpixel = (vector unsigned short) vec_packpx(vsrc1, vsrc2);
 228            vgpixel = (vector unsigned short) vec_perm(vsrc1, vsrc2, vgmerge);
 229            vgpixel = vec_and(vgpixel, vfc);
 230            vgpixel = vec_sl(vgpixel, v3);
 231            vrpixel = vec_sl(vpixel, v1);
 232            vrpixel = vec_and(vrpixel, vf800);
 233            vbpixel = vec_and(vpixel, v3f);
 234            vdst =
 235                vec_or((vector unsigned char) vrpixel,
 236                       (vector unsigned char) vgpixel);
 237            /* 565 */
 238            vdst = vec_or(vdst, (vector unsigned char) vbpixel);
 239            vec_st(vdst, 0, dst);
 240
 241            width -= 8;
 242            src += 16;
 243            dst += 16;
 244            vsrc = voverflow;
 245        }
 246
 247        SDL_assert(width == 0);
 248
 249        /* do scalar until we can align... */
 250        ONE_PIXEL_BLEND((extrawidth), extrawidth);
 251#undef ONE_PIXEL_BLEND
 252
 253        src += srcskip;         /* move to next row, accounting for pitch. */
 254        dst += dstskip;
 255    }
 256
 257
 258}
 259
 260static void
 261Blit_RGB565_32Altivec(SDL_BlitInfo * info)
 262{
 263    int height = info->dst_h;
 264    Uint8 *src = (Uint8 *) info->src;
 265    int srcskip = info->src_skip;
 266    Uint8 *dst = (Uint8 *) info->dst;
 267    int dstskip = info->dst_skip;
 268    SDL_PixelFormat *srcfmt = info->src_fmt;
 269    SDL_PixelFormat *dstfmt = info->dst_fmt;
 270    unsigned alpha;
 271    vector unsigned char valpha;
 272    vector unsigned char vpermute;
 273    vector unsigned short vf800;
 274    vector unsigned int v8 = vec_splat_u32(8);
 275    vector unsigned int v16 = vec_add(v8, v8);
 276    vector unsigned short v2 = vec_splat_u16(2);
 277    vector unsigned short v3 = vec_splat_u16(3);
 278    /* 
 279       0x10 - 0x1f is the alpha
 280       0x00 - 0x0e evens are the red
 281       0x01 - 0x0f odds are zero
 282     */
 283    vector unsigned char vredalpha1 = VECUINT8_LITERAL(0x10, 0x00, 0x01, 0x01,
 284                                                       0x10, 0x02, 0x01, 0x01,
 285                                                       0x10, 0x04, 0x01, 0x01,
 286                                                       0x10, 0x06, 0x01,
 287                                                       0x01);
 288    vector unsigned char vredalpha2 =
 289        (vector unsigned
 290         char) (vec_add((vector unsigned int) vredalpha1, vec_sl(v8, v16))
 291        );
 292    /*
 293       0x00 - 0x0f is ARxx ARxx ARxx ARxx
 294       0x11 - 0x0f odds are blue
 295     */
 296    vector unsigned char vblue1 = VECUINT8_LITERAL(0x00, 0x01, 0x02, 0x11,
 297                                                   0x04, 0x05, 0x06, 0x13,
 298                                                   0x08, 0x09, 0x0a, 0x15,
 299                                                   0x0c, 0x0d, 0x0e, 0x17);
 300    vector unsigned char vblue2 =
 301        (vector unsigned char) (vec_add((vector unsigned int) vblue1, v8)
 302        );
 303    /*
 304       0x00 - 0x0f is ARxB ARxB ARxB ARxB
 305       0x10 - 0x0e evens are green
 306     */
 307    vector unsigned char vgreen1 = VECUINT8_LITERAL(0x00, 0x01, 0x10, 0x03,
 308                                                    0x04, 0x05, 0x12, 0x07,
 309                                                    0x08, 0x09, 0x14, 0x0b,
 310                                                    0x0c, 0x0d, 0x16, 0x0f);
 311    vector unsigned char vgreen2 =
 312        (vector unsigned
 313         char) (vec_add((vector unsigned int) vgreen1, vec_sl(v8, v8))
 314        );
 315
 316    SDL_assert(srcfmt->BytesPerPixel == 2);
 317    SDL_assert(dstfmt->BytesPerPixel == 4);
 318
 319    vf800 = (vector unsigned short) vec_splat_u8(-7);
 320    vf800 = vec_sl(vf800, vec_splat_u16(8));
 321
 322    if (dstfmt->Amask && info->a) {
 323        ((unsigned char *) &valpha)[0] = alpha = info->a;
 324        valpha = vec_splat(valpha, 0);
 325    } else {
 326        alpha = 0;
 327        valpha = vec_splat_u8(0);
 328    }
 329
 330    vpermute = calc_swizzle32(NULL, dstfmt);
 331    while (height--) {
 332        vector unsigned char valigner;
 333        vector unsigned char voverflow;
 334        vector unsigned char vsrc;
 335
 336        int width = info->dst_w;
 337        int extrawidth;
 338
 339        /* do scalar until we can align... */
 340#define ONE_PIXEL_BLEND(condition, widthvar) \
 341        while (condition) { \
 342            unsigned sR, sG, sB; \
 343            unsigned short Pixel = *((unsigned short *)src); \
 344            sR = (Pixel >> 8) & 0xf8; \
 345            sG = (Pixel >> 3) & 0xfc; \
 346            sB = (Pixel << 3) & 0xf8; \
 347            ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
 348            src += 2; \
 349            dst += 4; \
 350            widthvar--; \
 351        }
 352        ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
 353
 354        /* After all that work, here's the vector part! */
 355        extrawidth = (width % 8);       /* trailing unaligned stores */
 356        width -= extrawidth;
 357        vsrc = vec_ld(0, src);
 358        valigner = VEC_ALIGNER(src);
 359
 360        while (width) {
 361            vector unsigned short vR, vG, vB;
 362            vector unsigned char vdst1, vdst2;
 363
 364            voverflow = vec_ld(15, src);
 365            vsrc = vec_perm(vsrc, voverflow, valigner);
 366
 367            vR = vec_and((vector unsigned short) vsrc, vf800);
 368            vB = vec_sl((vector unsigned short) vsrc, v3);
 369            vG = vec_sl(vB, v2);
 370
 371            vdst1 =
 372                (vector unsigned char) vec_perm((vector unsigned char) vR,
 373                                                valpha, vredalpha1);
 374            vdst1 = vec_perm(vdst1, (vector unsigned char) vB, vblue1);
 375            vdst1 = vec_perm(vdst1, (vector unsigned char) vG, vgreen1);
 376            vdst1 = vec_perm(vdst1, valpha, vpermute);
 377            vec_st(vdst1, 0, dst);
 378
 379            vdst2 =
 380                (vector unsigned char) vec_perm((vector unsigned char) vR,
 381                                                valpha, vredalpha2);
 382            vdst2 = vec_perm(vdst2, (vector unsigned char) vB, vblue2);
 383            vdst2 = vec_perm(vdst2, (vector unsigned char) vG, vgreen2);
 384            vdst2 = vec_perm(vdst2, valpha, vpermute);
 385            vec_st(vdst2, 16, dst);
 386
 387            width -= 8;
 388            dst += 32;
 389            src += 16;
 390            vsrc = voverflow;
 391        }
 392
 393        SDL_assert(width == 0);
 394
 395
 396        /* do scalar until we can align... */
 397        ONE_PIXEL_BLEND((extrawidth), extrawidth);
 398#undef ONE_PIXEL_BLEND
 399
 400        src += srcskip;         /* move to next row, accounting for pitch. */
 401        dst += dstskip;
 402    }
 403
 404}
 405
 406
 407static void
 408Blit_RGB555_32Altivec(SDL_BlitInfo * info)
 409{
 410    int height = info->dst_h;
 411    Uint8 *src = (Uint8 *) info->src;
 412    int srcskip = info->src_skip;
 413    Uint8 *dst = (Uint8 *) info->dst;
 414    int dstskip = info->dst_skip;
 415    SDL_PixelFormat *srcfmt = info->src_fmt;
 416    SDL_PixelFormat *dstfmt = info->dst_fmt;
 417    unsigned alpha;
 418    vector unsigned char valpha;
 419    vector unsigned char vpermute;
 420    vector unsigned short vf800;
 421    vector unsigned int v8 = vec_splat_u32(8);
 422    vector unsigned int v16 = vec_add(v8, v8);
 423    vector unsigned short v1 = vec_splat_u16(1);
 424    vector unsigned short v3 = vec_splat_u16(3);
 425    /* 
 426       0x10 - 0x1f is the alpha
 427       0x00 - 0x0e evens are the red
 428       0x01 - 0x0f odds are zero
 429     */
 430    vector unsigned char vredalpha1 = VECUINT8_LITERAL(0x10, 0x00, 0x01, 0x01,
 431                                                       0x10, 0x02, 0x01, 0x01,
 432                                                       0x10, 0x04, 0x01, 0x01,
 433                                                       0x10, 0x06, 0x01,
 434                                                       0x01);
 435    vector unsigned char vredalpha2 =
 436        (vector unsigned
 437         char) (vec_add((vector unsigned int) vredalpha1, vec_sl(v8, v16))
 438        );
 439    /*
 440       0x00 - 0x0f is ARxx ARxx ARxx ARxx
 441       0x11 - 0x0f odds are blue
 442     */
 443    vector unsigned char vblue1 = VECUINT8_LITERAL(0x00, 0x01, 0x02, 0x11,
 444                                                   0x04, 0x05, 0x06, 0x13,
 445                                                   0x08, 0x09, 0x0a, 0x15,
 446                                                   0x0c, 0x0d, 0x0e, 0x17);
 447    vector unsigned char vblue2 =
 448        (vector unsigned char) (vec_add((vector unsigned int) vblue1, v8)
 449        );
 450    /*
 451       0x00 - 0x0f is ARxB ARxB ARxB ARxB
 452       0x10 - 0x0e evens are green
 453     */
 454    vector unsigned char vgreen1 = VECUINT8_LITERAL(0x00, 0x01, 0x10, 0x03,
 455                                                    0x04, 0x05, 0x12, 0x07,
 456                                                    0x08, 0x09, 0x14, 0x0b,
 457                                                    0x0c, 0x0d, 0x16, 0x0f);
 458    vector unsigned char vgreen2 =
 459        (vector unsigned
 460         char) (vec_add((vector unsigned int) vgreen1, vec_sl(v8, v8))
 461        );
 462
 463    SDL_assert(srcfmt->BytesPerPixel == 2);
 464    SDL_assert(dstfmt->BytesPerPixel == 4);
 465
 466    vf800 = (vector unsigned short) vec_splat_u8(-7);
 467    vf800 = vec_sl(vf800, vec_splat_u16(8));
 468
 469    if (dstfmt->Amask && info->a) {
 470        ((unsigned char *) &valpha)[0] = alpha = info->a;
 471        valpha = vec_splat(valpha, 0);
 472    } else {
 473        alpha = 0;
 474        valpha = vec_splat_u8(0);
 475    }
 476
 477    vpermute = calc_swizzle32(NULL, dstfmt);
 478    while (height--) {
 479        vector unsigned char valigner;
 480        vector unsigned char voverflow;
 481        vector unsigned char vsrc;
 482
 483        int width = info->dst_w;
 484        int extrawidth;
 485
 486        /* do scalar until we can align... */
 487#define ONE_PIXEL_BLEND(condition, widthvar) \
 488        while (condition) { \
 489            unsigned sR, sG, sB; \
 490            unsigned short Pixel = *((unsigned short *)src); \
 491            sR = (Pixel >> 7) & 0xf8; \
 492            sG = (Pixel >> 2) & 0xf8; \
 493            sB = (Pixel << 3) & 0xf8; \
 494            ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
 495            src += 2; \
 496            dst += 4; \
 497            widthvar--; \
 498        }
 499        ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
 500
 501        /* After all that work, here's the vector part! */
 502        extrawidth = (width % 8);       /* trailing unaligned stores */
 503        width -= extrawidth;
 504        vsrc = vec_ld(0, src);
 505        valigner = VEC_ALIGNER(src);
 506
 507        while (width) {
 508            vector unsigned short vR, vG, vB;
 509            vector unsigned char vdst1, vdst2;
 510
 511            voverflow = vec_ld(15, src);
 512            vsrc = vec_perm(vsrc, voverflow, valigner);
 513
 514            vR = vec_and(vec_sl((vector unsigned short) vsrc, v1), vf800);
 515            vB = vec_sl((vector unsigned short) vsrc, v3);
 516            vG = vec_sl(vB, v3);
 517
 518            vdst1 =
 519                (vector unsigned char) vec_perm((vector unsigned char) vR,
 520                                                valpha, vredalpha1);
 521            vdst1 = vec_perm(vdst1, (vector unsigned char) vB, vblue1);
 522            vdst1 = vec_perm(vdst1, (vector unsigned char) vG, vgreen1);
 523            vdst1 = vec_perm(vdst1, valpha, vpermute);
 524            vec_st(vdst1, 0, dst);
 525
 526            vdst2 =
 527                (vector unsigned char) vec_perm((vector unsigned char) vR,
 528                                                valpha, vredalpha2);
 529            vdst2 = vec_perm(vdst2, (vector unsigned char) vB, vblue2);
 530            vdst2 = vec_perm(vdst2, (vector unsigned char) vG, vgreen2);
 531            vdst2 = vec_perm(vdst2, valpha, vpermute);
 532            vec_st(vdst2, 16, dst);
 533
 534            width -= 8;
 535            dst += 32;
 536            src += 16;
 537            vsrc = voverflow;
 538        }
 539
 540        SDL_assert(width == 0);
 541
 542
 543        /* do scalar until we can align... */
 544        ONE_PIXEL_BLEND((extrawidth), extrawidth);
 545#undef ONE_PIXEL_BLEND
 546
 547        src += srcskip;         /* move to next row, accounting for pitch. */
 548        dst += dstskip;
 549    }
 550
 551}
 552
 553static void BlitNtoNKey(SDL_BlitInfo * info);
 554static void BlitNtoNKeyCopyAlpha(SDL_BlitInfo * info);
 555static void
 556Blit32to32KeyAltivec(SDL_BlitInfo * info)
 557{
 558    int height = info->dst_h;
 559    Uint32 *srcp = (Uint32 *) info->src;
 560    int srcskip = info->src_skip / 4;
 561    Uint32 *dstp = (Uint32 *) info->dst;
 562    int dstskip = info->dst_skip / 4;
 563    SDL_PixelFormat *srcfmt = info->src_fmt;
 564    int srcbpp = srcfmt->BytesPerPixel;
 565    SDL_PixelFormat *dstfmt = info->dst_fmt;
 566    int dstbpp = dstfmt->BytesPerPixel;
 567    int copy_alpha = (srcfmt->Amask && dstfmt->Amask);
 568    unsigned alpha = dstfmt->Amask ? info->a : 0;
 569    Uint32 rgbmask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
 570    Uint32 ckey = info->colorkey;
 571    vector unsigned int valpha;
 572    vector unsigned char vpermute;
 573    vector unsigned char vzero;
 574    vector unsigned int vckey;
 575    vector unsigned int vrgbmask;
 576    vpermute = calc_swizzle32(srcfmt, dstfmt);
 577    if (info->dst_w < 16) {
 578        if (copy_alpha) {
 579            BlitNtoNKeyCopyAlpha(info);
 580        } else {
 581            BlitNtoNKey(info);
 582        }
 583        return;
 584    }
 585    vzero = vec_splat_u8(0);
 586    if (alpha) {
 587        ((unsigned char *) &valpha)[0] = (unsigned char) alpha;
 588        valpha =
 589            (vector unsigned int) vec_splat((vector unsigned char) valpha, 0);
 590    } else {
 591        valpha = (vector unsigned int) vzero;
 592    }
 593    ckey &= rgbmask;
 594    ((unsigned int *) (char *) &vckey)[0] = ckey;
 595    vckey = vec_splat(vckey, 0);
 596    ((unsigned int *) (char *) &vrgbmask)[0] = rgbmask;
 597    vrgbmask = vec_splat(vrgbmask, 0);
 598
 599    while (height--) {
 600#define ONE_PIXEL_BLEND(condition, widthvar) \
 601        if (copy_alpha) { \
 602            while (condition) { \
 603                Uint32 Pixel; \
 604                unsigned sR, sG, sB, sA; \
 605                DISEMBLE_RGBA((Uint8 *)srcp, srcbpp, srcfmt, Pixel, \
 606                          sR, sG, sB, sA); \
 607                if ( (Pixel & rgbmask) != ckey ) { \
 608                      ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
 609                            sR, sG, sB, sA); \
 610                } \
 611                dstp = (Uint32 *) (((Uint8 *) dstp) + dstbpp); \
 612                srcp = (Uint32 *) (((Uint8 *) srcp) + srcbpp); \
 613                widthvar--; \
 614            } \
 615        } else { \
 616            while (condition) { \
 617                Uint32 Pixel; \
 618                unsigned sR, sG, sB; \
 619                RETRIEVE_RGB_PIXEL((Uint8 *)srcp, srcbpp, Pixel); \
 620                if ( Pixel != ckey ) { \
 621                    RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB); \
 622                    ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
 623                              sR, sG, sB, alpha); \
 624                } \
 625                dstp = (Uint32 *) (((Uint8 *)dstp) + dstbpp); \
 626                srcp = (Uint32 *) (((Uint8 *)srcp) + srcbpp); \
 627                widthvar--; \
 628            } \
 629        }
 630        int width = info->dst_w;
 631        ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
 632        SDL_assert(width > 0);
 633        if (width > 0) {
 634            int extrawidth = (width % 4);
 635            vector unsigned char valigner = VEC_ALIGNER(srcp);
 636            vector unsigned int vs = vec_ld(0, srcp);
 637            width -= extrawidth;
 638            SDL_assert(width >= 4);
 639            while (width) {
 640                vector unsigned char vsel;
 641                vector unsigned int vd;
 642                vector unsigned int voverflow = vec_ld(15, srcp);
 643                /* load the source vec */
 644                vs = vec_perm(vs, voverflow, valigner);
 645                /* vsel is set for items that match the key */
 646                vsel = (vector unsigned char) vec_and(vs, vrgbmask);
 647                vsel = (vector unsigned char) vec_cmpeq(vs, vckey);
 648                /* permute the src vec to the dest format */
 649                vs = vec_perm(vs, valpha, vpermute);
 650                /* load the destination vec */
 651                vd = vec_ld(0, dstp);
 652                /* select the source and dest into vs */
 653                vd = (vector unsigned int) vec_sel((vector unsigned char) vs,
 654                                                   (vector unsigned char) vd,
 655                                                   vsel);
 656
 657                vec_st(vd, 0, dstp);
 658                srcp += 4;
 659                width -= 4;
 660                dstp += 4;
 661                vs = voverflow;
 662            }
 663            ONE_PIXEL_BLEND((extrawidth), extrawidth);
 664#undef ONE_PIXEL_BLEND
 665            srcp += srcskip;
 666            dstp += dstskip;
 667        }
 668    }
 669}
 670
 671/* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */
 672/* Use this on a G5 */
 673static void
 674ConvertAltivec32to32_noprefetch(SDL_BlitInfo * info)
 675{
 676    int height = info->dst_h;
 677    Uint32 *src = (Uint32 *) info->src;
 678    int srcskip = info->src_skip / 4;
 679    Uint32 *dst = (Uint32 *) info->dst;
 680    int dstskip = info->dst_skip / 4;
 681    SDL_PixelFormat *srcfmt = info->src_fmt;
 682    SDL_PixelFormat *dstfmt = info->dst_fmt;
 683    vector unsigned int vzero = vec_splat_u32(0);
 684    vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
 685    if (dstfmt->Amask && !srcfmt->Amask) {
 686        if (info->a) {
 687            vector unsigned char valpha;
 688            ((unsigned char *) &valpha)[0] = info->a;
 689            vzero = (vector unsigned int) vec_splat(valpha, 0);
 690        }
 691    }
 692
 693    SDL_assert(srcfmt->BytesPerPixel == 4);
 694    SDL_assert(dstfmt->BytesPerPixel == 4);
 695
 696    while (height--) {
 697        vector unsigned char valigner;
 698        vector unsigned int vbits;
 699        vector unsigned int voverflow;
 700        Uint32 bits;
 701        Uint8 r, g, b, a;
 702
 703        int width = info->dst_w;
 704        int extrawidth;
 705
 706        /* do scalar until we can align... */
 707        while ((UNALIGNED_PTR(dst)) && (width)) {
 708            bits = *(src++);
 709            RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
 710            if(!srcfmt->Amask)
 711              a = info->a;
 712            *(dst++) = MAKE8888(dstfmt, r, g, b, a);
 713            width--;
 714        }
 715
 716        /* After all that work, here's the vector part! */
 717        extrawidth = (width % 4);
 718        width -= extrawidth;
 719        valigner = VEC_ALIGNER(src);
 720        vbits = vec_ld(0, src);
 721
 722        while (width) {
 723            voverflow = vec_ld(15, src);
 724            src += 4;
 725            width -= 4;
 726            vbits = vec_perm(vbits, voverflow, valigner);       /* src is ready. */
 727            vbits = vec_perm(vbits, vzero, vpermute);   /* swizzle it. */
 728            vec_st(vbits, 0, dst);      /* store it back out. */
 729            dst += 4;
 730            vbits = voverflow;
 731        }
 732
 733        SDL_assert(width == 0);
 734
 735        /* cover pixels at the end of the row that didn't fit in 16 bytes. */
 736        while (extrawidth) {
 737            bits = *(src++);    /* max 7 pixels, don't bother with prefetch. */
 738            RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
 739            if(!srcfmt->Amask)
 740              a = info->a;
 741            *(dst++) = MAKE8888(dstfmt, r, g, b, a);
 742            extrawidth--;
 743        }
 744
 745        src += srcskip;
 746        dst += dstskip;
 747    }
 748
 749}
 750
 751/* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */
 752/* Use this on a G4 */
/*
 * Prefetching variant of the 32-bit -> 32-bit AltiVec converter.
 *
 * For each of the info->dst_h rows (info->dst_w pixels per row) it:
 *   1. converts pixels one at a time until `dst` is vector-aligned,
 *   2. converts the bulk four pixels (one 16-byte vector) per iteration,
 *   3. converts the remaining (width % 4) pixels one at a time.
 * Data-stream hints (vec_dstt / vec_dstst) are issued ahead of the source
 * and destination pointers in steps 1 and 2; both streams are stopped with
 * vec_dss() before returning.
 *
 * In the scalar paths, when the source format has no alpha mask, info->a is
 * used as the alpha value of the written pixel.
 */
 753static void
 754ConvertAltivec32to32_prefetch(SDL_BlitInfo * info)
 755{
    /* How far ahead of the current src/dst position the prefetch hints point,
       for the scalar loop and the vector loop respectively.
       NOTE(review): these are added to Uint32 pointers below, so the lead is
       counted in pixels rather than bytes -- confirm that is intended. */
 756    const int scalar_dst_lead = sizeof(Uint32) * 4;
 757    const int vector_dst_lead = sizeof(Uint32) * 16;
 758
 759    int height = info->dst_h;
 760    Uint32 *src = (Uint32 *) info->src;
    /* The skips are divided by 4 because src and dst are stepped as Uint32. */
 761    int srcskip = info->src_skip / 4;
 762    Uint32 *dst = (Uint32 *) info->dst;
 763    int dstskip = info->dst_skip / 4;
 764    SDL_PixelFormat *srcfmt = info->src_fmt;
 765    SDL_PixelFormat *dstfmt = info->dst_fmt;
    /* Second input of the swizzle permute in the vector loop; all zero unless
       it is replaced by the constant alpha just below. */
 766    vector unsigned int vzero = vec_splat_u32(0);
    /* Permute control for this format pair; calc_swizzle32() is defined
       outside this view. */
 767    vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
    /* Destination has an alpha mask but the source does not: replicate
       info->a into every byte of vzero (a zero alpha needs no change). */
 768    if (dstfmt->Amask && !srcfmt->Amask) {
 769        if (info->a) {
 770            vector unsigned char valpha;
            /* Only byte 0 is set; vec_splat() then copies it to all lanes. */
 771            ((unsigned char *) &valpha)[0] = info->a;
 772            vzero = (vector unsigned int) vec_splat(valpha, 0);
 773        }
 774    }
 775
 776    SDL_assert(srcfmt->BytesPerPixel == 4);
 777    SDL_assert(dstfmt->BytesPerPixel == 4);
 778
 779    while (height--) {
 780        vector unsigned char valigner;
 781        vector unsigned int vbits;
 782        vector unsigned int voverflow;
 783        Uint32 bits;
 784        Uint8 r, g, b, a;
 785
 786        int width = info->dst_w;
 787        int extrawidth;
 788
 789        /* do scalar until we can align... */
 790        while ((UNALIGNED_PTR(dst)) && (width)) {
 791            vec_dstt(src + scalar_dst_lead, DST_CTRL(2, 32, 1024),
 792                     DST_CHAN_SRC);
 793            vec_dstst(dst + scalar_dst_lead, DST_CTRL(2, 32, 1024),
 794                      DST_CHAN_DEST);
 795            bits = *(src++);
 796            RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
            /* No alpha channel in the source: use the blit's constant alpha. */
 797            if(!srcfmt->Amask)
 798              a = info->a;
 799            *(dst++) = MAKE8888(dstfmt, r, g, b, a);
 800            width--;
 801        }
 802
 803        /* After all that work, here's the vector part! */
        /* Split the rest of the row into whole 4-pixel vectors plus a tail. */
 804        extrawidth = (width % 4);
 805        width -= extrawidth;
        /* src may still be unaligned: valigner is the permute control used to
           realign it (VEC_ALIGNER is defined outside this view), and vbits is
           primed with the first load. */
 806        valigner = VEC_ALIGNER(src);
 807        vbits = vec_ld(0, src);
 808
 809        while (width) {
 810            vec_dstt(src + vector_dst_lead, DST_CTRL(2, 32, 1024),
 811                     DST_CHAN_SRC);
 812            vec_dstst(dst + vector_dst_lead, DST_CTRL(2, 32, 1024),
 813                      DST_CHAN_DEST);
            /* Load the block holding the last byte of the current 4 pixels. */
 814            voverflow = vec_ld(15, src);
 815            src += 4;
 816            width -= 4;
 817            vbits = vec_perm(vbits, voverflow, valigner);       /* src is ready. */
 818            vbits = vec_perm(vbits, vzero, vpermute);   /* swizzle it. */
 819            vec_st(vbits, 0, dst);      /* store it back out. */
 820            dst += 4;
            /* Carry the second load over as the first half of the next one. */
 821            vbits = voverflow;
 822        }
 823
 824        SDL_assert(width == 0);
 825
 826        /* cover pixels at the end of the row that didn't fit in 16 bytes. */
 827        while (extrawidth) {
 828            bits = *(src++);    /* at most 3 pixels remain (width % 4), don't bother with prefetch. */
 829            RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
 830            if(!srcfmt->Amask)
 831              a = info->a;
 832            *(dst++) = MAKE8888(dstfmt, r, g, b, a);
 833            extrawidth--;
 834        }
 835
 836        src += srcskip;
 837        dst += dstskip;
 838    }
 839
    /* Stop the two prefetch data streams started above. */
 840    vec_dss(DST_CHAN_SRC);
 841    vec_dss(DST_CHAN_DEST);
 842}
 843
 844static Uint32
 845GetBlitFeatures(void)
 846{
 847    static Uint32 features = 0xffffffff;
 848    if (features == 0xffffffff) {
 849        /* Provide an override for testing .. */
 850        char *override = SDL_getenv("SDL_ALTIVEC_BLIT_FEATURES");
 851        if (override) {
 852            features = 0;
 853            SDL_sscanf(override, "%u", &features);
 854        } else {
 855            features = (0
 856                        /* Feature 1 is has-MMX */
 857                        | ((SDL_HasMMX())? 1 : 0)
 858                        /* Feature 2 is has-AltiVec */
 859                        | ((SDL_HasAltiVec())? 2 : 0)
 860                        /* Feature 4 is dont-use-prefetch */
 861                        /* !!!! FIXME: Check for G5 or later, not the cache size! Always prefetch on a G4. */
 862                        | ((GetL3CacheSize() == 0) ? 4 : 0)
 863                );
 864        }
 865    }
 866    return features;
 867}
 868
 869#if __MWERKS__
/* Only emitted when __MWERKS__ is set: switches the compiler's AltiVec model
   off.  NOTE(review): presumably undoes an `altivec_model on` pragma earlier
   in the file (not visible here) -- confirm. */
 870#pragma altivec_model off
 871#endif
/* NOTE(review): this #else presumably pairs with the `#if SDL_ALTIVEC_BLITTERS`
   opened near the top of the file -- confirm.  In this branch GetBlitFeatures
   is a macro rather than a function and can only ever report the MMX bit. */
 872#else
 873/* Feature 1 is has-MMX */
 874#define GetBlitFeatures() ((Uint32)(SDL_HasMMX() ? 1 : 0))
 875#endif
 876
 877/* This is now endian dependent */
/* HI and LO are array indices (0 or 1) selected by the host byte order.
   Within this file they are used as src[HI] / src[LO] by the *_TWO packing
   macros (two 16-bit pixels written as one 32-bit value) and by RGB565_32
   (picking the two bytes of a 16-bit pixel). */
 878#if SDL_BYTEORDER == SDL_LIL_ENDIAN
 879#define HI	1
 880#define LO	0
 881#else /* SDL_BYTEORDER == SDL_BIG_ENDIAN */
 882#define HI	0
 883#define LO	1
 884#endif
 885
 886/* Special optimized blit for RGB 8-8-8 --> RGB 3-3-2 */
 887#define RGB888_RGB332(dst, src) { \
 888	dst = (Uint8)((((src)&0x00E00000)>>16)| \
 889	              (((src)&0x0000E000)>>11)| \
 890	              (((src)&0x000000C0)>>6)); \
 891}
 892static void
 893Blit_RGB888_index8(SDL_BlitInfo * info)
 894{
 895#ifndef USE_DUFFS_LOOP
 896    int c;
 897#endif
 898    int width, height;
 899    Uint32 *src;
 900    const Uint8 *map;
 901    Uint8 *dst;
 902    int srcskip, dstskip;
 903
 904    /* Set up some basic variables */
 905    width = info->dst_w;
 906    height = info->dst_h;
 907    src = (Uint32 *) info->src;
 908    srcskip = info->src_skip / 4;
 909    dst = info->dst;
 910    dstskip = info->dst_skip;
 911    map = info->table;
 912
 913    if (map == NULL) {
 914        while (height--) {
 915#ifdef USE_DUFFS_LOOP
 916			/* *INDENT-OFF* */
 917			DUFFS_LOOP(
 918				RGB888_RGB332(*dst++, *src);
 919			, width);
 920			/* *INDENT-ON* */
 921#else
 922            for (c = width / 4; c; --c) {
 923                /* Pack RGB into 8bit pixel */
 924                ++src;
 925                RGB888_RGB332(*dst++, *src);
 926                ++src;
 927                RGB888_RGB332(*dst++, *src);
 928                ++src;
 929                RGB888_RGB332(*dst++, *src);
 930                ++src;
 931            }
 932            switch (width & 3) {
 933            case 3:
 934                RGB888_RGB332(*dst++, *src);
 935                ++src;
 936            case 2:
 937                RGB888_RGB332(*dst++, *src);
 938                ++src;
 939            case 1:
 940                RGB888_RGB332(*dst++, *src);
 941                ++src;
 942            }
 943#endif /* USE_DUFFS_LOOP */
 944            src += srcskip;
 945            dst += dstskip;
 946        }
 947    } else {
 948        int Pixel;
 949
 950        while (height--) {
 951#ifdef USE_DUFFS_LOOP
 952			/* *INDENT-OFF* */
 953			DUFFS_LOOP(
 954				RGB888_RGB332(Pixel, *src);
 955				*dst++ = map[Pixel];
 956				++src;
 957			, width);
 958			/* *INDENT-ON* */
 959#else
 960            for (c = width / 4; c; --c) {
 961                /* Pack RGB into 8bit pixel */
 962                RGB888_RGB332(Pixel, *src);
 963                *dst++ = map[Pixel];
 964                ++src;
 965                RGB888_RGB332(Pixel, *src);
 966                *dst++ = map[Pixel];
 967                ++src;
 968                RGB888_RGB332(Pixel, *src);
 969                *dst++ = map[Pixel];
 970                ++src;
 971                RGB888_RGB332(Pixel, *src);
 972                *dst++ = map[Pixel];
 973                ++src;
 974            }
 975            switch (width & 3) {
 976            case 3:
 977                RGB888_RGB332(Pixel, *src);
 978                *dst++ = map[Pixel];
 979                ++src;
 980            case 2:
 981                RGB888_RGB332(Pixel, *src);
 982                *dst++ = map[Pixel];
 983                ++src;
 984            case 1:
 985                RGB888_RGB332(Pixel, *src);
 986                *dst++ = map[Pixel];
 987                ++src;
 988            }
 989#endif /* USE_DUFFS_LOOP */
 990            src += srcskip;
 991            dst += dstskip;
 992        }
 993    }
 994}
 995
 996/* Special optimized blit for RGB 8-8-8 --> RGB 5-5-5 */
 997#define RGB888_RGB555(dst, src) { \
 998	*(Uint16 *)(dst) = (Uint16)((((*src)&0x00F80000)>>9)| \
 999	                            (((*src)&0x0000F800)>>6)| \
1000	                            (((*src)&0x000000F8)>>3)); \
1001}
1002#define RGB888_RGB555_TWO(dst, src) { \
1003	*(Uint32 *)(dst) = (((((src[HI])&0x00F80000)>>9)| \
1004	                     (((src[HI])&0x0000F800)>>6)| \
1005	                     (((src[HI])&0x000000F8)>>3))<<16)| \
1006	                     (((src[LO])&0x00F80000)>>9)| \
1007	                     (((src[LO])&0x0000F800)>>6)| \
1008	                     (((src[LO])&0x000000F8)>>3); \
1009}
1010static void
1011Blit_RGB888_RGB555(SDL_BlitInfo * info)
1012{
1013#ifndef USE_DUFFS_LOOP
1014    int c;
1015#endif
1016    int width, height;
1017    Uint32 *src;
1018    Uint16 *dst;
1019    int srcskip, dstskip;
1020
1021    /* Set up some basic variables */
1022    width = info->dst_w;
1023    height = info->dst_h;
1024    src = (Uint32 *) info->src;
1025    srcskip = info->src_skip / 4;
1026    dst = (Uint16 *) info->dst;
1027    dstskip = info->dst_skip / 2;
1028
1029#ifdef USE_DUFFS_LOOP
1030    while (height--) {
1031		/* *INDENT-OFF* */
1032		DUFFS_LOOP(
1033			RGB888_RGB555(dst, src);
1034			++src;
1035			++dst;
1036		, width);
1037		/* *INDENT-ON* */
1038        src += srcskip;
1039        dst += dstskip;
1040    }
1041#else
1042    /* Memory align at 4-byte boundary, if necessary */
1043    if ((long) dst & 0x03) {
1044        /* Don't do anything if width is 0 */
1045        if (width == 0) {
1046            return;
1047        }
1048        --width;
1049
1050        while (height--) {
1051            /* Perform copy alignment */
1052            RGB888_RGB555(dst, src);
1053            ++src;
1054            ++dst;
1055
1056            /* Copy in 4 pixel chunks */
1057            for (c = width / 4; c; --c) {
1058                RGB888_RGB555_TWO(dst, src);
1059                src += 2;
1060                dst += 2;
1061                RGB888_RGB555_TWO(dst, src);
1062                src += 2;
1063                dst += 2;
1064            }
1065            /* Get any leftovers */
1066            switch (width & 3) {
1067            case 3:
1068                RGB888_RGB555(dst, src);
1069                ++src;
1070                ++dst;
1071            case 2:
1072                RGB888_RGB555_TWO(dst, src);
1073                src += 2;
1074                dst += 2;
1075                break;
1076            case 1:
1077                RGB888_RGB555(dst, src);
1078                ++src;
1079                ++dst;
1080                break;
1081            }
1082            src += srcskip;
1083            dst += dstskip;
1084        }
1085    } else {
1086        while (height--) {
1087            /* Copy in 4 pixel chunks */
1088            for (c = width / 4; c; --c) {
1089                RGB888_RGB555_TWO(dst, src);
1090                src += 2;
1091                dst += 2;
1092                RGB888_RGB555_TWO(dst, src);
1093                src += 2;
1094                dst += 2;
1095            }
1096            /* Get any leftovers */
1097            switch (width & 3) {
1098            case 3:
1099                RGB888_RGB555(dst, src);
1100                ++src;
1101                ++dst;
1102            case 2:
1103                RGB888_RGB555_TWO(dst, src);
1104                src += 2;
1105                dst += 2;
1106                break;
1107            case 1:
1108                RGB888_RGB555(dst, src);
1109                ++src;
1110                ++dst;
1111                break;
1112            }
1113            src += srcskip;
1114            dst += dstskip;
1115        }
1116    }
1117#endif /* USE_DUFFS_LOOP */
1118}
1119
1120/* Special optimized blit for RGB 8-8-8 --> RGB 5-6-5 */
1121#define RGB888_RGB565(dst, src) { \
1122	*(Uint16 *)(dst) = (Uint16)((((*src)&0x00F80000)>>8)| \
1123	                            (((*src)&0x0000FC00)>>5)| \
1124	                            (((*src)&0x000000F8)>>3)); \
1125}
1126#define RGB888_RGB565_TWO(dst, src) { \
1127	*(Uint32 *)(dst) = (((((src[HI])&0x00F80000)>>8)| \
1128	                     (((src[HI])&0x0000FC00)>>5)| \
1129	                     (((src[HI])&0x000000F8)>>3))<<16)| \
1130	                     (((src[LO])&0x00F80000)>>8)| \
1131	                     (((src[LO])&0x0000FC00)>>5)| \
1132	                     (((src[LO])&0x000000F8)>>3); \
1133}
1134static void
1135Blit_RGB888_RGB565(SDL_BlitInfo * info)
1136{
1137#ifndef USE_DUFFS_LOOP
1138    int c;
1139#endif
1140    int width, height;
1141    Uint32 *src;
1142    Uint16 *dst;
1143    int srcskip, dstskip;
1144
1145    /* Set up some basic variables */
1146    width = info->dst_w;
1147    height = info->dst_h;
1148    src = (Uint32 *) info->src;
1149    srcskip = info->src_skip / 4;
1150    dst = (Uint16 *) info->dst;
1151    dstskip = info->dst_skip / 2;
1152
1153#ifdef USE_DUFFS_LOOP
1154    while (height--) {
1155		/* *INDENT-OFF* */
1156		DUFFS_LOOP(
1157			RGB888_RGB565(dst, src);
1158			++src;
1159			++dst;
1160		, width);
1161		/* *INDENT-ON* */
1162        src += srcskip;
1163        dst += dstskip;
1164    }
1165#else
1166    /* Memory align at 4-byte boundary, if necessary */
1167    if ((long) dst & 0x03) {
1168        /* Don't do anything if width is 0 */
1169        if (width == 0) {
1170            return;
1171        }
1172        --width;
1173
1174        while (height--) {
1175            /* Perform copy alignment */
1176            RGB888_RGB565(dst, src);
1177            ++src;
1178            ++dst;
1179
1180            /* Copy in 4 pixel chunks */
1181            for (c = width / 4; c; --c) {
1182                RGB888_RGB565_TWO(dst, src);
1183                src += 2;
1184                dst += 2;
1185                RGB888_RGB565_TWO(dst, src);
1186                src += 2;
1187                dst += 2;
1188            }
1189            /* Get any leftovers */
1190            switch (width & 3) {
1191            case 3:
1192                RGB888_RGB565(dst, src);
1193                ++src;
1194                ++dst;
1195            case 2:
1196                RGB888_RGB565_TWO(dst, src);
1197                src += 2;
1198                dst += 2;
1199                break;
1200            case 1:
1201                RGB888_RGB565(dst, src);
1202                ++src;
1203                ++dst;
1204                break;
1205            }
1206            src += srcskip;
1207            dst += dstskip;
1208        }
1209    } else {
1210        while (height--) {
1211            /* Copy in 4 pixel chunks */
1212            for (c = width / 4; c; --c) {
1213                RGB888_RGB565_TWO(dst, src);
1214                src += 2;
1215                dst += 2;
1216                RGB888_RGB565_TWO(dst, src);
1217                src += 2;
1218                dst += 2;
1219            }
1220            /* Get any leftovers */
1221            switch (width & 3) {
1222            case 3:
1223                RGB888_RGB565(dst, src);
1224                ++src;
1225                ++dst;
1226            case 2:
1227                RGB888_RGB565_TWO(dst, src);
1228                src += 2;
1229                dst += 2;
1230                break;
1231            case 1:
1232                RGB888_RGB565(dst, src);
1233                ++src;
1234                ++dst;
1235                break;
1236            }
1237            src += srcskip;
1238            dst += dstskip;
1239        }
1240    }
1241#endif /* USE_DUFFS_LOOP */
1242}
1243
1244
1245/* Special optimized blit for RGB 5-6-5 --> 32-bit RGB surfaces */
1246#define RGB565_32(dst, src, map) (map[src[LO]*2] + map[src[HI]*2+1])
1247static void
1248Blit_RGB565_32(SDL_BlitInfo * info, const Uint32 * map)
1249{
1250#ifndef USE_DUFFS_LOOP
1251    int c;
1252#endif
1253    int width, height;
1254    Uint8 *src;
1255    Uint32 *dst;
1256    int srcskip, dstskip;
1257
1258    /* Set up some basic variables */
1259    width = info->dst_w;
1260    height = info->dst_h;
1261    src = (Uint8 *) info->src;
1262    srcskip = info->src_skip;
1263    dst = (Uint32 *) info->dst;
1264    dstskip = info->dst_skip / 4;
1265
1266#ifdef USE_DUFFS_LOOP
1267    while (height--) {
1268		/* *INDENT-OFF* */
1269		DUFFS_LOOP(
1270		{
1271			*dst++ = RGB565_32(dst, src, map);
1272			src += 2;
1273		},
1274		width);
1275		/* *INDENT-ON* */
1276        src += srcskip;
1277        dst += dstskip;
1278    }
1279#else
1280    while (height--) {
1281        /* Copy in 4 pixel chunks */
1282        for (c = width / 4; c; --c) {
1283            *dst++ = RGB565_32(dst, src, map);
1284            src += 2;
1285            *dst++ = RGB565_32(dst, src, map);
1286            src += 2;
1287            *dst++ = RGB565_32(dst, src, map);
1288            src += 2;
1289            *dst++ = RGB565_32(dst, src, map);
1290            src += 2;
1291        }
1292        /* Get any leftovers */
1293        switch (width & 3) {
1294        case 3:
1295            *dst++ = RGB565_32(dst, src, map);
1296            src += 2;
1297        case 2:
1298            *dst++ = RGB565_32(dst, src, map);
1299            src += 2;
1300        case 1:
1301            *dst++ = RGB565_32(dst, src, map);
1302            src += 2;
1303            break;
1304        }
1305        src += srcskip;
1306        dst += dstskip;
1307    }
1308#endif /* USE_DUFFS_LOOP */
1309}
1310
1311/* Special optimized blit for RGB 5-6-5 --> ARGB 8-8-8-8 */
/*
 * Lookup table passed to Blit_RGB565_32() by Blit_RGB565_ARGB8888().
 *
 * The 512 entries form 256 (even, odd) pairs.  The RGB565_32 macro adds the
 * even entry selected by one byte of the 16-bit pixel (src[LO]) to the odd
 * entry selected by the other byte (src[HI]) to build the 32-bit result.
 * In this table every odd entry has 0xff in its top byte and every even
 * entry has 0x00 there, so each sum has a top byte of 0xff.
 */
1312static const Uint32 RGB565_ARGB8888_LUT[512] = {
1313    0x00000000, 0xff000000, 0x00000008, 0xff002000,
1314    0x00000010, 0xff004000, 0x00000018, 0xff006100,
1315    0x00000020, 0xff008100, 0x00000029, 0xff00a100,
1316    0x00000031, 0xff00c200, 0x00000039, 0xff00e200,
1317    0x00000041, 0xff080000, 0x0000004a, 0xff082000,
1318    0x00000052, 0xff084000, 0x0000005a, 0xff086100,
1319    0x00000062, 0xff088100, 0x0000006a, 0xff08a100,
1320    0x00000073, 0xff08c200, 0x0000007b, 0xff08e200,
1321    0x00000083, 0xff100000, 0x0000008b, 0xff102000,
1322    0x00000094, 0xff104000, 0x0000009c, 0xff106100,
1323    0x000000a4, 0xff108100, 0x000000ac, 0xff10a100,
1324    0x000000b4, 0xff10c200, 0x000000bd, 0xff10e200,
1325    0x000000c5, 0xff180000, 0x000000cd, 0xff182000,
1326    0x000000d5, 0xff184000, 0x000000de, 0xff186100,
1327    0x000000e6, 0xff188100, 0x000000ee, 0xff18a100,
1328    0x000000f6, 0xff18c200, 0x000000ff, 0xff18e200,
1329    0x00000400, 0xff200000, 0x00000408, 0xff202000,
1330    0x00000410, 0xff204000, 0x00000418, 0xff206100,
1331    0x00000420, 0xff208100, 0x00000429, 0xff20a100,
1332    0x00000431, 0xff20c200, 0x00000439, 0xff20e200,
1333    0x00000441, 0xff290000, 0x0000044a, 0xff292000,
1334    0x00000452, 0xff294000, 0x0000045a, 0xff296100,
1335    0x00000462, 0xff298100, 0x0000046a, 0xff29a100,
1336    0x00000473, 0xff29c200, 0x0000047b, 0xff29e200,
1337    0x00000483, 0xff310000, 0x0000048b, 0xff312000,
1338    0x00000494, 0xff314000, 0x0000049c, 0xff316100,
1339    0x000004a4, 0xff318100, 0x000004ac, 0xff31a100,
1340    0x000004b4, 0xff31c200, 0x000004bd, 0xff31e200,
1341    0x000004c5, 0xff390000, 0x000004cd, 0xff392000,
1342    0x000004d5, 0xff394000, 0x000004de, 0xff396100,
1343    0x000004e6, 0xff398100, 0x000004ee, 0xff39a100,
1344    0x000004f6, 0xff39c200, 0x000004ff, 0xff39e200,
1345    0x00000800, 0xff410000, 0x00000808, 0xff412000,
1346    0x00000810, 0xff414000, 0x00000818, 0xff416100,
1347    0x00000820, 0xff418100, 0x00000829, 0xff41a100,
1348    0x00000831, 0xff41c200, 0x00000839, 0xff41e200,
1349    0x00000841, 0xff4a0000, 0x0000084a, 0xff4a2000,
1350    0x00000852, 0xff4a4000, 0x0000085a, 0xff4a6100,
1351    0x00000862, 0xff4a8100, 0x0000086a, 0xff4aa100,
1352    0x00000873, 0xff4ac200, 0x0000087b, 0xff4ae200,
1353    0x00000883, 0xff520000, 0x0000088b, 0xff522000,
1354    0x00000894, 0xff524000, 0x0000089c, 0xff526100,
1355    0x000008a4, 0xff528100, 0x000008ac, 0xff52a100,
1356    0x000008b4, 0xff52c200, 0x000008bd, 0xff52e200,
1357    0x000008c5, 0xff5a0000, 0x000008cd, 0xff5a2000,
1358    0x000008d5, 0xff5a4000, 0x000008de, 0xff5a6100,
1359    0x000008e6, 0xff5a8100, 0x000008ee, 0xff5aa100,
1360    0x000008f6, 0xff5ac200, 0x000008ff, 0xff5ae200,
1361    0x00000c00, 0xff620000, 0x00000c08, 0xff622000,
1362    0x00000c10, 0xff624000, 0x00000c18, 0xff626100,
1363    0x00000c20, 0xff628100, 0x00000c29, 0xff62a100,
1364    0x00000c31, 0xff62c200, 0x00000c39, 0xff62e200,
1365    0x00000c41, 0xff6a0000, 0x00000c4a, 0xff6a2000,
1366    0x00000c52, 0xff6a4000, 0x00000c5a, 0xff6a6100,
1367    0x00000c62, 0xff6a8100, 0x00000c6a, 0xff6aa100,
1368    0x00000c73, 0xff6ac200, 0x00000c7b, 0xff6ae200,
1369    0x00000c83, 0xff730000, 0x00000c8b, 0xff732000,
1370    0x00000c94, 0xff734000, 0x00000c9c, 0xff736100,
1371    0x00000ca4, 0xff738100, 0x00000cac, 0xff73a100,
1372    0x00000cb4, 0xff73c200, 0x00000cbd, 0xff73e200,
1373    0x00000cc5, 0xff7b0000, 0x00000ccd, 0xff7b2000,
1374    0x00000cd5, 0xff7b4000, 0x00000cde, 0xff7b6100,
1375    0x00000ce6, 0xff7b8100, 0x00000cee, 0xff7ba100,
1376    0x00000cf6, 0xff7bc200, 0x00000cff, 0xff7be200,
1377    0x00001000, 0xff830000, 0x00001008, 0xff832000,
1378    0x00001010, 0xff834000, 0x00001018, 0xff836100,
1379    0x00001020, 0xff838100, 0x00001029, 0xff83a100,
1380    0x00001031, 0xff83c200, 0x00001039, 0xff83e200,
1381    0x00001041, 0xff8b0000, 0x0000104a, 0xff8b2000,
1382    0x00001052, 0xff8b4000, 0x0000105a, 0xff8b6100,
1383    0x00001062, 0xff8b8100, 0x0000106a, 0xff8ba100,
1384    0x00001073, 0xff8bc200, 0x0000107b, 0xff8be200,
1385    0x00001083, 0xff940000, 0x0000108b, 0xff942000,
1386    0x00001094, 0xff944000, 0x0000109c, 0xff946100,
1387    0x000010a4, 0xff948100, 0x000010ac, 0xff94a100,
1388    0x000010b4, 0xff94c200, 0x000010bd, 0xff94e200,
1389    0x000010c5, 0xff9c0000, 0x000010cd, 0xff9c2000,
1390    0x000010d5, 0xff9c4000, 0x000010de, 0xff9c6100,
1391    0x000010e6, 0xff9c8100, 0x000010ee, 0xff9ca100,
1392    0x000010f6, 0xff9cc200, 0x000010ff, 0xff9ce200,
1393    0x00001400, 0xffa40000, 0x00001408, 0xffa42000,
1394    0x00001410, 0xffa44000, 0x00001418, 0xffa46100,
1395    0x00001420, 0xffa48100, 0x00001429, 0xffa4a100,
1396    0x00001431, 0xffa4c200, 0x00001439, 0xffa4e200,
1397    0x00001441, 0xffac0000, 0x0000144a, 0xffac2000,
1398    0x00001452, 0xffac4000, 0x0000145a, 0xffac6100,
1399    0x00001462, 0xffac8100, 0x0000146a, 0xffaca100,
1400    0x00001473, 0xffacc200, 0x0000147b, 0xfface200,
1401    0x00001483, 0xffb40000, 0x0000148b, 0xffb42000,
1402    0x00001494, 0xffb44000, 0x0000149c, 0xffb46100,
1403    0x000014a4, 0xffb48100, 0x000014ac, 0xffb4a100,
1404    0x000014b4, 0xffb4c200, 0x000014bd, 0xffb4e200,
1405    0x000014c5, 0xffbd0000, 0x000014cd, 0xffbd2000,
1406    0x000014d5, 0xffbd4000, 0x000014de, 0xffbd6100,
1407    0x000014e6, 0xffbd8100, 0x000014ee, 0xffbda100,
1408    0x000014f6, 0xffbdc200, 0x000014ff, 0xffbde200,
1409    0x00001800, 0xffc50000, 0x00001808, 0xffc52000,
1410    0x00001810, 0xffc54000, 0x00001818, 0xffc56100,
1411    0x00001820, 0xffc58100, 0x00001829, 0xffc5a100,
1412    0x00001831, 0xffc5c200, 0x00001839, 0xffc5e200,
1413    0x00001841, 0xffcd0000, 0x0000184a, 0xffcd2000,
1414    0x00001852, 0xffcd4000, 0x0000185a, 0xffcd6100,
1415    0x00001862, 0xffcd8100, 0x0000186a, 0xffcda100,
1416    0x00001873, 0xffcdc200, 0x0000187b, 0xffcde200,
1417    0x00001883, 0xffd50000, 0x0000188b, 0xffd52000,
1418    0x00001894, 0xffd54000, 0x0000189c, 0xffd56100,
1419    0x000018a4, 0xffd58100, 0x000018ac, 0xffd5a100,
1420    0x000018b4, 0xffd5c200, 0x000018bd, 0xffd5e200,
1421    0x000018c5, 0xffde0000, 0x000018cd, 0xffde2000,
1422    0x000018d5, 0xffde4000, 0x000018de, 0xffde6100,
1423    0x000018e6, 0xffde8100, 0x000018ee, 0xffdea100,
1424    0x000018f6, 0xffdec200, 0x000018ff, 0xffdee200,
1425    0x00001c00, 0xffe60000, 0x00001c08, 0xffe62000,
1426    0x00001c10, 0xffe64000, 0x00001c18, 0xffe66100,
1427    0x00001c20, 0xffe68100, 0x00001c29, 0xffe6a100,
1428    0x00001c31, 0xffe6c200, 0x00001c39, 0xffe6e200,
1429    0x00001c41, 0xffee0000, 0x00001c4a, 0xffee2000,
1430    0x00001c52, 0xffee4000, 0x00001c5a, 0xffee6100,
1431    0x00001c62, 0xffee8100, 0x00001c6a, 0xffeea100,
1432    0x00001c73, 0xffeec200, 0x00001c7b, 0xffeee200,
1433    0x00001c83, 0xfff60000, 0x00001c8b, 0xfff62000,
1434    0x00001c94, 0xfff64000, 0x00001c9c, 0xfff66100,
1435    0x00001ca4, 0xfff68100, 0x00001cac, 0xfff6a100,
1436    0x00001cb4, 0xfff6c200, 0x00001cbd, 0xfff6e200,
1437    0x00001cc5, 0xffff0000, 0x00001ccd, 0xffff2000,
1438    0x00001cd5, 0xffff4000, 0x00001cde, 0xffff6100,
1439    0x00001ce6, 0xffff8100, 0x00001cee, 0xffffa100,
1440    0x00001cf6, 0xffffc200, 0x00001cff, 0xffffe200
1441};
1442
1443static void
1444Blit_RGB565_ARGB8888(SDL_BlitInfo * info)
1445{
1446    Blit_RGB565_32(info, RGB565_ARGB8888_LUT);
1447}
1448
1449/* Special optimized blit for RGB 5-6-5 --> ABGR 8-8-8-8 */
1450static const Uint32 RGB565_ABGR8888_LUT[512] = {
1451    0xff000000, 0x00000000, 0xff080000, 0x00002000,
1452    0xff100000, 0x00004000, 0xff180000, 0x00006100,
1453    0xff200000, 0x00008100, 0xff290000, 0x0000a100,
1454    0xff310000, 0x0000c200, 0xff390000, 0x0000e200,
1455    0xff410000, 0x00000008, 0xff4a0000, 0x00002008,
1456    0xff520000, 0x00004008, 0xff5a0000, 0x00006108,
1457    0xff620000, 0x00008108, 0xff6a0000, 0x0000a108,
1458    0xff730000, 0x0000c208, 0xff7b0000, 0x0000e208,
1459    0xff830000, 0x00000010, 0xff8b0000, 0x00002010,
1460    0xff940000, 0x00004010, 0xff9c0000, 0x00006110,
1461    0xffa40000, 0x00008110, 0xffac0000, 0x0000a110,
1462    0xffb40000, 0x0000c210, 0xffbd0000, 0x0000e210,
1463    0xffc50000, 0x00000018, 0xffcd0000, 0x00002018,
1464    0xffd50000, 0x00004018, 0xffde0000, 0x00006118,
1465    0xffe60000, 0x00008118, 0xffee0000, 0x0000a118,
1466    0xfff60000, 0x0000c218, 0xffff0000, 0x0000e

Large files are truncated, but you can click here to view the full file