sdl

FORK: Simple Directmedia Layer
git clone https://git.neptards.moe/neptards/sdl.git
Log | Files | Refs

SDL_blit_N.c (119422B)


      1 /*
      2   Simple DirectMedia Layer
      3   Copyright (C) 1997-2020 Sam Lantinga <slouken@libsdl.org>
      4 
      5   This software is provided 'as-is', without any express or implied
      6   warranty.  In no event will the authors be held liable for any damages
      7   arising from the use of this software.
      8 
      9   Permission is granted to anyone to use this software for any purpose,
     10   including commercial applications, and to alter it and redistribute it
     11   freely, subject to the following restrictions:
     12 
     13   1. The origin of this software must not be misrepresented; you must not
     14      claim that you wrote the original software. If you use this software
     15      in a product, an acknowledgment in the product documentation would be
     16      appreciated but is not required.
     17   2. Altered source versions must be plainly marked as such, and must not be
     18      misrepresented as being the original software.
     19   3. This notice may not be removed or altered from any source distribution.
     20 */
     21 #include "../SDL_internal.h"
     22 
     23 #if SDL_HAVE_BLIT_N
     24 
     25 #include "SDL_video.h"
     26 #include "SDL_endian.h"
     27 #include "SDL_cpuinfo.h"
     28 #include "SDL_blit.h"
     29 
     30 
     31 /* General optimized routines that write char by char */
     32 #define HAVE_FAST_WRITE_INT8 1
     33 
     34 /* On some CPUs, writing byte by byte is slower than combining the bytes and writing a whole word */
     35 #if defined(__MIPS__) 
     36 #  undef  HAVE_FAST_WRITE_INT8
     37 #  define HAVE_FAST_WRITE_INT8 0
     38 #endif
     39 
     40 /* Functions to blit from N-bit surfaces to other surfaces */
     41 
     /*
      * Feature flags for the N-bit blitters.  Every non-zero enumerator is a
      * distinct single bit.
      */
     enum blit_features {
         BLIT_FEATURE_NONE                      = 0,
         BLIT_FEATURE_HAS_MMX                   = 1 << 0,
         BLIT_FEATURE_HAS_ALTIVEC               = 1 << 1,
         BLIT_FEATURE_ALTIVEC_DONT_USE_PREFETCH = 1 << 2,
         BLIT_FEATURE_HAS_ARM_SIMD              = 1 << 3
     };
     49 
     50 #if SDL_ALTIVEC_BLITTERS
     51 #ifdef HAVE_ALTIVEC_H
     52 #include <altivec.h>
     53 #endif
     54 #ifdef __MACOSX__
     55 #include <sys/sysctl.h>
     /*
      * Query the "hw.l3cachesize" sysctl.
      *
      * Returns the value reported by sysctlbyname(), or 0 when the call fails.
      */
     static size_t
     GetL3CacheSize(void)
     {
         u_int64_t l3size = 0;
         size_t l3size_len = sizeof(l3size);

         if (sysctlbyname("hw.l3cachesize", &l3size, &l3size_len, NULL, 0) != 0) {
             return 0;
         }
         return l3size;
     }
     70 #else
     /*
      * Fallback used when the sysctl query is not available: there is nothing
      * to ask, so just guess the G4 value (2 MiB).
      */
     static size_t
     GetL3CacheSize(void)
     {
         const size_t guessed_g4_size = 2u * 1024u * 1024u;  /* 2097152 */

         return guessed_g4_size;
     }
     77 #endif /* __MACOSX__ */
     78 
     /*
      * Helpers for spelling vector constants.  VECUINT8_LITERAL takes 16 byte
      * values and VECUINT16_LITERAL takes 8 halfword values.  Two spellings
      * exist: Mac OS X builds with GCC older than 4 get the parenthesised
      * form, everything else gets the brace-initialised form.
      */
     79 #if (defined(__MACOSX__) && (__GNUC__ < 4))
     80 #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
     81         (vector unsigned char) ( a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p )
     82 #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
     83         (vector unsigned short) ( a,b,c,d,e,f,g,h )
     84 #else
     85 #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
     86         (vector unsigned char) { a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p }
     87 #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
     88         (vector unsigned short) { a,b,c,d,e,f,g,h }
     89 #endif
     90 
     /*
      * Low four bits of an address: non-zero when x is not 16-byte aligned.
      * The argument is parenthesised so that pointer expressions such as
      * UNALIGNED_PTR(p + 4) are evaluated before the cast to size_t.
      */
     #define UNALIGNED_PTR(x) (((size_t) (x)) & 0x0000000F)

     /*
      * 16-byte vector literal that repeats the byte offsets (a, b, c, d) in
      * each of the four 32-bit lanes (offsets 0x00, 0x04, 0x08 and 0x0C).
      */
     #define VSWIZZLE32(a,b,c,d) (vector unsigned char) \
                                    ( 0x00+(a), 0x00+(b), 0x00+(c), 0x00+(d), \
                                      0x04+(a), 0x04+(b), 0x04+(c), 0x04+(d), \
                                      0x08+(a), 0x08+(b), 0x08+(c), 0x08+(d), \
                                      0x0C+(a), 0x0C+(b), 0x0C+(c), 0x0C+(d) )
     97 
     /*
      * Pack four channel values into one 32-bit pixel using the shifts and
      * masks of dstfmt.
      *
      * Each channel is widened to Uint32 before shifting: an 8-bit value
      * promoted to int and shifted left by 24 would overflow a signed int.
      * All arguments are parenthesised so expressions can be passed safely.
      */
     #define MAKE8888(dstfmt, r, g, b, a)  \
         ( ((((Uint32) (r)) << (dstfmt)->Rshift) & (dstfmt)->Rmask) | \
           ((((Uint32) (g)) << (dstfmt)->Gshift) & (dstfmt)->Gmask) | \
           ((((Uint32) (b)) << (dstfmt)->Bshift) & (dstfmt)->Bmask) | \
           ((((Uint32) (a)) << (dstfmt)->Ashift) & (dstfmt)->Amask) )
    103 
    104 /*
    105  * Data Stream Touch...Altivec cache prefetching.
    106  *
    107  *  Don't use this on a G5...however, the speed boost is very significant
    108  *   on a G4.
    109  */
        /* Stream channel numbers: one for the source, one for the destination. */
    110 #define DST_CHAN_SRC 1
    111 #define DST_CHAN_DEST 2
    112 
    113 /* Build a DST control word: size from bit 24 up, count from bit 16 up, stride in the low bits. */
    114 #define DST_CTRL(size, count, stride) \
    115     (((size) << 24) | ((count) << 16) | (stride))
    116 
        /*
         * Permute control used to realign source data.  When src is not 16-byte
         * aligned this is vec_lvsl(0, src); when it is aligned it is
         * vec_lvsl(8, src) with 8 added to every byte.
         * NOTE(review): the aligned case presumably yields indices 16..31 so
         * that the following vec_perm takes the second ("overflow") vector -
         * confirm against the AltiVec documentation.
         */
    117 #define VEC_ALIGNER(src) ((UNALIGNED_PTR(src)) \
    118     ? vec_lvsl(0, src) \
    119     : vec_add(vec_lvsl(8, src), vec_splat_u8(8)))
    120 
    121 /* Calculate the permute vector used for 32->32 swizzling */
    122 static vector unsigned char
    123 calc_swizzle32(const SDL_PixelFormat * srcfmt, const SDL_PixelFormat * dstfmt)
    124 {
    125     /*
    126      * We have to assume that the bits that aren't used by other
    127      *  colors is alpha, and it's one complete byte, since some formats
    128      *  leave alpha with a zero mask, but we should still swizzle the bits.
    129      */
    130     /* ARGB */
    131     const static const struct SDL_PixelFormat default_pixel_format = {
    132         0, NULL, 0, 0,
    133         {0, 0},
    134         0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000,
    135         0, 0, 0, 0,
    136         16, 8, 0, 24,
    137         0, NULL
    138     };
    139     const vector unsigned char plus = VECUINT8_LITERAL(0x00, 0x00, 0x00, 0x00,
    140                                                        0x04, 0x04, 0x04, 0x04,
    141                                                        0x08, 0x08, 0x08, 0x08,
    142                                                        0x0C, 0x0C, 0x0C,
    143                                                        0x0C);
    144     vector unsigned char vswiz;
    145     vector unsigned int srcvec;
    146     Uint32 rmask, gmask, bmask, amask;
    147 
    148     if (!srcfmt) {
    149         srcfmt = &default_pixel_format;
    150     }
    151     if (!dstfmt) {
    152         dstfmt = &default_pixel_format;
    153     }
    154 
    155 #define RESHIFT(X) (3 - ((X) >> 3))
    156     rmask = RESHIFT(srcfmt->Rshift) << (dstfmt->Rshift);
    157     gmask = RESHIFT(srcfmt->Gshift) << (dstfmt->Gshift);
    158     bmask = RESHIFT(srcfmt->Bshift) << (dstfmt->Bshift);
    159 
    160     /* Use zero for alpha if either surface doesn't have alpha */
    161     if (dstfmt->Amask) {
    162         amask =
    163             ((srcfmt->Amask) ? RESHIFT(srcfmt->
    164                                        Ashift) : 0x10) << (dstfmt->Ashift);
    165     } else {
    166         amask =
    167             0x10101010 & ((dstfmt->Rmask | dstfmt->Gmask | dstfmt->Bmask) ^
    168                           0xFFFFFFFF);
    169     }
    170 #undef RESHIFT
    171 
    172     ((unsigned int *) (char *) &srcvec)[0] = (rmask | gmask | bmask | amask);
    173     vswiz = vec_add(plus, (vector unsigned char) vec_splat(srcvec, 0));
    174     return (vswiz);
    175 }
    176 
    177 #if defined(__powerpc__) && (SDL_BYTEORDER == SDL_LIL_ENDIAN)
    178 /* reorder bytes for PowerPC little endian */
    179 static vector unsigned char reorder_ppc64le_vec(vector unsigned char vpermute)
    180 {
    181     /* The result vector of calc_swizzle32 reorder bytes using vec_perm.
    182        The LE transformation for vec_perm has an implicit assumption
    183        that the permutation is being used to reorder vector elements,
    184        not to reorder bytes within those elements.  
    185        Unfortunatly the result order is not the expected one for powerpc
    186        little endian when the two first vector parameters of vec_perm are
    187        not of type 'vector char'. This is because the numbering from the
    188        left for BE, and numbering from the right for LE, produces a
    189        different interpretation of what the odd and even lanes are.
    190        Refer to fedora bug 1392465
    191      */
    192 
    193     const vector unsigned char ppc64le_reorder = VECUINT8_LITERAL(
    194                                       0x01, 0x00, 0x03, 0x02,
    195                                       0x05, 0x04, 0x07, 0x06,
    196                                       0x09, 0x08, 0x0B, 0x0A,
    197                                       0x0D, 0x0C, 0x0F, 0x0E );
    198 
    199     vector unsigned char vswiz_ppc64le;
    200     vswiz_ppc64le = vec_perm(vpermute, vpermute, ppc64le_reorder);
    201     return(vswiz_ppc64le);
    202 }
    203 #endif
    204 
    205 static void Blit_RGB888_RGB565(SDL_BlitInfo * info);
    206 static void
    207 Blit_RGB888_RGB565Altivec(SDL_BlitInfo * info)
    208 {
    209     int height = info->dst_h;
    210     Uint8 *src = (Uint8 *) info->src;
    211     int srcskip = info->src_skip;
    212     Uint8 *dst = (Uint8 *) info->dst;
    213     int dstskip = info->dst_skip;
    214     SDL_PixelFormat *srcfmt = info->src_fmt;
    215     vector unsigned char valpha = vec_splat_u8(0);
    216     vector unsigned char vpermute = calc_swizzle32(srcfmt, NULL);
    217     vector unsigned char vgmerge = VECUINT8_LITERAL(0x00, 0x02, 0x00, 0x06,
    218                                                     0x00, 0x0a, 0x00, 0x0e,
    219                                                     0x00, 0x12, 0x00, 0x16,
    220                                                     0x00, 0x1a, 0x00, 0x1e);
    221     vector unsigned short v1 = vec_splat_u16(1);
    222     vector unsigned short v3 = vec_splat_u16(3);
    223     vector unsigned short v3f =
    224         VECUINT16_LITERAL(0x003f, 0x003f, 0x003f, 0x003f,
    225                           0x003f, 0x003f, 0x003f, 0x003f);
    226     vector unsigned short vfc =
    227         VECUINT16_LITERAL(0x00fc, 0x00fc, 0x00fc, 0x00fc,
    228                           0x00fc, 0x00fc, 0x00fc, 0x00fc);
    229     vector unsigned short vf800 = (vector unsigned short) vec_splat_u8(-7);
    230     vf800 = vec_sl(vf800, vec_splat_u16(8));
    231 
    232     while (height--) {
    233         vector unsigned char valigner;
    234         vector unsigned char voverflow;
    235         vector unsigned char vsrc;
    236 
    237         int width = info->dst_w;
    238         int extrawidth;
    239 
    240         /* do scalar until we can align... */
    241 #define ONE_PIXEL_BLEND(condition, widthvar) \
    242         while (condition) { \
    243             Uint32 Pixel; \
    244             unsigned sR, sG, sB, sA; \
    245             DISEMBLE_RGBA((Uint8 *)src, 4, srcfmt, Pixel, \
    246                           sR, sG, sB, sA); \
    247             *(Uint16 *)(dst) = (((sR << 8) & 0x0000F800) | \
    248                                 ((sG << 3) & 0x000007E0) | \
    249                                 ((sB >> 3) & 0x0000001F)); \
    250             dst += 2; \
    251             src += 4; \
    252             widthvar--; \
    253         }
    254 
    255         ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
    256 
    257         /* After all that work, here's the vector part! */
    258         extrawidth = (width % 8);       /* trailing unaligned stores */
    259         width -= extrawidth;
    260         vsrc = vec_ld(0, src);
    261         valigner = VEC_ALIGNER(src);
    262 
    263         while (width) {
    264             vector unsigned short vpixel, vrpixel, vgpixel, vbpixel;
    265             vector unsigned int vsrc1, vsrc2;
    266             vector unsigned char vdst;
    267 
    268             voverflow = vec_ld(15, src);
    269             vsrc = vec_perm(vsrc, voverflow, valigner);
    270             vsrc1 = (vector unsigned int) vec_perm(vsrc, valpha, vpermute);
    271             src += 16;
    272             vsrc = voverflow;
    273             voverflow = vec_ld(15, src);
    274             vsrc = vec_perm(vsrc, voverflow, valigner);
    275             vsrc2 = (vector unsigned int) vec_perm(vsrc, valpha, vpermute);
    276             /* 1555 */
    277             vpixel = (vector unsigned short) vec_packpx(vsrc1, vsrc2);
    278             vgpixel = (vector unsigned short) vec_perm(vsrc1, vsrc2, vgmerge);
    279             vgpixel = vec_and(vgpixel, vfc);
    280             vgpixel = vec_sl(vgpixel, v3);
    281             vrpixel = vec_sl(vpixel, v1);
    282             vrpixel = vec_and(vrpixel, vf800);
    283             vbpixel = vec_and(vpixel, v3f);
    284             vdst =
    285                 vec_or((vector unsigned char) vrpixel,
    286                        (vector unsigned char) vgpixel);
    287             /* 565 */
    288             vdst = vec_or(vdst, (vector unsigned char) vbpixel);
    289             vec_st(vdst, 0, dst);
    290 
    291             width -= 8;
    292             src += 16;
    293             dst += 16;
    294             vsrc = voverflow;
    295         }
    296 
    297         SDL_assert(width == 0);
    298 
    299         /* do scalar until we can align... */
    300         ONE_PIXEL_BLEND((extrawidth), extrawidth);
    301 #undef ONE_PIXEL_BLEND
    302 
    303         src += srcskip;         /* move to next row, accounting for pitch. */
    304         dst += dstskip;
    305     }
    306 
    307 
    308 }
    309 
    310 static void
    311 Blit_RGB565_32Altivec(SDL_BlitInfo * info)
    312 {
    313     int height = info->dst_h;
    314     Uint8 *src = (Uint8 *) info->src;
    315     int srcskip = info->src_skip;
    316     Uint8 *dst = (Uint8 *) info->dst;
    317     int dstskip = info->dst_skip;
    318     SDL_PixelFormat *srcfmt = info->src_fmt;
    319     SDL_PixelFormat *dstfmt = info->dst_fmt;
    320     unsigned alpha;
    321     vector unsigned char valpha;
    322     vector unsigned char vpermute;
    323     vector unsigned short vf800;
    324     vector unsigned int v8 = vec_splat_u32(8);
    325     vector unsigned int v16 = vec_add(v8, v8);
    326     vector unsigned short v2 = vec_splat_u16(2);
    327     vector unsigned short v3 = vec_splat_u16(3);
    328     /*
    329        0x10 - 0x1f is the alpha
    330        0x00 - 0x0e evens are the red
    331        0x01 - 0x0f odds are zero
    332      */
    333     vector unsigned char vredalpha1 = VECUINT8_LITERAL(0x10, 0x00, 0x01, 0x01,
    334                                                        0x10, 0x02, 0x01, 0x01,
    335                                                        0x10, 0x04, 0x01, 0x01,
    336                                                        0x10, 0x06, 0x01,
    337                                                        0x01);
    338     vector unsigned char vredalpha2 =
    339         (vector unsigned
    340          char) (vec_add((vector unsigned int) vredalpha1, vec_sl(v8, v16))
    341         );
    342     /*
    343        0x00 - 0x0f is ARxx ARxx ARxx ARxx
    344        0x11 - 0x0f odds are blue
    345      */
    346     vector unsigned char vblue1 = VECUINT8_LITERAL(0x00, 0x01, 0x02, 0x11,
    347                                                    0x04, 0x05, 0x06, 0x13,
    348                                                    0x08, 0x09, 0x0a, 0x15,
    349                                                    0x0c, 0x0d, 0x0e, 0x17);
    350     vector unsigned char vblue2 =
    351         (vector unsigned char) (vec_add((vector unsigned int) vblue1, v8)
    352         );
    353     /*
    354        0x00 - 0x0f is ARxB ARxB ARxB ARxB
    355        0x10 - 0x0e evens are green
    356      */
    357     vector unsigned char vgreen1 = VECUINT8_LITERAL(0x00, 0x01, 0x10, 0x03,
    358                                                     0x04, 0x05, 0x12, 0x07,
    359                                                     0x08, 0x09, 0x14, 0x0b,
    360                                                     0x0c, 0x0d, 0x16, 0x0f);
    361     vector unsigned char vgreen2 =
    362         (vector unsigned
    363          char) (vec_add((vector unsigned int) vgreen1, vec_sl(v8, v8))
    364         );
    365 
    366     SDL_assert(srcfmt->BytesPerPixel == 2);
    367     SDL_assert(dstfmt->BytesPerPixel == 4);
    368 
    369     vf800 = (vector unsigned short) vec_splat_u8(-7);
    370     vf800 = vec_sl(vf800, vec_splat_u16(8));
    371 
    372     if (dstfmt->Amask && info->a) {
    373         ((unsigned char *) &valpha)[0] = alpha = info->a;
    374         valpha = vec_splat(valpha, 0);
    375     } else {
    376         alpha = 0;
    377         valpha = vec_splat_u8(0);
    378     }
    379 
    380     vpermute = calc_swizzle32(NULL, dstfmt);
    381     while (height--) {
    382         vector unsigned char valigner;
    383         vector unsigned char voverflow;
    384         vector unsigned char vsrc;
    385 
    386         int width = info->dst_w;
    387         int extrawidth;
    388 
    389         /* do scalar until we can align... */
    390 #define ONE_PIXEL_BLEND(condition, widthvar) \
    391         while (condition) { \
    392             unsigned sR, sG, sB; \
    393             unsigned short Pixel = *((unsigned short *)src); \
    394             sR = (Pixel >> 8) & 0xf8; \
    395             sG = (Pixel >> 3) & 0xfc; \
    396             sB = (Pixel << 3) & 0xf8; \
    397             ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
    398             src += 2; \
    399             dst += 4; \
    400             widthvar--; \
    401         }
    402         ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
    403 
    404         /* After all that work, here's the vector part! */
    405         extrawidth = (width % 8);       /* trailing unaligned stores */
    406         width -= extrawidth;
    407         vsrc = vec_ld(0, src);
    408         valigner = VEC_ALIGNER(src);
    409 
    410         while (width) {
    411             vector unsigned short vR, vG, vB;
    412             vector unsigned char vdst1, vdst2;
    413 
    414             voverflow = vec_ld(15, src);
    415             vsrc = vec_perm(vsrc, voverflow, valigner);
    416 
    417             vR = vec_and((vector unsigned short) vsrc, vf800);
    418             vB = vec_sl((vector unsigned short) vsrc, v3);
    419             vG = vec_sl(vB, v2);
    420 
    421             vdst1 =
    422                 (vector unsigned char) vec_perm((vector unsigned char) vR,
    423                                                 valpha, vredalpha1);
    424             vdst1 = vec_perm(vdst1, (vector unsigned char) vB, vblue1);
    425             vdst1 = vec_perm(vdst1, (vector unsigned char) vG, vgreen1);
    426             vdst1 = vec_perm(vdst1, valpha, vpermute);
    427             vec_st(vdst1, 0, dst);
    428 
    429             vdst2 =
    430                 (vector unsigned char) vec_perm((vector unsigned char) vR,
    431                                                 valpha, vredalpha2);
    432             vdst2 = vec_perm(vdst2, (vector unsigned char) vB, vblue2);
    433             vdst2 = vec_perm(vdst2, (vector unsigned char) vG, vgreen2);
    434             vdst2 = vec_perm(vdst2, valpha, vpermute);
    435             vec_st(vdst2, 16, dst);
    436 
    437             width -= 8;
    438             dst += 32;
    439             src += 16;
    440             vsrc = voverflow;
    441         }
    442 
    443         SDL_assert(width == 0);
    444 
    445 
    446         /* do scalar until we can align... */
    447         ONE_PIXEL_BLEND((extrawidth), extrawidth);
    448 #undef ONE_PIXEL_BLEND
    449 
    450         src += srcskip;         /* move to next row, accounting for pitch. */
    451         dst += dstskip;
    452     }
    453 
    454 }
    455 
    456 
    457 static void
    458 Blit_RGB555_32Altivec(SDL_BlitInfo * info)
    459 {
    460     int height = info->dst_h;
    461     Uint8 *src = (Uint8 *) info->src;
    462     int srcskip = info->src_skip;
    463     Uint8 *dst = (Uint8 *) info->dst;
    464     int dstskip = info->dst_skip;
    465     SDL_PixelFormat *srcfmt = info->src_fmt;
    466     SDL_PixelFormat *dstfmt = info->dst_fmt;
    467     unsigned alpha;
    468     vector unsigned char valpha;
    469     vector unsigned char vpermute;
    470     vector unsigned short vf800;
    471     vector unsigned int v8 = vec_splat_u32(8);
    472     vector unsigned int v16 = vec_add(v8, v8);
    473     vector unsigned short v1 = vec_splat_u16(1);
    474     vector unsigned short v3 = vec_splat_u16(3);
    475     /*
    476        0x10 - 0x1f is the alpha
    477        0x00 - 0x0e evens are the red
    478        0x01 - 0x0f odds are zero
    479      */
    480     vector unsigned char vredalpha1 = VECUINT8_LITERAL(0x10, 0x00, 0x01, 0x01,
    481                                                        0x10, 0x02, 0x01, 0x01,
    482                                                        0x10, 0x04, 0x01, 0x01,
    483                                                        0x10, 0x06, 0x01,
    484                                                        0x01);
    485     vector unsigned char vredalpha2 =
    486         (vector unsigned
    487          char) (vec_add((vector unsigned int) vredalpha1, vec_sl(v8, v16))
    488         );
    489     /*
    490        0x00 - 0x0f is ARxx ARxx ARxx ARxx
    491        0x11 - 0x0f odds are blue
    492      */
    493     vector unsigned char vblue1 = VECUINT8_LITERAL(0x00, 0x01, 0x02, 0x11,
    494                                                    0x04, 0x05, 0x06, 0x13,
    495                                                    0x08, 0x09, 0x0a, 0x15,
    496                                                    0x0c, 0x0d, 0x0e, 0x17);
    497     vector unsigned char vblue2 =
    498         (vector unsigned char) (vec_add((vector unsigned int) vblue1, v8)
    499         );
    500     /*
    501        0x00 - 0x0f is ARxB ARxB ARxB ARxB
    502        0x10 - 0x0e evens are green
    503      */
    504     vector unsigned char vgreen1 = VECUINT8_LITERAL(0x00, 0x01, 0x10, 0x03,
    505                                                     0x04, 0x05, 0x12, 0x07,
    506                                                     0x08, 0x09, 0x14, 0x0b,
    507                                                     0x0c, 0x0d, 0x16, 0x0f);
    508     vector unsigned char vgreen2 =
    509         (vector unsigned
    510          char) (vec_add((vector unsigned int) vgreen1, vec_sl(v8, v8))
    511         );
    512 
    513     SDL_assert(srcfmt->BytesPerPixel == 2);
    514     SDL_assert(dstfmt->BytesPerPixel == 4);
    515 
    516     vf800 = (vector unsigned short) vec_splat_u8(-7);
    517     vf800 = vec_sl(vf800, vec_splat_u16(8));
    518 
    519     if (dstfmt->Amask && info->a) {
    520         ((unsigned char *) &valpha)[0] = alpha = info->a;
    521         valpha = vec_splat(valpha, 0);
    522     } else {
    523         alpha = 0;
    524         valpha = vec_splat_u8(0);
    525     }
    526 
    527     vpermute = calc_swizzle32(NULL, dstfmt);
    528     while (height--) {
    529         vector unsigned char valigner;
    530         vector unsigned char voverflow;
    531         vector unsigned char vsrc;
    532 
    533         int width = info->dst_w;
    534         int extrawidth;
    535 
    536         /* do scalar until we can align... */
    537 #define ONE_PIXEL_BLEND(condition, widthvar) \
    538         while (condition) { \
    539             unsigned sR, sG, sB; \
    540             unsigned short Pixel = *((unsigned short *)src); \
    541             sR = (Pixel >> 7) & 0xf8; \
    542             sG = (Pixel >> 2) & 0xf8; \
    543             sB = (Pixel << 3) & 0xf8; \
    544             ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
    545             src += 2; \
    546             dst += 4; \
    547             widthvar--; \
    548         }
    549         ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
    550 
    551         /* After all that work, here's the vector part! */
    552         extrawidth = (width % 8);       /* trailing unaligned stores */
    553         width -= extrawidth;
    554         vsrc = vec_ld(0, src);
    555         valigner = VEC_ALIGNER(src);
    556 
    557         while (width) {
    558             vector unsigned short vR, vG, vB;
    559             vector unsigned char vdst1, vdst2;
    560 
    561             voverflow = vec_ld(15, src);
    562             vsrc = vec_perm(vsrc, voverflow, valigner);
    563 
    564             vR = vec_and(vec_sl((vector unsigned short) vsrc, v1), vf800);
    565             vB = vec_sl((vector unsigned short) vsrc, v3);
    566             vG = vec_sl(vB, v3);
    567 
    568             vdst1 =
    569                 (vector unsigned char) vec_perm((vector unsigned char) vR,
    570                                                 valpha, vredalpha1);
    571             vdst1 = vec_perm(vdst1, (vector unsigned char) vB, vblue1);
    572             vdst1 = vec_perm(vdst1, (vector unsigned char) vG, vgreen1);
    573             vdst1 = vec_perm(vdst1, valpha, vpermute);
    574             vec_st(vdst1, 0, dst);
    575 
    576             vdst2 =
    577                 (vector unsigned char) vec_perm((vector unsigned char) vR,
    578                                                 valpha, vredalpha2);
    579             vdst2 = vec_perm(vdst2, (vector unsigned char) vB, vblue2);
    580             vdst2 = vec_perm(vdst2, (vector unsigned char) vG, vgreen2);
    581             vdst2 = vec_perm(vdst2, valpha, vpermute);
    582             vec_st(vdst2, 16, dst);
    583 
    584             width -= 8;
    585             dst += 32;
    586             src += 16;
    587             vsrc = voverflow;
    588         }
    589 
    590         SDL_assert(width == 0);
    591 
    592 
    593         /* do scalar until we can align... */
    594         ONE_PIXEL_BLEND((extrawidth), extrawidth);
    595 #undef ONE_PIXEL_BLEND
    596 
    597         src += srcskip;         /* move to next row, accounting for pitch. */
    598         dst += dstskip;
    599     }
    600 
    601 }
    602 
    603 static void BlitNtoNKey(SDL_BlitInfo * info);
    604 static void BlitNtoNKeyCopyAlpha(SDL_BlitInfo * info);
    605 static void
    606 Blit32to32KeyAltivec(SDL_BlitInfo * info)
    607 {
    608     int height = info->dst_h;
    609     Uint32 *srcp = (Uint32 *) info->src;
    610     int srcskip = info->src_skip / 4;
    611     Uint32 *dstp = (Uint32 *) info->dst;
    612     int dstskip = info->dst_skip / 4;
    613     SDL_PixelFormat *srcfmt = info->src_fmt;
    614     int srcbpp = srcfmt->BytesPerPixel;
    615     SDL_PixelFormat *dstfmt = info->dst_fmt;
    616     int dstbpp = dstfmt->BytesPerPixel;
    617     int copy_alpha = (srcfmt->Amask && dstfmt->Amask);
    618     unsigned alpha = dstfmt->Amask ? info->a : 0;
    619     Uint32 rgbmask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
    620     Uint32 ckey = info->colorkey;
    621     vector unsigned int valpha;
    622     vector unsigned char vpermute;
    623     vector unsigned char vzero;
    624     vector unsigned int vckey;
    625     vector unsigned int vrgbmask;
    626     vpermute = calc_swizzle32(srcfmt, dstfmt);
    627     if (info->dst_w < 16) {
    628         if (copy_alpha) {
    629             BlitNtoNKeyCopyAlpha(info);
    630         } else {
    631             BlitNtoNKey(info);
    632         }
    633         return;
    634     }
    635     vzero = vec_splat_u8(0);
    636     if (alpha) {
    637         ((unsigned char *) &valpha)[0] = (unsigned char) alpha;
    638         valpha =
    639             (vector unsigned int) vec_splat((vector unsigned char) valpha, 0);
    640     } else {
    641         valpha = (vector unsigned int) vzero;
    642     }
    643     ckey &= rgbmask;
    644     ((unsigned int *) (char *) &vckey)[0] = ckey;
    645     vckey = vec_splat(vckey, 0);
    646     ((unsigned int *) (char *) &vrgbmask)[0] = rgbmask;
    647     vrgbmask = vec_splat(vrgbmask, 0);
    648 
    649     while (height--) {
    650 #define ONE_PIXEL_BLEND(condition, widthvar) \
    651         if (copy_alpha) { \
    652             while (condition) { \
    653                 Uint32 Pixel; \
    654                 unsigned sR, sG, sB, sA; \
    655                 DISEMBLE_RGBA((Uint8 *)srcp, srcbpp, srcfmt, Pixel, \
    656                           sR, sG, sB, sA); \
    657                 if ( (Pixel & rgbmask) != ckey ) { \
    658                       ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
    659                             sR, sG, sB, sA); \
    660                 } \
    661                 dstp = (Uint32 *) (((Uint8 *) dstp) + dstbpp); \
    662                 srcp = (Uint32 *) (((Uint8 *) srcp) + srcbpp); \
    663                 widthvar--; \
    664             } \
    665         } else { \
    666             while (condition) { \
    667                 Uint32 Pixel; \
    668                 unsigned sR, sG, sB; \
    669                 RETRIEVE_RGB_PIXEL((Uint8 *)srcp, srcbpp, Pixel); \
    670                 if ( Pixel != ckey ) { \
    671                     RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB); \
    672                     ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
    673                               sR, sG, sB, alpha); \
    674                 } \
    675                 dstp = (Uint32 *) (((Uint8 *)dstp) + dstbpp); \
    676                 srcp = (Uint32 *) (((Uint8 *)srcp) + srcbpp); \
    677                 widthvar--; \
    678             } \
    679         }
    680         int width = info->dst_w;
    681         ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
    682         SDL_assert(width > 0);
    683         if (width > 0) {
    684             int extrawidth = (width % 4);
    685             vector unsigned char valigner = VEC_ALIGNER(srcp);
    686             vector unsigned int vs = vec_ld(0, srcp);
    687             width -= extrawidth;
    688             SDL_assert(width >= 4);
    689             while (width) {
    690                 vector unsigned char vsel;
    691                 vector unsigned int vd;
    692                 vector unsigned int voverflow = vec_ld(15, srcp);
    693                 /* load the source vec */
    694                 vs = vec_perm(vs, voverflow, valigner);
    695                 /* vsel is set for items that match the key */
    696                 vsel = (vector unsigned char) vec_and(vs, vrgbmask);
    697                 vsel = (vector unsigned char) vec_cmpeq(vs, vckey);
    698 #if defined(__powerpc__) && (SDL_BYTEORDER == SDL_LIL_ENDIAN)
    699                 /* reorder bytes for PowerPC little endian */
    700                 vpermute = reorder_ppc64le_vec(vpermute);
    701 #endif
    702                 /* permute the src vec to the dest format */
    703                 vs = vec_perm(vs, valpha, vpermute);
    704                 /* load the destination vec */
    705                 vd = vec_ld(0, dstp);
    706                 /* select the source and dest into vs */
    707                 vd = (vector unsigned int) vec_sel((vector unsigned char) vs,
    708                                                    (vector unsigned char) vd,
    709                                                    vsel);
    710 
    711                 vec_st(vd, 0, dstp);
    712                 srcp += 4;
    713                 width -= 4;
    714                 dstp += 4;
    715                 vs = voverflow;
    716             }
    717             ONE_PIXEL_BLEND((extrawidth), extrawidth);
    718 #undef ONE_PIXEL_BLEND
    719             srcp += srcskip;
    720             dstp += dstskip;
    721         }
    722     }
    723 }
    724 
    725 /* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */
    726 /* Use this on a G5 */
    727 static void
    728 ConvertAltivec32to32_noprefetch(SDL_BlitInfo * info)
    729 {
    730     int height = info->dst_h;
    731     Uint32 *src = (Uint32 *) info->src;
    732     int srcskip = info->src_skip / 4;
    733     Uint32 *dst = (Uint32 *) info->dst;
    734     int dstskip = info->dst_skip / 4;
    735     SDL_PixelFormat *srcfmt = info->src_fmt;
    736     SDL_PixelFormat *dstfmt = info->dst_fmt;
    737     vector unsigned int vzero = vec_splat_u32(0);
    738     vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
    739     if (dstfmt->Amask && !srcfmt->Amask) {
    740         if (info->a) {
    741             vector unsigned char valpha;
    742             ((unsigned char *) &valpha)[0] = info->a;
    743             vzero = (vector unsigned int) vec_splat(valpha, 0);
    744         }
    745     }
    746 
    747     SDL_assert(srcfmt->BytesPerPixel == 4);
    748     SDL_assert(dstfmt->BytesPerPixel == 4);
    749 
    750     while (height--) {
    751         vector unsigned char valigner;
    752         vector unsigned int vbits;
    753         vector unsigned int voverflow;
    754         Uint32 bits;
    755         Uint8 r, g, b, a;
    756 
    757         int width = info->dst_w;
    758         int extrawidth;
    759 
    760         /* do scalar until we can align... */
    761         while ((UNALIGNED_PTR(dst)) && (width)) {
    762             bits = *(src++);
    763             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
    764             if(!srcfmt->Amask)
    765               a = info->a;
    766             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
    767             width--;
    768         }
    769 
    770         /* After all that work, here's the vector part! */
    771         extrawidth = (width % 4);
    772         width -= extrawidth;
    773         valigner = VEC_ALIGNER(src);
    774         vbits = vec_ld(0, src);
    775 
    776         while (width) {
    777             voverflow = vec_ld(15, src);
    778             src += 4;
    779             width -= 4;
    780             vbits = vec_perm(vbits, voverflow, valigner);       /* src is ready. */
    781 #if defined(__powerpc__) && (SDL_BYTEORDER == SDL_LIL_ENDIAN)
    782             /* reorder bytes for PowerPC little endian */
    783             vpermute = reorder_ppc64le_vec(vpermute);
    784 #endif
    785             vbits = vec_perm(vbits, vzero, vpermute);   /* swizzle it. */
    786             vec_st(vbits, 0, dst);      /* store it back out. */
    787             dst += 4;
    788             vbits = voverflow;
    789         }
    790 
    791         SDL_assert(width == 0);
    792 
    793         /* cover pixels at the end of the row that didn't fit in 16 bytes. */
    794         while (extrawidth) {
    795             bits = *(src++);    /* max 7 pixels, don't bother with prefetch. */
    796             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
    797             if(!srcfmt->Amask)
    798               a = info->a;
    799             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
    800             extrawidth--;
    801         }
    802 
    803         src += srcskip;
    804         dst += dstskip;
    805     }
    806 
    807 }
    808 
    809 /* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */
    810 /* Use this on a G4 */
    811 static void
    812 ConvertAltivec32to32_prefetch(SDL_BlitInfo * info)
    813 {
    814     const int scalar_dst_lead = sizeof(Uint32) * 4;
    815     const int vector_dst_lead = sizeof(Uint32) * 16;
    816 
    817     int height = info->dst_h;
    818     Uint32 *src = (Uint32 *) info->src;
    819     int srcskip = info->src_skip / 4;
    820     Uint32 *dst = (Uint32 *) info->dst;
    821     int dstskip = info->dst_skip / 4;
    822     SDL_PixelFormat *srcfmt = info->src_fmt;
    823     SDL_PixelFormat *dstfmt = info->dst_fmt;
    824     vector unsigned int vzero = vec_splat_u32(0);
    825     vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
    826     if (dstfmt->Amask && !srcfmt->Amask) {
    827         if (info->a) {
    828             vector unsigned char valpha;
    829             ((unsigned char *) &valpha)[0] = info->a;
    830             vzero = (vector unsigned int) vec_splat(valpha, 0);
    831         }
    832     }
    833 
    834     SDL_assert(srcfmt->BytesPerPixel == 4);
    835     SDL_assert(dstfmt->BytesPerPixel == 4);
    836 
    837     while (height--) {
    838         vector unsigned char valigner;
    839         vector unsigned int vbits;
    840         vector unsigned int voverflow;
    841         Uint32 bits;
    842         Uint8 r, g, b, a;
    843 
    844         int width = info->dst_w;
    845         int extrawidth;
    846 
    847         /* do scalar until we can align... */
    848         while ((UNALIGNED_PTR(dst)) && (width)) {
    849             vec_dstt(src + scalar_dst_lead, DST_CTRL(2, 32, 1024),
    850                      DST_CHAN_SRC);
    851             vec_dstst(dst + scalar_dst_lead, DST_CTRL(2, 32, 1024),
    852                       DST_CHAN_DEST);
    853             bits = *(src++);
    854             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
    855             if(!srcfmt->Amask)
    856               a = info->a;
    857             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
    858             width--;
    859         }
    860 
    861         /* After all that work, here's the vector part! */
    862         extrawidth = (width % 4);
    863         width -= extrawidth;
    864         valigner = VEC_ALIGNER(src);
    865         vbits = vec_ld(0, src);
    866 
    867         while (width) {
    868             vec_dstt(src + vector_dst_lead, DST_CTRL(2, 32, 1024),
    869                      DST_CHAN_SRC);
    870             vec_dstst(dst + vector_dst_lead, DST_CTRL(2, 32, 1024),
    871                       DST_CHAN_DEST);
    872             voverflow = vec_ld(15, src);
    873             src += 4;
    874             width -= 4;
    875             vbits = vec_perm(vbits, voverflow, valigner);       /* src is ready. */
    876 #if defined(__powerpc__) && (SDL_BYTEORDER == SDL_LIL_ENDIAN)
    877             /* reorder bytes for PowerPC little endian */
    878             vpermute = reorder_ppc64le_vec(vpermute);
    879 #endif
    880             vbits = vec_perm(vbits, vzero, vpermute);   /* swizzle it. */
    881             vec_st(vbits, 0, dst);      /* store it back out. */
    882             dst += 4;
    883             vbits = voverflow;
    884         }
    885 
    886         SDL_assert(width == 0);
    887 
    888         /* cover pixels at the end of the row that didn't fit in 16 bytes. */
    889         while (extrawidth) {
    890             bits = *(src++);    /* max 7 pixels, don't bother with prefetch. */
    891             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
    892             if(!srcfmt->Amask)
    893               a = info->a;
    894             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
    895             extrawidth--;
    896         }
    897 
    898         src += srcskip;
    899         dst += dstskip;
    900     }
    901 
    902     vec_dss(DST_CHAN_SRC);
    903     vec_dss(DST_CHAN_DEST);
    904 }
    905 
    906 static enum blit_features
    907 GetBlitFeatures(void)
    908 {
    909     static enum blit_features features = -1;
    910     if (features == (enum blit_features) -1) {
    911         /* Provide an override for testing .. */
    912         char *override = SDL_getenv("SDL_ALTIVEC_BLIT_FEATURES");
    913         if (override) {
    914             unsigned int features_as_uint = 0;
    915             SDL_sscanf(override, "%u", &features_as_uint);
    916             features = (enum blit_features) features_as_uint;
    917         } else {
    918             features = (0
    919                         /* Feature 1 is has-MMX */
    920                         | ((SDL_HasMMX())? BLIT_FEATURE_HAS_MMX : 0)
    921                         /* Feature 2 is has-AltiVec */
    922                         | ((SDL_HasAltiVec())? BLIT_FEATURE_HAS_ALTIVEC : 0)
    923                         /* Feature 4 is dont-use-prefetch */
    924                         /* !!!! FIXME: Check for G5 or later, not the cache size! Always prefetch on a G4. */
    925                         | ((GetL3CacheSize() == 0) ? BLIT_FEATURE_ALTIVEC_DONT_USE_PREFETCH : 0)
    926                 );
    927         }
    928     }
    929     return features;
    930 }
    931 
    932 #if __MWERKS__
    933 #pragma altivec_model off
    934 #endif
    935 #else
/* Without AltiVec blitters: Feature 1 is has-MMX; ARM SIMD support is reported as BLIT_FEATURE_HAS_ARM_SIMD */
    937 #define GetBlitFeatures() ((SDL_HasMMX() ? BLIT_FEATURE_HAS_MMX : 0) | (SDL_HasARMSIMD() ? BLIT_FEATURE_HAS_ARM_SIMD : 0))
    938 #endif
    939 
    940 #if SDL_ARM_SIMD_BLITTERS
    941 void Blit_BGR888_RGB888ARMSIMDAsm(int32_t w, int32_t h, uint32_t *dst, int32_t dst_stride, uint32_t *src, int32_t src_stride);
    942 
    943 static void
    944 Blit_BGR888_RGB888ARMSIMD(SDL_BlitInfo * info)
    945 {
    946 	int32_t width = info->dst_w;
    947 	int32_t height = info->dst_h;
    948 	uint32_t *dstp = (uint32_t *)info->dst;
    949 	int32_t dststride = width + (info->dst_skip >> 2);
    950 	uint32_t *srcp = (uint32_t *)info->src;
    951 	int32_t srcstride = width + (info->src_skip >> 2);
    952 
    953 	Blit_BGR888_RGB888ARMSIMDAsm(width, height, dstp, dststride, srcp, srcstride);
    954 }
    955 
    956 void Blit_RGB444_RGB888ARMSIMDAsm(int32_t w, int32_t h, uint32_t *dst, int32_t dst_stride, uint16_t *src, int32_t src_stride);
    957 
    958 static void
    959 Blit_RGB444_RGB888ARMSIMD(SDL_BlitInfo * info)
    960 {
    961 	int32_t width = info->dst_w;
    962 	int32_t height = info->dst_h;
    963 	uint32_t *dstp = (uint32_t *)info->dst;
    964 	int32_t dststride = width + (info->dst_skip >> 2);
    965 	uint16_t *srcp = (uint16_t *)info->src;
    966 	int32_t srcstride = width + (info->src_skip >> 1);
    967 
    968 	Blit_RGB444_RGB888ARMSIMDAsm(width, height, dstp, dststride, srcp, srcstride);
    969 }
    970 #endif
    971 
    972 /* This is now endian dependent */
    973 #if SDL_BYTEORDER == SDL_LIL_ENDIAN
    974 #define HI  1
    975 #define LO  0
    976 #else /* SDL_BYTEORDER == SDL_BIG_ENDIAN */
    977 #define HI  0
    978 #define LO  1
    979 #endif
    980 
    981 /* Special optimized blit for RGB 8-8-8 --> RGB 3-3-2 */
    982 #define RGB888_RGB332(dst, src) { \
    983     dst = (Uint8)((((src)&0x00E00000)>>16)| \
    984                   (((src)&0x0000E000)>>11)| \
    985                   (((src)&0x000000C0)>>6)); \
    986 }
    987 static void
    988 Blit_RGB888_index8(SDL_BlitInfo * info)
    989 {
    990 #ifndef USE_DUFFS_LOOP
    991     int c;
    992 #endif
    993     int width, height;
    994     Uint32 *src;
    995     const Uint8 *map;
    996     Uint8 *dst;
    997     int srcskip, dstskip;
    998 
    999     /* Set up some basic variables */
   1000     width = info->dst_w;
   1001     height = info->dst_h;
   1002     src = (Uint32 *) info->src;
   1003     srcskip = info->src_skip / 4;
   1004     dst = info->dst;
   1005     dstskip = info->dst_skip;
   1006     map = info->table;
   1007 
   1008     if (map == NULL) {
   1009         while (height--) {
   1010 #ifdef USE_DUFFS_LOOP
   1011             /* *INDENT-OFF* */
   1012             DUFFS_LOOP(
   1013                 RGB888_RGB332(*dst++, *src);
   1014             , width);
   1015             /* *INDENT-ON* */
   1016 #else
   1017             for (c = width / 4; c; --c) {
   1018                 /* Pack RGB into 8bit pixel */
   1019                 ++src;
   1020                 RGB888_RGB332(*dst++, *src);
   1021                 ++src;
   1022                 RGB888_RGB332(*dst++, *src);
   1023                 ++src;
   1024                 RGB888_RGB332(*dst++, *src);
   1025                 ++src;
   1026             }
   1027             switch (width & 3) {
   1028             case 3:
   1029                 RGB888_RGB332(*dst++, *src);
   1030                 ++src;
   1031             case 2:
   1032                 RGB888_RGB332(*dst++, *src);
   1033                 ++src;
   1034             case 1:
   1035                 RGB888_RGB332(*dst++, *src);
   1036                 ++src;
   1037             }
   1038 #endif /* USE_DUFFS_LOOP */
   1039             src += srcskip;
   1040             dst += dstskip;
   1041         }
   1042     } else {
   1043         int Pixel;
   1044 
   1045         while (height--) {
   1046 #ifdef USE_DUFFS_LOOP
   1047             /* *INDENT-OFF* */
   1048             DUFFS_LOOP(
   1049                 RGB888_RGB332(Pixel, *src);
   1050                 *dst++ = map[Pixel];
   1051                 ++src;
   1052             , width);
   1053             /* *INDENT-ON* */
   1054 #else
   1055             for (c = width / 4; c; --c) {
   1056                 /* Pack RGB into 8bit pixel */
   1057                 RGB888_RGB332(Pixel, *src);
   1058                 *dst++ = map[Pixel];
   1059                 ++src;
   1060                 RGB888_RGB332(Pixel, *src);
   1061                 *dst++ = map[Pixel];
   1062                 ++src;
   1063                 RGB888_RGB332(Pixel, *src);
   1064                 *dst++ = map[Pixel];
   1065                 ++src;
   1066                 RGB888_RGB332(Pixel, *src);
   1067                 *dst++ = map[Pixel];
   1068                 ++src;
   1069             }
   1070             switch (width & 3) {
   1071             case 3:
   1072                 RGB888_RGB332(Pixel, *src);
   1073                 *dst++ = map[Pixel];
   1074                 ++src;
   1075             case 2:
   1076                 RGB888_RGB332(Pixel, *src);
   1077                 *dst++ = map[Pixel];
   1078                 ++src;
   1079             case 1:
   1080                 RGB888_RGB332(Pixel, *src);
   1081                 *dst++ = map[Pixel];
   1082                 ++src;
   1083             }
   1084 #endif /* USE_DUFFS_LOOP */
   1085             src += srcskip;
   1086             dst += dstskip;
   1087         }
   1088     }
   1089 }
   1090 
   1091 /* Special optimized blit for RGB 10-10-10 --> RGB 3-3-2 */
   1092 #define RGB101010_RGB332(dst, src) { \
   1093     dst = (Uint8)((((src)&0x38000000)>>22)| \
   1094                   (((src)&0x000E0000)>>15)| \
   1095                   (((src)&0x00000300)>>8)); \
   1096 }
   1097 static void
   1098 Blit_RGB101010_index8(SDL_BlitInfo * info)
   1099 {
   1100 #ifndef USE_DUFFS_LOOP
   1101     int c;
   1102 #endif
   1103     int width, height;
   1104     Uint32 *src;
   1105     const Uint8 *map;
   1106     Uint8 *dst;
   1107     int srcskip, dstskip;
   1108 
   1109     /* Set up some basic variables */
   1110     width = info->dst_w;
   1111     height = info->dst_h;
   1112     src = (Uint32 *) info->src;
   1113     srcskip = info->src_skip / 4;
   1114     dst = info->dst;
   1115     dstskip = info->dst_skip;
   1116     map = info->table;
   1117 
   1118     if (map == NULL) {
   1119         while (height--) {
   1120 #ifdef USE_DUFFS_LOOP
   1121             /* *INDENT-OFF* */
   1122             DUFFS_LOOP(
   1123                 RGB101010_RGB332(*dst++, *src);
   1124             , width);
   1125             /* *INDENT-ON* */
   1126 #else
   1127             for (c = width / 4; c; --c) {
   1128                 /* Pack RGB into 8bit pixel */
   1129                 ++src;
   1130                 RGB101010_RGB332(*dst++, *src);
   1131                 ++src;
   1132                 RGB101010_RGB332(*dst++, *src);
   1133                 ++src;
   1134                 RGB101010_RGB332(*dst++, *src);
   1135                 ++src;
   1136             }
   1137             switch (width & 3) {
   1138             case 3:
   1139                 RGB101010_RGB332(*dst++, *src);
   1140                 ++src;
   1141             case 2:
   1142                 RGB101010_RGB332(*dst++, *src);
   1143                 ++src;
   1144             case 1:
   1145                 RGB101010_RGB332(*dst++, *src);
   1146                 ++src;
   1147             }
   1148 #endif /* USE_DUFFS_LOOP */
   1149             src += srcskip;
   1150             dst += dstskip;
   1151         }
   1152     } else {
   1153         int Pixel;
   1154 
   1155         while (height--) {
   1156 #ifdef USE_DUFFS_LOOP
   1157             /* *INDENT-OFF* */
   1158             DUFFS_LOOP(
   1159                 RGB101010_RGB332(Pixel, *src);
   1160                 *dst++ = map[Pixel];
   1161                 ++src;
   1162             , width);
   1163             /* *INDENT-ON* */
   1164 #else
   1165             for (c = width / 4; c; --c) {
   1166                 /* Pack RGB into 8bit pixel */
   1167                 RGB101010_RGB332(Pixel, *src);
   1168                 *dst++ = map[Pixel];
   1169                 ++src;
   1170                 RGB101010_RGB332(Pixel, *src);
   1171                 *dst++ = map[Pixel];
   1172                 ++src;
   1173                 RGB101010_RGB332(Pixel, *src);
   1174                 *dst++ = map[Pixel];
   1175                 ++src;
   1176                 RGB101010_RGB332(Pixel, *src);
   1177                 *dst++ = map[Pixel];
   1178                 ++src;
   1179             }
   1180             switch (width & 3) {
   1181             case 3:
   1182                 RGB101010_RGB332(Pixel, *src);
   1183                 *dst++ = map[Pixel];
   1184                 ++src;
   1185             case 2:
   1186                 RGB101010_RGB332(Pixel, *src);
   1187                 *dst++ = map[Pixel];
   1188                 ++src;
   1189             case 1:
   1190                 RGB101010_RGB332(Pixel, *src);
   1191                 *dst++ = map[Pixel];
   1192                 ++src;
   1193             }
   1194 #endif /* USE_DUFFS_LOOP */
   1195             src += srcskip;
   1196             dst += dstskip;
   1197         }
   1198     }
   1199 }
   1200 
   1201 /* Special optimized blit for RGB 8-8-8 --> RGB 5-5-5 */
   1202 #define RGB888_RGB555(dst, src) { \
   1203     *(Uint16 *)(dst) = (Uint16)((((*src)&0x00F80000)>>9)| \
   1204                                 (((*src)&0x0000F800)>>6)| \
   1205                                 (((*src)&0x000000F8)>>3)); \
   1206 }
   1207 #ifndef USE_DUFFS_LOOP
   1208 #define RGB888_RGB555_TWO(dst, src) { \
   1209     *(Uint32 *)(dst) = (((((src[HI])&0x00F80000)>>9)| \
   1210                          (((src[HI])&0x0000F800)>>6)| \
   1211                          (((src[HI])&0x000000F8)>>3))<<16)| \
   1212                          (((src[LO])&0x00F80000)>>9)| \
   1213                          (((src[LO])&0x0000F800)>>6)| \
   1214                          (((src[LO])&0x000000F8)>>3); \
   1215 }
   1216 #endif
   1217 static void
   1218 Blit_RGB888_RGB555(SDL_BlitInfo * info)
   1219 {
   1220 #ifndef USE_DUFFS_LOOP
   1221     int c;
   1222 #endif
   1223     int width, height;
   1224     Uint32 *src;
   1225     Uint16 *dst;
   1226     int srcskip, dstskip;
   1227 
   1228     /* Set up some basic variables */
   1229     width = info->dst_w;
   1230     height = info->dst_h;
   1231     src = (Uint32 *) info->src;
   1232     srcskip = info->src_skip / 4;
   1233     dst = (Uint16 *) info->dst;
   1234     dstskip = info->dst_skip / 2;
   1235 
   1236 #ifdef USE_DUFFS_LOOP
   1237     while (height--) {
   1238         /* *INDENT-OFF* */
   1239         DUFFS_LOOP(
   1240             RGB888_RGB555(dst, src);
   1241             ++src;
   1242             ++dst;
   1243         , width);
   1244         /* *INDENT-ON* */
   1245         src += srcskip;
   1246         dst += dstskip;
   1247     }
   1248 #else
   1249     /* Memory align at 4-byte boundary, if necessary */
   1250     if ((long) dst & 0x03) {
   1251         /* Don't do anything if width is 0 */
   1252         if (width == 0) {
   1253             return;
   1254         }
   1255         --width;
   1256 
   1257         while (height--) {
   1258             /* Perform copy alignment */
   1259             RGB888_RGB555(dst, src);
   1260             ++src;
   1261             ++dst;
   1262 
   1263             /* Copy in 4 pixel chunks */
   1264             for (c = width / 4; c; --c) {
   1265                 RGB888_RGB555_TWO(dst, src);
   1266                 src += 2;
   1267                 dst += 2;
   1268                 RGB888_RGB555_TWO(dst, src);
   1269                 src += 2;
   1270                 dst += 2;
   1271             }
   1272             /* Get any leftovers */
   1273             switch (width & 3) {
   1274             case 3:
   1275                 RGB888_RGB555(dst, src);
   1276                 ++src;
   1277                 ++dst;
   1278             case 2:
   1279                 RGB888_RGB555_TWO(dst, src);
   1280                 src += 2;
   1281                 dst += 2;
   1282                 break;
   1283             case 1:
   1284                 RGB888_RGB555(dst, src);
   1285                 ++src;
   1286                 ++dst;
   1287                 break;
   1288             }
   1289             src += srcskip;
   1290             dst += dstskip;
   1291         }
   1292     } else {
   1293         while (height--) {
   1294             /* Copy in 4 pixel chunks */
   1295             for (c = width / 4; c; --c) {
   1296                 RGB888_RGB555_TWO(dst, src);
   1297                 src += 2;
   1298                 dst += 2;
   1299                 RGB888_RGB555_TWO(dst, src);
   1300                 src += 2;
   1301                 dst += 2;
   1302             }
   1303             /* Get any leftovers */
   1304             switch (width & 3) {
   1305             case 3:
   1306                 RGB888_RGB555(dst, src);
   1307                 ++src;
   1308                 ++dst;
   1309             case 2:
   1310                 RGB888_RGB555_TWO(dst, src);
   1311                 src += 2;
   1312                 dst += 2;
   1313                 break;
   1314             case 1:
   1315                 RGB888_RGB555(dst, src);
   1316                 ++src;
   1317                 ++dst;
   1318                 break;
   1319             }
   1320             src += srcskip;
   1321             dst += dstskip;
   1322         }
   1323     }
   1324 #endif /* USE_DUFFS_LOOP */
   1325 }
   1326 
   1327 /* Special optimized blit for RGB 8-8-8 --> RGB 5-6-5 */
   1328 #define RGB888_RGB565(dst, src) { \
   1329     *(Uint16 *)(dst) = (Uint16)((((*src)&0x00F80000)>>8)| \
   1330                                 (((*src)&0x0000FC00)>>5)| \
   1331                                 (((*src)&0x000000F8)>>3)); \
   1332 }
   1333 #ifndef USE_DUFFS_LOOP
   1334 #define RGB888_RGB565_TWO(dst, src) { \
   1335     *(Uint32 *)(dst) = (((((src[HI])&0x00F80000)>>8)| \
   1336                          (((src[HI])&0x0000FC00)>>5)| \
   1337                          (((src[HI])&0x000000F8)>>3))<<16)| \
   1338                          (((src[LO])&0x00F80000)>>8)| \
   1339                          (((src[LO])&0x0000FC00)>>5)| \
   1340                          (((src[LO])&0x000000F8)>>3); \
   1341 }
   1342 #endif
   1343 static void
   1344 Blit_RGB888_RGB565(SDL_BlitInfo * info)
   1345 {
   1346 #ifndef USE_DUFFS_LOOP
   1347     int c;
   1348 #endif
   1349     int width, height;
   1350     Uint32 *src;
   1351     Uint16 *dst;
   1352     int srcskip, dstskip;
   1353 
   1354     /* Set up some basic variables */
   1355     width = info->dst_w;
   1356     height = info->dst_h;
   1357     src = (Uint32 *) info->src;
   1358     srcskip = info->src_skip / 4;
   1359     dst = (Uint16 *) info->dst;
   1360     dstskip = info->dst_skip / 2;
   1361 
   1362 #ifdef USE_DUFFS_LOOP
   1363     while (height--) {
   1364         /* *INDENT-OFF* */
   1365         DUFFS_LOOP(
   1366             RGB888_RGB565(dst, src);
   1367             ++src;
   1368             ++dst;
   1369         , width);
   1370         /* *INDENT-ON* */
   1371         src += srcskip;
   1372         dst += dstskip;
   1373     }
   1374 #else
   1375     /* Memory align at 4-byte boundary, if necessary */
   1376     if ((long) dst & 0x03) {
   1377         /* Don't do anything if width is 0 */
   1378         if (width == 0) {
   1379             return;
   1380         }
   1381         --width;
   1382 
   1383         while (height--) {
   1384             /* Perform copy alignment */
   1385             RGB888_RGB565(dst, src);
   1386             ++src;
   1387             ++dst;
   1388 
   1389             /* Copy in 4 pixel chunks */
   1390             for (c = width / 4; c; --c) {
   1391                 RGB888_RGB565_TWO(dst, src);
   1392                 src += 2;
   1393                 dst += 2;
   1394                 RGB888_RGB565_TWO(dst, src);
   1395                 src += 2;
   1396                 dst += 2;
   1397             }
   1398             /* Get any leftovers */
   1399             switch (width & 3) {
   1400             case 3:
   1401                 RGB888_RGB565(dst, src);
   1402                 ++src;
   1403                 ++dst;
   1404             case 2:
   1405                 RGB888_RGB565_TWO(dst, src);
   1406                 src += 2;
   1407                 dst += 2;
   1408                 break;
   1409             case 1:
   1410                 RGB888_RGB565(dst, src);
   1411                 ++src;
   1412                 ++dst;
   1413                 break;
   1414             }
   1415             src += srcskip;
   1416             dst += dstskip;
   1417         }
   1418     } else {
   1419         while (height--) {
   1420             /* Copy in 4 pixel chunks */
   1421             for (c = width / 4; c; --c) {
   1422                 RGB888_RGB565_TWO(dst, src);
   1423                 src += 2;
   1424                 dst += 2;
   1425                 RGB888_RGB565_TWO(dst, src);
   1426                 src += 2;
   1427                 dst += 2;
   1428             }
   1429             /* Get any leftovers */
   1430             switch (width & 3) {
   1431             case 3:
   1432                 RGB888_RGB565(dst, src);
   1433                 ++src;
   1434                 ++dst;
   1435             case 2:
   1436                 RGB888_RGB565_TWO(dst, src);
   1437                 src += 2;
   1438                 dst += 2;
   1439                 break;
   1440             case 1:
   1441                 RGB888_RGB565(dst, src);
   1442                 ++src;
   1443                 ++dst;
   1444                 break;
   1445             }
   1446             src += srcskip;
   1447             dst += dstskip;
   1448         }
   1449     }
   1450 #endif /* USE_DUFFS_LOOP */
   1451 }
   1452 
   1453 
   1454 #if SDL_HAVE_BLIT_N_RGB565
   1455 
   1456 /* Special optimized blit for RGB 5-6-5 --> 32-bit RGB surfaces */
   1457 #define RGB565_32(dst, src, map) (map[src[LO]*2] + map[src[HI]*2+1])
   1458 static void
   1459 Blit_RGB565_32(SDL_BlitInfo * info, const Uint32 * map)
   1460 {
   1461 #ifndef USE_DUFFS_LOOP
   1462     int c;
   1463 #endif
   1464     int width, height;
   1465     Uint8 *src;
   1466     Uint32 *dst;
   1467     int srcskip, dstskip;
   1468 
   1469     /* Set up some basic variables */
   1470     width = info->dst_w;
   1471     height = info->dst_h;
   1472     src = info->src;
   1473     srcskip = info->src_skip;
   1474     dst = (Uint32 *) info->dst;
   1475     dstskip = info->dst_skip / 4;
   1476 
   1477 #ifdef USE_DUFFS_LOOP
   1478     while (height--) {
   1479         /* *INDENT-OFF* */
   1480         DUFFS_LOOP(
   1481         {
   1482             *dst++ = RGB565_32(dst, src, map);
   1483             src += 2;
   1484         },
   1485         width);
   1486         /* *INDENT-ON* */
   1487         src += srcskip;
   1488         dst += dstskip;
   1489     }
   1490 #else
   1491     while (height--) {
   1492         /* Copy in 4 pixel chunks */
   1493         for (c = width / 4; c; --c) {
   1494             *dst++ = RGB565_32(dst, src, map);
   1495             src += 2;
   1496             *dst++ = RGB565_32(dst, src, map);
   1497             src += 2;
   1498             *dst++ = RGB565_32(dst, src, map);
   1499             src += 2;
   1500             *dst++ = RGB565_32(dst, src, map);
   1501             src += 2;
   1502         }
   1503         /* Get any leftovers */
   1504         switch (width & 3) {
   1505         case 3:
   1506             *dst++ = RGB565_32(dst, src, map);
   1507             src += 2;
   1508         case 2:
   1509             *dst++ = RGB565_32(dst, src, map);
   1510             src += 2;
   1511         case 1:
   1512             *dst++ = RGB565_32(dst, src, map);
   1513             src += 2;
   1514             break;
   1515         }
   1516         src += srcskip;
   1517         dst += dstskip;
   1518     }
   1519 #endif /* USE_DUFFS_LOOP */
   1520 }
   1521 
   1522 /* Special optimized blit for RGB 5-6-5 --> ARGB 8-8-8-8 */
   1523 static const Uint32 RGB565_ARGB8888_LUT[512] = {
   1524     0x00000000, 0xff000000, 0x00000008, 0xff002000,
   1525     0x00000010, 0xff004000, 0x00000018, 0xff006100,
   1526     0x00000020, 0xff008100, 0x00000029, 0xff00a100,
   1527     0x00000031, 0xff00c200, 0x00000039, 0xff00e200,
   1528     0x00000041, 0xff080000, 0x0000004a, 0xff082000,
   1529     0x00000052, 0xff084000, 0x0000005a, 0xff086100,
   1530     0x00000062, 0xff088100, 0x0000006a, 0xff08a100,
   1531     0x00000073, 0xff08c200, 0x0000007b, 0xff08e200,
   1532     0x00000083, 0xff100000, 0x0000008b, 0xff102000,
   1533     0x00000094, 0xff104000, 0x0000009c, 0xff106100,
   1534     0x000000a4, 0xff108100, 0x000000ac, 0xff10a100,
   1535     0x000000b4, 0xff10c200, 0x000000bd, 0xff10e200,
   1536     0x000000c5, 0xff180000, 0x000000cd, 0xff182000,
   1537     0x000000d5, 0xff184000, 0x000000de, 0xff186100,
   1538     0x000000e6, 0xff188100, 0x000000ee, 0xff18a100,
   1539     0x000000f6, 0xff18c200, 0x000000ff, 0xff18e200,
   1540     0x00000400, 0xff200000, 0x00000408, 0xff202000,
   1541     0x00000410, 0xff204000, 0x00000418, 0xff206100,
   1542     0x00000420, 0xff208100, 0x00000429, 0xff20a100,
   1543     0x00000431, 0xff20c200, 0x00000439, 0xff20e200,
   1544     0x00000441, 0xff290000, 0x0000044a, 0xff292000,
   1545     0x00000452, 0xff294000, 0x0000045a, 0xff296100,
   1546     0x00000462, 0xff298100, 0x0000046a, 0xff29a100,
   1547     0x00000473, 0xff29c200, 0x0000047b, 0xff29e200,
   1548     0x00000483, 0xff310000, 0x0000048b, 0xff312000,
   1549     0x00000494, 0xff314000, 0x0000049c, 0xff316100,
   1550     0x000004a4, 0xff318100, 0x000004ac, 0xff31a100,
   1551     0x000004b4, 0xff31c200, 0x000004bd, 0xff31e200,
   1552     0x000004c5, 0xff390000, 0x000004cd, 0xff392000,
   1553     0x000004d5, 0xff394000, 0x000004de, 0xff396100,
   1554     0x000004e6, 0xff398100, 0x000004ee, 0xff39a100,
   1555     0x000004f6, 0xff39c200, 0x000004ff, 0xff39e200,
   1556     0x00000800, 0xff410000, 0x00000808, 0xff412000,
   1557     0x00000810, 0xff414000, 0x00000818, 0xff416100,
   1558     0x00000820, 0xff418100, 0x00000829, 0xff41a100,
   1559     0x00000831, 0xff41c200, 0x00000839, 0xff41e200,
   1560     0x00000841, 0xff4a0000, 0x0000084a, 0xff4a2000,
   1561     0x00000852, 0xff4a4000, 0x0000085a, 0xff4a6100,
   1562     0x00000862, 0xff4a8100, 0x0000086a, 0xff4aa100,
   1563     0x00000873, 0xff4ac200, 0x0000087b, 0xff4ae200,
   1564     0x00000883, 0xff520000, 0x0000088b, 0xff522000,
   1565     0x00000894, 0xff524000, 0x0000089c, 0xff526100,
   1566     0x000008a4, 0xff528100, 0x000008ac, 0xff52a100,
   1567     0x000008b4, 0xff52c200, 0x000008bd, 0xff52e200,
   1568     0x000008c5, 0xff5a0000, 0x000008cd, 0xff5a2000,
   1569     0x000008d5, 0xff5a4000, 0x000008de, 0xff5a6100,
   1570     0x000008e6, 0xff5a8100, 0x000008ee, 0xff5aa100,
   1571     0x000008f6, 0xff5ac200, 0x000008ff, 0xff5ae200,
   1572     0x00000c00, 0xff620000, 0x00000c08, 0xff622000,
   1573     0x00000c10, 0xff624000, 0x00000c18, 0xff626100,
   1574     0x00000c20, 0xff628100, 0x00000c29, 0xff62a100,
   1575     0x00000c31, 0xff62c200, 0x00000c39, 0xff62e200,
   1576     0x00000c41, 0xff6a0000, 0x00000c4a, 0xff6a2000,
   1577     0x00000c52, 0xff6a4000, 0x00000c5a, 0xff6a6100,
   1578     0x00000c62, 0xff6a8100, 0x00000c6a, 0xff6aa100,
   1579     0x00000c73, 0xff6ac200, 0x00000c7b, 0xff6ae200,
   1580     0x00000c83, 0xff730000, 0x00000c8b, 0xff732000,
   1581     0x00000c94, 0xff734000, 0x00000c9c, 0xff736100,
   1582     0x00000ca4, 0xff738100, 0x00000cac, 0xff73a100,
   1583     0x00000cb4, 0xff73c200, 0x00000cbd, 0xff73e200,
   1584     0x00000cc5, 0xff7b0000, 0x00000ccd, 0xff7b2000,
   1585     0x00000cd5, 0xff7b4000, 0x00000cde, 0xff7b6100,
   1586     0x00000ce6, 0xff7b8100, 0x00000cee, 0xff7ba100,
   1587     0x00000cf6, 0xff7bc200, 0x00000cff, 0xff7be200,
   1588     0x00001000, 0xff830000, 0x00001008, 0xff832000,
   1589     0x00001010, 0xff834000, 0x00001018, 0xff836100,
   1590     0x00001020, 0xff838100, 0x00001029, 0xff83a100,
   1591     0x00001031, 0xff83c200, 0x00001039, 0xff83e200,
   1592     0x00001041, 0xff8b0000, 0x0000104a, 0xff8b2000,
   1593     0x00001052, 0xff8b4000, 0x0000105a, 0xff8b6100,
   1594     0x00001062, 0xff8b8100, 0x0000106a, 0xff8ba100,
   1595     0x00001073, 0xff8bc200, 0x0000107b, 0xff8be200,
   1596     0x00001083, 0xff940000, 0x0000108b, 0xff942000,
   1597     0x00001094, 0xff944000, 0x0000109c, 0xff946100,
   1598     0x000010a4, 0xff948100, 0x000010ac, 0xff94a100,
   1599     0x000010b4, 0xff94c200, 0x000010bd, 0xff94e200,
   1600     0x000010c5, 0xff9c0000, 0x000010cd, 0xff9c2000,
   1601     0x000010d5, 0xff9c4000, 0x000010de, 0xff9c6100,
   1602     0x000010e6, 0xff9c8100, 0x000010ee, 0xff9ca100,
   1603     0x000010f6, 0xff9cc200, 0x000010ff, 0xff9ce200,
   1604     0x00001400, 0xffa40000, 0x00001408, 0xffa42000,
   1605     0x00001410, 0xffa44000, 0x00001418, 0xffa46100,
   1606     0x00001420, 0xffa48100, 0x00001429, 0xffa4a100,
   1607     0x00001431, 0xffa4c200, 0x00001439, 0xffa4e200,
   1608     0x00001441, 0xffac0000, 0x0000144a, 0xffac2000,
   1609     0x00001452, 0xffac4000, 0x0000145a, 0xffac6100,
   1610     0x00001462, 0xffac8100, 0x0000146a, 0xffaca100,
   1611     0x00001473, 0xffacc200, 0x0000147b, 0xfface200,
   1612     0x00001483, 0xffb40000, 0x0000148b, 0xffb42000,
   1613     0x00001494, 0xffb44000, 0x0000149c, 0xffb46100,
   1614     0x000014a4, 0xffb48100, 0x000014ac, 0xffb4a100,
   1615     0x000014b4, 0xffb4c200, 0x000014bd, 0xffb4e200,
   1616     0x000014c5, 0xffbd0000, 0x000014cd, 0xffbd2000,
   1617     0x000014d5, 0xffbd4000, 0x000014de, 0xffbd6100,
   1618     0x000014e6, 0xffbd8100, 0x000014ee, 0xffbda100,
   1619     0x000014f6, 0xffbdc200, 0x000014ff, 0xffbde200,
   1620     0x00001800, 0xffc50000, 0x00001808, 0xffc52000,
   1621     0x00001810, 0xffc54000, 0x00001818, 0xffc56100,
   1622     0x00001820, 0xffc58100, 0x00001829, 0xffc5a100,
   1623     0x00001831, 0xffc5c200, 0x00001839, 0xffc5e200,
   1624     0x00001841, 0xffcd0000, 0x0000184a, 0xffcd2000,
   1625     0x00001852, 0xffcd4000, 0x0000185a, 0xffcd6100,
   1626     0x00001862, 0xffcd8100, 0x0000186a, 0xffcda100,
   1627     0x00001873, 0xffcdc200, 0x0000187b, 0xffcde200,
   1628     0x00001883, 0xffd50000, 0x0000188b, 0xffd52000,
   1629     0x00001894, 0xffd54000, 0x0000189c, 0xffd56100,
   1630     0x000018a4, 0xffd58100, 0x000018ac, 0xffd5a100,
   1631     0x000018b4, 0xffd5c200, 0x000018bd, 0xffd5e200,
   1632     0x000018c5, 0xffde0000, 0x000018cd, 0xffde2000,
   1633     0x000018d5, 0xffde4000, 0x000018de, 0xffde6100,
   1634     0x000018e6, 0xffde8100, 0x000018ee, 0xffdea100,
   1635     0x000018f6, 0xffdec200, 0x000018ff, 0xffdee200,
   1636     0x00001c00, 0xffe60000, 0x00001c08, 0xffe62000,
   1637     0x00001c10, 0xffe64000, 0x00001c18, 0xffe66100,
   1638     0x00001c20, 0xffe68100, 0x00001c29, 0xffe6a100,
   1639     0x00001c31, 0xffe6c200, 0x00001c39, 0xffe6e200,
   1640     0x00001c41, 0xffee0000, 0x00001c4a, 0xffee2000,
   1641     0x00001c52, 0xffee4000, 0x00001c5a, 0xffee6100,
   1642     0x00001c62, 0xffee8100, 0x00001c6a, 0xffeea100,
   1643     0x00001c73, 0xffeec200, 0x00001c7b, 0xffeee200,
   1644     0x00001c83, 0xfff60000, 0x00001c8b, 0xfff62000,
   1645     0x00001c94, 0xfff64000, 0x00001c9c, 0xfff66100,
   1646     0x00001ca4, 0xfff68100, 0x00001cac, 0xfff6a100,
   1647     0x00001cb4, 0xfff6c200, 0x00001cbd, 0xfff6e200,
   1648     0x00001cc5, 0xffff0000, 0x00001ccd, 0xffff2000,
   1649     0x00001cd5, 0xffff4000, 0x00001cde, 0xffff6100,
   1650     0x00001ce6, 0xffff8100, 0x00001cee, 0xffffa100,
   1651     0x00001cf6, 0xffffc200, 0x00001cff, 0xffffe200
   1652 };
   1653 
   1654 static void
   1655 Blit_RGB565_ARGB8888(SDL_BlitInfo * info)
   1656 {
   1657     Blit_RGB565_32(info, RGB565_ARGB8888_LUT);
   1658 }
   1659 
/*
 * Special optimized blit for RGB 5-6-5 --> ABGR 8-8-8-8
 *
 * Lookup table handed to Blit_RGB565_32 by Blit_RGB565_ABGR8888.  The 512
 * entries form 256 consecutive pairs.  Within each pair the first entry has
 * 0xff in its top byte and only varies in bits 8-23, while the second entry
 * only varies in bits 0-15.
 * NOTE(review): presumably pair N is selected by one byte of the 16-bit
 * source pixel (first entry for the low byte, second for the high byte) and
 * the two entries are combined into the output pixel -- confirm against
 * Blit_RGB565_32.
 */
static const Uint32 RGB565_ABGR8888_LUT[512] = {
    0xff000000, 0x00000000, 0xff080000, 0x00002000,
    0xff100000, 0x00004000, 0xff180000, 0x00006100,
    0xff200000, 0x00008100, 0xff290000, 0x0000a100,
    0xff310000, 0x0000c200, 0xff390000, 0x0000e200,
    0xff410000, 0x00000008, 0xff4a0000, 0x00002008,
    0xff520000, 0x00004008, 0xff5a0000, 0x00006108,
    0xff620000, 0x00008108, 0xff6a0000, 0x0000a108,
    0xff730000, 0x0000c208, 0xff7b0000, 0x0000e208,
    0xff830000, 0x00000010, 0xff8b0000, 0x00002010,
    0xff940000, 0x00004010, 0xff9c0000, 0x00006110,
    0xffa40000, 0x00008110, 0xffac0000, 0x0000a110,
    0xffb40000, 0x0000c210, 0xffbd0000, 0x0000e210,
    0xffc50000, 0x00000018, 0xffcd0000, 0x00002018,
    0xffd50000, 0x00004018, 0xffde0000, 0x00006118,
    0xffe60000, 0x00008118, 0xffee0000, 0x0000a118,
    0xfff60000, 0x0000c218, 0xffff0000, 0x0000e218,
    0xff000400, 0x00000020, 0xff080400, 0x00002020,
    0xff100400, 0x00004020, 0xff180400, 0x00006120,
    0xff200400, 0x00008120, 0xff290400, 0x0000a120,
    0xff310400, 0x0000c220, 0xff390400, 0x0000e220,
    0xff410400, 0x00000029, 0xff4a0400, 0x00002029,
    0xff520400, 0x00004029, 0xff5a0400, 0x00006129,
    0xff620400, 0x00008129, 0xff6a0400, 0x0000a129,
    0xff730400, 0x0000c229, 0xff7b0400, 0x0000e229,
    0xff830400, 0x00000031, 0xff8b0400, 0x00002031,
    0xff940400, 0x00004031, 0xff9c0400, 0x00006131,
    0xffa40400, 0x00008131, 0xffac0400, 0x0000a131,
    0xffb40400, 0x0000c231, 0xffbd0400, 0x0000e231,
    0xffc50400, 0x00000039, 0xffcd0400, 0x00002039,
    0xffd50400, 0x00004039, 0xffde0400, 0x00006139,
    0xffe60400, 0x00008139, 0xffee0400, 0x0000a139,
    0xfff60400, 0x0000c239, 0xffff0400, 0x0000e239,
    0xff000800, 0x00000041, 0xff080800, 0x00002041,
    0xff100800, 0x00004041, 0xff180800, 0x00006141,
    0xff200800, 0x00008141, 0xff290800, 0x0000a141,
    0xff310800, 0x0000c241, 0xff390800, 0x0000e241,
    0xff410800, 0x0000004a, 0xff4a0800, 0x0000204a,
    0xff520800, 0x0000404a, 0xff5a0800, 0x0000614a,
    0xff620800, 0x0000814a, 0xff6a0800, 0x0000a14a,
    0xff730800, 0x0000c24a, 0xff7b0800, 0x0000e24a,
    0xff830800, 0x00000052, 0xff8b0800, 0x00002052,
    0xff940800, 0x00004052, 0xff9c0800, 0x00006152,
    0xffa40800, 0x00008152, 0xffac0800, 0x0000a152,
    0xffb40800, 0x0000c252, 0xffbd0800, 0x0000e252,
    0xffc50800, 0x0000005a, 0xffcd0800, 0x0000205a,
    0xffd50800, 0x0000405a, 0xffde0800, 0x0000615a,
    0xffe60800, 0x0000815a, 0xffee0800, 0x0000a15a,
    0xfff60800, 0x0000c25a, 0xffff0800, 0x0000e25a,
    0xff000c00, 0x00000062, 0xff080c00, 0x00002062,
    0xff100c00, 0x00004062, 0xff180c00, 0x00006162,
    0xff200c00, 0x00008162, 0xff290c00, 0x0000a162,
    0xff310c00, 0x0000c262, 0xff390c00, 0x0000e262,
    0xff410c00, 0x0000006a, 0xff4a0c00, 0x0000206a,
    0xff520c00, 0x0000406a, 0xff5a0c00, 0x0000616a,
    0xff620c00, 0x0000816a, 0xff6a0c00, 0x0000a16a,
    0xff730c00, 0x0000c26a, 0xff7b0c00, 0x0000e26a,
    0xff830c00, 0x00000073, 0xff8b0c00, 0x00002073,
    0xff940c00, 0x00004073, 0xff9c0c00, 0x00006173,
    0xffa40c00, 0x00008173, 0xffac0c00, 0x0000a173,
    0xffb40c00, 0x0000c273, 0xffbd0c00, 0x0000e273,
    0xffc50c00, 0x0000007b, 0xffcd0c00, 0x0000207b,
    0xffd50c00, 0x0000407b, 0xffde0c00, 0x0000617b,
    0xffe60c00, 0x0000817b, 0xffee0c00, 0x0000a17b,
    0xfff60c00, 0x0000c27b, 0xffff0c00, 0x0000e27b,
    0xff001000, 0x00000083, 0xff081000, 0x00002083,
    0xff101000, 0x00004083, 0xff181000, 0x00006183,
    0xff201000, 0x00008183, 0xff291000, 0x0000a183,
    0xff311000, 0x0000c283, 0xff391000, 0x0000e283,
    0xff411000, 0x0000008b, 0xff4a1000, 0x0000208b,
    0xff521000, 0x0000408b, 0xff5a1000, 0x0000618b,
    0xff621000, 0x0000818b, 0xff6a1000, 0x0000a18b,
    0xff731000, 0x0000c28b, 0xff7b1000, 0x0000e28b,
    0xff831000, 0x00000094, 0xff8b1000, 0x00002094,
    0xff941000, 0x00004094, 0xff9c1000, 0x00006194,
    0xffa41000, 0x00008194, 0xffac1000, 0x0000a194,
    0xffb41000, 0x0000c294, 0xffbd1000, 0x0000e294,
    0xffc51000, 0x0000009c, 0xffcd1000, 0x0000209c,
    0xffd51000, 0x0000409c, 0xffde1000, 0x0000619c,
    0xffe61000, 0x0000819c, 0xffee1000, 0x0000a19c,
    0xfff61000, 0x0000c29c, 0xffff1000, 0x0000e29c,
    0xff001400, 0x000000a4, 0xff081400, 0x000020a4,
    0xff101400, 0x000040a4, 0xff181400, 0x000061a4,
    0xff201400, 0x000081a4, 0xff291400, 0x0000a1a4,
    0xff311400, 0x0000c2a4, 0xff391400, 0x0000e2a4,
    0xff411400, 0x000000ac, 0xff4a1400, 0x000020ac,
    0xff521400, 0x000040ac, 0xff5a1400, 0x000061ac,
    0xff621400, 0x000081ac, 0xff6a1400, 0x0000a1ac,
    0xff731400, 0x0000c2ac, 0xff7b1400, 0x0000e2ac,
    0xff831400, 0x000000b4, 0xff8b1400, 0x000020b4,
    0xff941400, 0x000040b4, 0xff9c1400, 0x000061b4,
    0xffa41400, 0x000081b4, 0xffac1400, 0x0000a1b4,
    0xffb41400, 0x0000c2b4, 0xffbd1400, 0x0000e2b4,
    0xffc51400, 0x000000bd, 0xffcd1400, 0x000020bd,
    0xffd51400, 0x000040bd, 0xffde1400, 0x000061bd,
    0xffe61400, 0x000081bd, 0xffee1400, 0x0000a1bd,
    0xfff61400, 0x0000c2bd, 0xffff1400, 0x0000e2bd,
    0xff001800, 0x000000c5, 0xff081800, 0x000020c5,
    0xff101800, 0x000040c5, 0xff181800, 0x000061c5,
    0xff201800, 0x000081c5, 0xff291800, 0x0000a1c5,
    0xff311800, 0x0000c2c5, 0xff391800, 0x0000e2c5,
    0xff411800, 0x000000cd, 0xff4a1800, 0x000020cd,
    0xff521800, 0x000040cd, 0xff5a1800, 0x000061cd,
    0xff621800, 0x000081cd, 0xff6a1800, 0x0000a1cd,
    0xff731800, 0x0000c2cd, 0xff7b1800, 0x0000e2cd,
    0xff831800, 0x000000d5, 0xff8b1800, 0x000020d5,
    0xff941800, 0x000040d5, 0xff9c1800, 0x000061d5,
    0xffa41800, 0x000081d5, 0xffac1800, 0x0000a1d5,
    0xffb41800, 0x0000c2d5, 0xffbd1800, 0x0000e2d5,
    0xffc51800, 0x000000de, 0xffcd1800, 0x000020de,
    0xffd51800, 0x000040de, 0xffde1800, 0x000061de,
    0xffe61800, 0x000081de, 0xffee1800, 0x0000a1de,
    0xfff61800, 0x0000c2de, 0xffff1800, 0x0000e2de,
    0xff001c00, 0x000000e6, 0xff081c00, 0x000020e6,
    0xff101c00, 0x000040e6, 0xff181c00, 0x000061e6,
    0xff201c00, 0x000081e6, 0xff291c00, 0x0000a1e6,
    0xff311c00, 0x0000c2e6, 0xff391c00, 0x0000e2e6,
    0xff411c00, 0x000000ee, 0xff4a1c00, 0x000020ee,
    0xff521c00, 0x000040ee, 0xff5a1c00, 0x000061ee,
    0xff621c00, 0x000081ee, 0xff6a1c00, 0x0000a1ee,
    0xff731c00, 0x0000c2ee, 0xff7b1c00, 0x0000e2ee,
    0xff831c00, 0x000000f6, 0xff8b1c00, 0x000020f6,
    0xff941c00, 0x000040f6, 0xff9c1c00, 0x000061f6,
    0xffa41c00, 0x000081f6, 0xffac1c00, 0x0000a1f6,
    0xffb41c00, 0x0000c2f6, 0xffbd1c00, 0x0000e2f6,
    0xffc51c00, 0x000000ff, 0xffcd1c00, 0x000020ff,
    0xffd51c00, 0x000040ff, 0xffde1c00, 0x000061ff,
    0xffe61c00, 0x000081ff, 0xffee1c00, 0x0000a1ff,
    0xfff61c00, 0x0000c2ff, 0xffff1c00, 0x0000e2ff
};
   1791 
   1792 static void
   1793 Blit_RGB565_ABGR8888(SDL_BlitInfo * info)
   1794 {
   1795     Blit_RGB565_32(info, RGB565_ABGR8888_LUT);
   1796 }
   1797 
/*
 * Special optimized blit for RGB 5-6-5 --> RGBA 8-8-8-8
 *
 * Lookup table handed to Blit_RGB565_32 by Blit_RGB565_RGBA8888.  The 512
 * entries form 256 consecutive pairs.  Within each pair the first entry has
 * 0xff in its bottom byte and only varies in bits 8-23, while the second
 * entry only varies in bits 16-31.
 * NOTE(review): presumably pair N is selected by one byte of the 16-bit
 * source pixel (first entry for the low byte, second for the high byte) and
 * the two entries are combined into the output pixel -- confirm against
 * Blit_RGB565_32.
 */
static const Uint32 RGB565_RGBA8888_LUT[512] = {
    0x000000ff, 0x00000000, 0x000008ff, 0x00200000,
    0x000010ff, 0x00400000, 0x000018ff, 0x00610000,
    0x000020ff, 0x00810000, 0x000029ff, 0x00a10000,
    0x000031ff, 0x00c20000, 0x000039ff, 0x00e20000,
    0x000041ff, 0x08000000, 0x00004aff, 0x08200000,
    0x000052ff, 0x08400000, 0x00005aff, 0x08610000,
    0x000062ff, 0x08810000, 0x00006aff, 0x08a10000,
    0x000073ff, 0x08c20000, 0x00007bff, 0x08e20000,
    0x000083ff, 0x10000000, 0x00008bff, 0x10200000,
    0x000094ff, 0x10400000, 0x00009cff, 0x10610000,
    0x0000a4ff, 0x10810000, 0x0000acff, 0x10a10000,
    0x0000b4ff, 0x10c20000, 0x0000bdff, 0x10e20000,
    0x0000c5ff, 0x18000000, 0x0000cdff, 0x18200000,
    0x0000d5ff, 0x18400000, 0x0000deff, 0x18610000,
    0x0000e6ff, 0x18810000, 0x0000eeff, 0x18a10000,
    0x0000f6ff, 0x18c20000, 0x0000ffff, 0x18e20000,
    0x000400ff, 0x20000000, 0x000408ff, 0x20200000,
    0x000410ff, 0x20400000, 0x000418ff, 0x20610000,
    0x000420ff, 0x20810000, 0x000429ff, 0x20a10000,
    0x000431ff, 0x20c20000, 0x000439ff, 0x20e20000,
    0x000441ff, 0x29000000, 0x00044aff, 0x29200000,
    0x000452ff, 0x29400000, 0x00045aff, 0x29610000,
    0x000462ff, 0x29810000, 0x00046aff, 0x29a10000,
    0x000473ff, 0x29c20000, 0x00047bff, 0x29e20000,
    0x000483ff, 0x31000000, 0x00048bff, 0x31200000,
    0x000494ff, 0x31400000, 0x00049cff, 0x31610000,
    0x0004a4ff, 0x31810000, 0x0004acff, 0x31a10000,
    0x0004b4ff, 0x31c20000, 0x0004bdff, 0x31e20000,
    0x0004c5ff, 0x39000000, 0x0004cdff, 0x39200000,
    0x0004d5ff, 0x39400000, 0x0004deff, 0x39610000,
    0x0004e6ff, 0x39810000, 0x0004eeff, 0x39a10000,
    0x0004f6ff, 0x39c20000, 0x0004ffff, 0x39e20000,
    0x000800ff, 0x41000000, 0x000808ff, 0x41200000,
    0x000810ff, 0x41400000, 0x000818ff, 0x41610000,
    0x000820ff, 0x41810000, 0x000829ff, 0x41a10000,
    0x000831ff, 0x41c20000, 0x000839ff, 0x41e20000,
    0x000841ff, 0x4a000000, 0x00084aff, 0x4a200000,
    0x000852ff, 0x4a400000, 0x00085aff, 0x4a610000,
    0x000862ff, 0x4a810000, 0x00086aff, 0x4aa10000,
    0x000873ff, 0x4ac20000, 0x00087bff, 0x4ae20000,
    0x000883ff, 0x52000000, 0x00088bff, 0x52200000,
    0x000894ff, 0x52400000, 0x00089cff, 0x52610000,
    0x0008a4ff, 0x52810000, 0x0008acff, 0x52a10000,
    0x0008b4ff, 0x52c20000, 0x0008bdff, 0x52e20000,
    0x0008c5ff, 0x5a000000, 0x0008cdff, 0x5a200000,
    0x0008d5ff, 0x5a400000, 0x0008deff, 0x5a610000,
    0x0008e6ff, 0x5a810000, 0x0008eeff, 0x5aa10000,
    0x0008f6ff, 0x5ac20000, 0x0008ffff, 0x5ae20000,
    0x000c00ff, 0x62000000, 0x000c08ff, 0x62200000,
    0x000c10ff, 0x62400000, 0x000c18ff, 0x62610000,
    0x000c20ff, 0x62810000, 0x000c29ff, 0x62a10000,
    0x000c31ff, 0x62c20000, 0x000c39ff, 0x62e20000,
    0x000c41ff, 0x6a000000, 0x000c4aff, 0x6a200000,
    0x000c52ff, 0x6a400000, 0x000c5aff, 0x6a610000,
    0x000c62ff, 0x6a810000, 0x000c6aff, 0x6aa10000,
    0x000c73ff, 0x6ac20000, 0x000c7bff, 0x6ae20000,
    0x000c83ff, 0x73000000, 0x000c8bff, 0x73200000,
    0x000c94ff, 0x73400000, 0x000c9cff, 0x73610000,
    0x000ca4ff, 0x73810000, 0x000cacff, 0x73a10000,
    0x000cb4ff, 0x73c20000, 0x000cbdff, 0x73e20000,
    0x000cc5ff, 0x7b000000, 0x000ccdff, 0x7b200000,
    0x000cd5ff, 0x7b400000, 0x000cdeff, 0x7b610000,
    0x000ce6ff, 0x7b810000, 0x000ceeff, 0x7ba10000,
    0x000cf6ff, 0x7bc20000, 0x000cffff, 0x7be20000,
    0x001000ff, 0x83000000, 0x001008ff, 0x83200000,
    0x001010ff, 0x83400000, 0x001018ff, 0x83610000,
    0x001020ff, 0x83810000, 0x001029ff, 0x83a10000,
    0x001031ff, 0x83c20000, 0x001039ff, 0x83e20000,
    0x001041ff, 0x8b000000, 0x00104aff, 0x8b200000,
    0x001052ff, 0x8b400000, 0x00105aff, 0x8b610000,
    0x001062ff, 0x8b810000, 0x00106aff, 0x8ba10000,
    0x001073ff, 0x8bc20000, 0x00107bff, 0x8be20000,
    0x001083ff, 0x94000000, 0x00108bff, 0x94200000,
    0x001094ff, 0x94400000, 0x00109cff, 0x94610000,
    0x0010a4ff, 0x94810000, 0x0010acff, 0x94a10000,
    0x0010b4ff, 0x94c20000, 0x0010bdff, 0x94e20000,
    0x0010c5ff, 0x9c000000, 0x0010cdff, 0x9c200000,
    0x0010d5ff, 0x9c400000, 0x0010deff, 0x9c610000,
    0x0010e6ff, 0x9c810000, 0x0010eeff, 0x9ca10000,
    0x0010f6ff, 0x9cc20000, 0x0010ffff, 0x9ce20000,
    0x001400ff, 0xa4000000, 0x001408ff, 0xa4200000,
    0x001410ff, 0xa4400000, 0x001418ff, 0xa4610000,
    0x001420ff, 0xa4810000, 0x001429ff, 0xa4a10000,
    0x001431ff, 0xa4c20000, 0x001439ff, 0xa4e20000,
    0x001441ff, 0xac000000, 0x00144aff, 0xac200000,
    0x001452ff, 0xac400000, 0x00145aff, 0xac610000,
    0x001462ff, 0xac810000, 0x00146aff, 0xaca10000,
    0x001473ff, 0xacc20000, 0x00147bff, 0xace20000,
    0x001483ff, 0xb4000000, 0x00148bff, 0xb4200000,
    0x001494ff, 0xb4400000, 0x00149cff, 0xb4610000,
    0x0014a4ff, 0xb4810000, 0x0014acff, 0xb4a10000,
    0x0014b4ff, 0xb4c20000, 0x0014bdff, 0xb4e20000,
    0x0014c5ff, 0xbd000000, 0x0014cdff, 0xbd200000,
    0x0014d5ff, 0xbd400000, 0x0014deff, 0xbd610000,
    0x0014e6ff, 0xbd810000, 0x0014eeff, 0xbda10000,
    0x0014f6ff, 0xbdc20000, 0x0014ffff, 0xbde20000,
    0x001800ff, 0xc5000000, 0x001808ff, 0xc5200000,
    0x001810ff, 0xc5400000, 0x001818ff, 0xc5610000,
    0x001820ff, 0xc5810000, 0x001829ff, 0xc5a10000,
    0x001831ff, 0xc5c20000, 0x001839ff, 0xc5e20000,
    0x001841ff, 0xcd000000, 0x00184aff, 0xcd200000,
    0x001852ff, 0xcd400000, 0x00185aff, 0xcd610000,
    0x001862ff, 0xcd810000, 0x00186aff, 0xcda10000,
    0x001873ff, 0xcdc20000, 0x00187bff, 0xcde20000,
    0x001883ff, 0xd5000000, 0x00188bff, 0xd5200000,
    0x001894ff, 0xd5400000, 0x00189cff, 0xd5610000,
    0x0018a4ff, 0xd5810000, 0x0018acff, 0xd5a10000,
    0x0018b4ff, 0xd5c20000, 0x0018bdff, 0xd5e20000,
    0x0018c5ff, 0xde000000, 0x0018cdff, 0xde200000,
    0x0018d5ff, 0xde400000, 0x0018deff, 0xde610000,
    0x0018e6ff, 0xde810000, 0x0018eeff, 0xdea10000,
    0x0018f6ff, 0xdec20000, 0x0018ffff, 0xdee20000,
    0x001c00ff, 0xe6000000, 0x001c08ff, 0xe6200000,
    0x001c10ff, 0xe6400000, 0x001c18ff, 0xe6610000,
    0x001c20ff, 0xe6810000, 0x001c29ff, 0xe6a10000,
    0x001c31ff, 0xe6c20000, 0x001c39ff, 0xe6e20000,
    0x001c41ff, 0xee000000, 0x001c4aff, 0xee200000,
    0x001c52ff, 0xee400000, 0x001c5aff, 0xee610000,
    0x001c62ff, 0xee810000, 0x001c6aff, 0xeea10000,
    0x001c73ff, 0xeec20000, 0x001c7bff, 0xeee20000,
    0x001c83ff, 0xf6000000, 0x001c8bff, 0xf6200000,
    0x001c94ff, 0xf6400000, 0x001c9cff, 0xf6610000,
    0x001ca4ff, 0xf6810000, 0x001cacff, 0xf6a10000,
    0x001cb4ff, 0xf6c20000, 0x001cbdff, 0xf6e20000,
    0x001cc5ff, 0xff000000, 0x001ccdff, 0xff200000,
    0x001cd5ff, 0xff400000, 0x001cdeff, 0xff610000,
    0x001ce6ff, 0xff810000, 0x001ceeff, 0xffa10000,
    0x001cf6ff, 0xffc20000, 0x001cffff, 0xffe20000,
};
   1929 
   1930 static void
   1931 Blit_RGB565_RGBA8888(SDL_BlitInfo * info)
   1932 {
   1933     Blit_RGB565_32(info, RGB565_RGBA8888_LUT);
   1934 }
   1935 
/*
 * Special optimized blit for RGB 5-6-5 --> BGRA 8-8-8-8
 *
 * Lookup table handed to Blit_RGB565_32 by Blit_RGB565_BGRA8888.  The 512
 * entries form 256 consecutive pairs.  Within each pair the first entry
 * only varies in bits 16-31, while the second entry has 0xff in its bottom
 * byte and only varies in bits 8-23.
 * NOTE(review): presumably pair N is selected by one byte of the 16-bit
 * source pixel (first entry for the low byte, second for the high byte) and
 * the two entries are combined into the output pixel -- confirm against
 * Blit_RGB565_32.
 */
static const Uint32 RGB565_BGRA8888_LUT[512] = {
    0x00000000, 0x000000ff, 0x08000000, 0x002000ff,
    0x10000000, 0x004000ff, 0x18000000, 0x006100ff,
    0x20000000, 0x008100ff, 0x29000000, 0x00a100ff,
    0x31000000, 0x00c200ff, 0x39000000, 0x00e200ff,
    0x41000000, 0x000008ff, 0x4a000000, 0x002008ff,
    0x52000000, 0x004008ff, 0x5a000000, 0x006108ff,
    0x62000000, 0x008108ff, 0x6a000000, 0x00a108ff,
    0x73000000, 0x00c208ff, 0x7b000000, 0x00e208ff,
    0x83000000, 0x000010ff, 0x8b000000, 0x002010ff,
    0x94000000, 0x004010ff, 0x9c000000, 0x006110ff,
    0xa4000000, 0x008110ff, 0xac000000, 0x00a110ff,
    0xb4000000, 0x00c210ff, 0xbd000000, 0x00e210ff,
    0xc5000000, 0x000018ff, 0xcd000000, 0x002018ff,
    0xd5000000, 0x004018ff, 0xde000000, 0x006118ff,
    0xe6000000, 0x008118ff, 0xee000000, 0x00a118ff,
    0xf6000000, 0x00c218ff, 0xff000000, 0x00e218ff,
    0x00040000, 0x000020ff, 0x08040000, 0x002020ff,
    0x10040000, 0x004020ff, 0x18040000, 0x006120ff,
    0x20040000, 0x008120ff, 0x29040000, 0x00a120ff,
    0x31040000, 0x00c220ff, 0x39040000, 0x00e220ff,
    0x41040000, 0x000029ff, 0x4a040000, 0x002029ff,
    0x52040000, 0x004029ff, 0x5a040000, 0x006129ff,
    0x62040000, 0x008129ff, 0x6a040000, 0x00a129ff,
    0x73040000, 0x00c229ff, 0x7b040000, 0x00e229ff,
    0x83040000, 0x000031ff, 0x8b040000, 0x002031ff,
    0x94040000, 0x004031ff, 0x9c040000, 0x006131ff,
    0xa4040000, 0x008131ff, 0xac040000, 0x00a131ff,
    0xb4040000, 0x00c231ff, 0xbd040000, 0x00e231ff,
    0xc5040000, 0x000039ff, 0xcd040000, 0x002039ff,
    0xd5040000, 0x004039ff, 0xde040000, 0x006139ff,
    0xe6040000, 0x008139ff, 0xee040000, 0x00a139ff,
    0xf6040000, 0x00c239ff, 0xff040000, 0x00e239ff,
    0x00080000, 0x000041ff, 0x08080000, 0x002041ff,
    0x10080000, 0x004041ff, 0x18080000, 0x006141ff,
    0x20080000, 0x008141ff, 0x29080000, 0x00a141ff,
    0x31080000, 0x00c241ff, 0x39080000, 0x00e241ff,
    0x41080000, 0x00004aff, 0x4a080000, 0x00204aff,
    0x52080000, 0x00404aff, 0x5a080000, 0x00614aff,
    0x62080000, 0x00814aff, 0x6a080000, 0x00a14aff,
    0x73080000, 0x00c24aff, 0x7b080000, 0x00e24aff,
    0x83080000, 0x000052ff, 0x8b080000, 0x002052ff,
    0x94080000, 0x004052ff, 0x9c080000, 0x006152ff,
    0xa4080000, 0x008152ff, 0xac080000, 0x00a152ff,
    0xb4080000, 0x00c252ff, 0xbd080000, 0x00e252ff,
    0xc5080000, 0x00005aff, 0xcd080000, 0x00205aff,
    0xd5080000, 0x00405aff, 0xde080000, 0x00615aff,
    0xe6080000, 0x00815aff, 0xee080000, 0x00a15aff,
    0xf6080000, 0x00c25aff, 0xff080000, 0x00e25aff,
    0x000c0000, 0x000062ff, 0x080c0000, 0x002062ff,
    0x100c0000, 0x004062ff, 0x180c0000, 0x006162ff,
    0x200c0000, 0x008162ff, 0x290c0000, 0x00a162ff,
    0x310c0000, 0x00c262ff, 0x390c0000, 0x00e262ff,
    0x410c0000, 0x00006aff, 0x4a0c0000, 0x00206aff,
    0x520c0000, 0x00406aff, 0x5a0c0000, 0x00616aff,
    0x620c0000, 0x00816aff, 0x6a0c0000, 0x00a16aff,
    0x730c0000, 0x00c26aff, 0x7b0c0000, 0x00e26aff,
    0x830c0000, 0x000073ff, 0x8b0c0000, 0x002073ff,
    0x940c0000, 0x004073ff, 0x9c0c0000, 0x006173ff,
    0xa40c0000, 0x008173ff, 0xac0c0000, 0x00a173ff,
    0xb40c0000, 0x00c273ff, 0xbd0c0000, 0x00e273ff,
    0xc50c0000, 0x00007bff, 0xcd0c0000, 0x00207bff,
    0xd50c0000, 0x00407bff, 0xde0c0000, 0x00617bff,
    0xe60c0000, 0x00817bff, 0xee0c0000, 0x00a17bff,
    0xf60c0000, 0x00c27bff, 0xff0c0000, 0x00e27bff,
    0x00100000, 0x000083ff, 0x08100000, 0x002083ff,
    0x10100000, 0x004083ff, 0x18100000, 0x006183ff,
    0x20100000, 0x008183ff, 0x29100000, 0x00a183ff,
    0x31100000, 0x00c283ff, 0x39100000, 0x00e283ff,
    0x41100000, 0x00008bff, 0x4a100000, 0x00208bff,
    0x52100000, 0x00408bff, 0x5a100000, 0x00618bff,
    0x62100000, 0x00818bff, 0x6a100000, 0x00a18bff,
    0x73100000, 0x00c28bff, 0x7b100000, 0x00e28bff,
    0x83100000, 0x000094ff, 0x8b100000, 0x002094ff,
    0x94100000, 0x004094ff, 0x9c100000, 0x006194ff,
    0xa4100000, 0x008194ff, 0xac100000, 0x00a194ff,
    0xb4100000, 0x00c294ff, 0xbd100000, 0x00e294ff,
    0xc5100000, 0x00009cff, 0xcd100000, 0x00209cff,
    0xd5100000, 0x00409cff, 0xde100000, 0x00619cff,
    0xe6100000, 0x00819cff, 0xee100000, 0x00a19cff,
    0xf6100000, 0x00c29cff, 0xff100000, 0x00e29cff,
    0x00140000, 0x0000a4ff, 0x08140000, 0x0020a4ff,
    0x10140000, 0x0040a4ff, 0x18140000, 0x0061a4ff,
    0x20140000, 0x0081a4ff, 0x29140000, 0x00a1a4ff,
    0x31140000, 0x00c2a4ff, 0x39140000, 0x00e2a4ff,
    0x41140000, 0x0000acff, 0x4a140000, 0x0020acff,
    0x52140000, 0x0040acff, 0x5a140000, 0x0061acff,
    0x62140000, 0x0081acff, 0x6a140000, 0x00a1acff,
    0x73140000, 0x00c2acff, 0x7b140000, 0x00e2acff,
    0x83140000, 0x0000b4ff, 0x8b140000, 0x0020b4ff,
    0x94140000, 0x0040b4ff, 0x9c140000, 0x0061b4ff,
    0xa4140000, 0x0081b4ff, 0xac140000, 0x00a1b4ff,
    0xb4140000, 0x00c2b4ff, 0xbd140000, 0x00e2b4ff,
    0xc5140000, 0x0000bdff, 0xcd140000, 0x0020bdff,
    0xd5140000, 0x0040bdff, 0xde140000, 0x0061bdff,
    0xe6140000, 0x0081bdff, 0xee140000, 0x00a1bdff,
    0xf6140000, 0x00c2bdff, 0xff140000, 0x00e2bdff,
    0x00180000, 0x0000c5ff, 0x08180000, 0x0020c5ff,
    0x10180000, 0x0040c5ff, 0x18180000, 0x0061c5ff,
    0x20180000, 0x0081c5ff, 0x29180000, 0x00a1c5ff,
    0x31180000, 0x00c2c5ff, 0x39180000, 0x00e2c5ff,
    0x41180000, 0x0000cdff, 0x4a180000, 0x0020cdff,
    0x52180000, 0x0040cdff, 0x5a180000, 0x0061cdff,
    0x62180000, 0x0081cdff, 0x6a180000, 0x00a1cdff,
    0x73180000, 0x00c2cdff, 0x7b180000, 0x00e2cdff,
    0x83180000, 0x0000d5ff, 0x8b180000, 0x0020d5ff,
    0x94180000, 0x0040d5ff, 0x9c180000, 0x0061d5ff,
    0xa4180000, 0x0081d5ff, 0xac180000, 0x00a1d5ff,
    0xb4180000, 0x00c2d5ff, 0xbd180000, 0x00e2d5ff,
    0xc5180000, 0x0000deff, 0xcd180000, 0x0020deff,
    0xd5180000, 0x0040deff, 0xde180000, 0x0061deff,
    0xe6180000, 0x0081deff, 0xee180000, 0x00a1deff,
    0xf6180000, 0x00c2deff, 0xff180000, 0x00e2deff,
    0x001c0000, 0x0000e6ff, 0x081c0000, 0x0020e6ff,
    0x101c0000, 0x0040e6ff, 0x181c0000, 0x0061e6ff,
    0x201c0000, 0x0081e6ff, 0x291c0000, 0x00a1e6ff,
    0x311c0000, 0x00c2e6ff, 0x391c0000, 0x00e2e6ff,
    0x411c0000, 0x0000eeff, 0x4a1c0000, 0x0020eeff,
    0x521c0000, 0x0040eeff, 0x5a1c0000, 0x0061eeff,
    0x621c0000, 0x0081eeff, 0x6a1c0000, 0x00a1eeff,
    0x731c0000, 0x00c2eeff, 0x7b1c0000, 0x00e2eeff,
    0x831c0000, 0x0000f6ff, 0x8b1c0000, 0x0020f6ff,
    0x941c0000, 0x0040f6ff, 0x9c1c0000, 0x0061f6ff,
    0xa41c0000, 0x0081f6ff, 0xac1c0000, 0x00a1f6ff,
    0xb41c0000, 0x00c2f6ff, 0xbd1c0000, 0x00e2f6ff,
    0xc51c0000, 0x0000ffff, 0xcd1c0000, 0x0020ffff,
    0xd51c0000, 0x0040ffff, 0xde1c0000, 0x0061ffff,
    0xe61c0000, 0x0081ffff, 0xee1c0000, 0x00a1ffff,
    0xf61c0000, 0x00c2ffff, 0xff1c0000, 0x00e2ffff
};
   2067 
   2068 static void
   2069 Blit_RGB565_BGRA8888(SDL_BlitInfo * info)
   2070 {
   2071     Blit_RGB565_32(info, RGB565_BGRA8888_LUT);
   2072 }
   2073 
   2074 #endif /* SDL_HAVE_BLIT_N_RGB565 */
   2075 
   2076 static void
   2077 BlitNto1(SDL_BlitInfo * info)
   2078 {
   2079 #ifndef USE_DUFFS_LOOP
   2080     int c;
   2081 #endif
   2082     int width, height;
   2083     Uint8 *src;
   2084     const Uint8 *map;
   2085     Uint8 *dst;
   2086     int srcskip, dstskip;
   2087     int srcbpp;
   2088     Uint32 Pixel;
   2089     int sR, sG, sB;
   2090     SDL_PixelFormat *srcfmt;
   2091 
   2092     /* Set up some basic variables */
   2093     width = info->dst_w;
   2094     height = info->dst_h;
   2095     src = info->src;
   2096     srcskip = info->src_skip;
   2097     dst = info->dst;
   2098     dstskip = info->dst_skip;
   2099     map = info->table;
   2100     srcfmt = info->src_fmt;
   2101     srcbpp = srcfmt->BytesPerPixel;
   2102 
   2103     if (map == NULL) {
   2104         while (height--) {
   2105 #ifdef USE_DUFFS_LOOP
   2106             /* *INDENT-OFF* */
   2107             DUFFS_LOOP(
   2108                 DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
   2109                                 sR, sG, sB);
   2110                 if ( 1 ) {
   2111                     /* Pack RGB into 8bit pixel */
   2112                     *dst = ((sR>>5)<<(3+2))|
   2113                             ((sG>>5)<<(2)) |
   2114                             ((sB>>6)<<(0)) ;
   2115                 }
   2116                 dst++;
   2117                 src += srcbpp;
   2118             , width);
   2119             /* *INDENT-ON* */
   2120 #else
   2121             for (c = width; c; --c) {
   2122                 DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB);
   2123                 if (1) {
   2124                     /* Pack RGB into 8bit pixel */
   2125                     *dst = ((sR >> 5) << (3 + 2)) |
   2126                         ((sG >> 5) << (2)) | ((sB >> 6) << (0));
   2127                 }
   2128                 dst++;
   2129                 src += srcbpp;
   2130             }
   2131 #endif
   2132             src += srcskip;
   2133             dst += dstskip;
   2134         }
   2135     } else {
   2136         while (height--) {
   2137 #ifdef USE_DUFFS_LOOP
   2138             /* *INDENT-OFF* */
   2139             DUFFS_LOOP(
   2140                 DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
   2141                                 sR, sG, sB);
   2142                 if ( 1 ) {
   2143                     /* Pack RGB into 8bit pixel */
   2144                     *dst = map[((sR>>5)<<(3+2))|
   2145                            ((sG>>5)<<(2))  |
   2146                            ((sB>>6)<<(0))  ];
   2147                 }
   2148                 dst++;
   2149                 src += srcbpp;
   2150             , width);
   2151             /* *INDENT-ON* */
   2152 #else
   2153             for (c = width; c; --c) {
   2154                 DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB);
   2155                 if (1) {
   2156                     /* Pack RGB into 8bit pixel */
   2157                     *dst = map[((sR >> 5) << (3 + 2)) |
   2158                                ((sG >> 5) << (2)) | ((sB >> 6) << (0))];
   2159                 }
   2160                 dst++;
   2161                 src += srcbpp;
   2162             }
   2163 #endif /* USE_DUFFS_LOOP */
   2164             src += srcskip;
   2165             dst += dstskip;
   2166         }
   2167     }
   2168 }
   2169 
   2170 /* blits 32 bit RGB<->RGBA with both surfaces having the same R,G,B fields */
   2171 static void
   2172 Blit4to4MaskAlpha(SDL_BlitInfo * info)
   2173 {
   2174     int width = info->dst_w;
   2175     int height = info->dst_h;
   2176     Uint32 *src = (Uint32 *) info->src;
   2177     int srcskip = info->src_skip;
   2178     Uint32 *dst = (Uint32 *) info->dst;
   2179     int dstskip = info->dst_skip;
   2180     SDL_PixelFormat *srcfmt = info->src_fmt;
   2181     SDL_PixelFormat *dstfmt = info->dst_fmt;
   2182 
   2183     if (dstfmt->Amask) {
   2184         /* RGB->RGBA, SET_ALPHA */
   2185         Uint32 mask = ((Uint32)info->a >> dstfmt->Aloss) << dstfmt->Ashift;
   2186 
   2187         while (height--) {
   2188             /* *INDENT-OFF* */
   2189             DUFFS_LOOP(
   2190             {
   2191                 *dst = *src | mask;
   2192                 ++dst;
   2193                 ++src;
   2194             },
   2195             width);
   2196             /* *INDENT-ON* */
   2197             src = (Uint32 *) ((Uint8 *) src + srcskip);
   2198             dst = (Uint32 *) ((Uint8 *) dst + dstskip);
   2199         }
   2200     } else {
   2201         /* RGBA->RGB, NO_ALPHA */
   2202         Uint32 mask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
   2203 
   2204         while (height--) {
   2205             /* *INDENT-OFF* */
   2206             DUFFS_LOOP(
   2207             {
   2208                 *dst = *src & mask;
   2209                 ++dst;
   2210                 ++src;
   2211             },
   2212             width);
   2213             /* *INDENT-ON* */
   2214             src = (Uint32 *) ((Uint8 *) src + srcskip);
   2215             dst = (Uint32 *) ((Uint8 *) dst + dstskip);
   2216         }
   2217     }
   2218 }
   2219 
   2220 /* blits 32 bit RGBA<->RGBA with both surfaces having the same R,G,B,A fields */
   2221 static void
   2222 Blit4to4CopyAlpha(SDL_BlitInfo * info)
   2223 {
   2224     int width = info->dst_w;
   2225     int height = info->dst_h;
   2226     Uint32 *src = (Uint32 *) info->src;
   2227     int srcskip = info->src_skip;
   2228     Uint32 *dst = (Uint32 *) info->dst;
   2229     int dstskip = info->dst_skip;
   2230 
   2231     /* RGBA->RGBA, COPY_ALPHA */
   2232     while (height--) {
   2233         /* *INDENT-OFF* */
   2234         DUFFS_LOOP(
   2235         {
   2236             *dst = *src;
   2237             ++dst;
   2238             ++src;
   2239         },
   2240         width);
   2241         /* *INDENT-ON* */
   2242         src = (Uint32 *) ((Uint8 *) src + srcskip);
   2243         dst = (Uint32 *) ((Uint8 *) dst + dstskip);
   2244     }
   2245 }
   2246 
   2247 /* permutation for mapping srcfmt to dstfmt, overloading or not the alpha channel */
   2248 static void
   2249 get_permutation(SDL_PixelFormat *srcfmt, SDL_PixelFormat *dstfmt,
   2250         int *_p0 , int *_p1, int *_p2, int *_p3, int *_alpha_channel)
   2251 {
   2252     int alpha_channel = 0, p0, p1, p2, p3;
   2253 #if SDL_BYTEORDER == SDL_LIL_ENDIAN
   2254     int Pixel = 0x04030201; /* identity permutation */
   2255 #else
   2256     int Pixel = 0x01020304; /* identity permutation */
   2257     int srcbpp = srcfmt->BytesPerPixel;
   2258     int dstbpp = dstfmt->BytesPerPixel;
   2259 #endif
   2260 
   2261     if (srcfmt->Amask) {
   2262         RGBA_FROM_PIXEL(Pixel, srcfmt, p0, p1, p2, p3);
   2263     } else {
   2264         RGB_FROM_PIXEL(Pixel, srcfmt, p0, p1, p2);
   2265         p3 = 0;
   2266     }
   2267 
   2268     if (dstfmt->Amask) {
   2269         if (srcfmt->Amask) {
   2270             PIXEL_FROM_RGBA(Pixel, dstfmt, p0, p1, p2, p3);
   2271         } else {
   2272             PIXEL_FROM_RGBA(Pixel, dstfmt, p0, p1, p2, 0);
   2273         }
   2274     } else {
   2275         PIXEL_FROM_RGB(Pixel, dstfmt, p0, p1, p2);
   2276     }
   2277 
   2278 #if SDL_BYTEORDER == SDL_LIL_ENDIAN
   2279     p0 = Pixel & 0xFF;
   2280     p1 = (Pixel >> 8) & 0xFF;
   2281     p2 = (Pixel >> 16) & 0xFF;
   2282     p3 = (Pixel >> 24) & 0xFF;
   2283 #else
   2284     p3 = Pixel & 0xFF;
   2285     p2 = (Pixel >> 8) & 0xFF;
   2286     p1 = (Pixel >> 16) & 0xFF;
   2287     p0 = (Pixel >> 24) & 0xFF;
   2288 #endif
   2289 
   2290     if (p0 == 0) {
   2291         p0 = 1;
   2292         alpha_channel = 0;
   2293     } else if (p1 == 0) {
   2294         p1 = 1;
   2295         alpha_channel = 1;
   2296     } else if (p2 == 0) {
   2297         p2 = 1;
   2298         alpha_channel = 2;
   2299     } else if (p3 == 0) {
   2300         p3 = 1;
   2301         alpha_channel = 3;
   2302     }
   2303 
   2304 #if SDL_BYTEORDER == SDL_LIL_ENDIAN
   2305 #else
   2306     if (srcbpp == 3 && dstbpp == 4) {
   2307         if (p0 != 1) p0--;
   2308         if (p1 != 1) p1--;
   2309         if (p2 != 1) p2--;
   2310         if (p3 != 1) p3--;
   2311     } else if (srcbpp == 4 && dstbpp == 3) {
   2312         p0 = p1;
   2313         p1 = p2;
   2314         p2 = p3;
   2315     }
   2316 #endif
   2317     *_p0 = p0 - 1;
   2318     *_p1 = p1 - 1;
   2319     *_p2 = p2 - 1;
   2320     *_p3 = p3 - 1;
   2321 
   2322     if (_alpha_channel) {
   2323         *_alpha_channel = alpha_channel;
   2324     }
   2325 }
   2326 
   2327 
   2328 static void
   2329 BlitNtoN(SDL_BlitInfo * info)
   2330 {
   2331     int width = info->dst_w;
   2332     int height = info->dst_h;
   2333     Uint8 *src = info->src;
   2334     int srcskip = info->src_skip;
   2335     Uint8 *dst = info->dst;
   2336     int dstskip = info->dst_skip;
   2337     SDL_PixelFormat *srcfmt = info->src_fmt;
   2338     int srcbpp = srcfmt->BytesPerPixel;
   2339     SDL_PixelFormat *dstfmt = info->dst_fmt;
   2340     int dstbpp = dstfmt->BytesPerPixel;
   2341     unsigned alpha = dstfmt->Amask ? info->a : 0;
   2342 
   2343 #if HAVE_FAST_WRITE_INT8
   2344     /* Blit with permutation: 4->4 */
   2345     if (srcbpp == 4 && dstbpp == 4 &&
   2346         srcfmt->format != SDL_PIXELFORMAT_ARGB2101010 &&
   2347         dstfmt->format != SDL_PIXELFORMAT_ARGB2101010) {
   2348 
   2349         /* Find the appropriate permutation */
   2350         int alpha_channel, p0, p1, p2, p3;
   2351         get_permutation(srcfmt, dstfmt, &p0, &p1, &p2, &p3, &alpha_channel);
   2352 
   2353         while (height--) {
   2354             /* *INDENT-OFF* */
   2355             DUFFS_LOOP(
   2356             {
   2357                 dst[0] = src[p0];
   2358                 dst[1] = src[p1];
   2359                 dst[2] = src[p2];
   2360                 dst[3] = src[p3];
   2361                 dst[alpha_channel] = alpha;
   2362                 src += 4;
   2363                 dst += 4;
   2364             }, width);
   2365             /* *INDENT-ON* */
   2366             src += srcskip;
   2367             dst += dstskip;
   2368         }
   2369         return;
   2370     }
   2371 #endif
   2372 
   2373     /* Blit with permutation: 4->3 */
   2374     if (srcbpp == 4 && dstbpp == 3 &&
   2375         srcfmt->format != SDL_PIXELFORMAT_ARGB2101010) {
   2376 
   2377         /* Find the appropriate permutation */
   2378         int p0, p1, p2, p3;
   2379         get_permutation(srcfmt, dstfmt, &p0, &p1, &p2, &p3, NULL);
   2380 
   2381         while (height--) {
   2382             /* *INDENT-OFF* */
   2383             DUFFS_LOOP(
   2384             {
   2385                 dst[0] = src[p0];
   2386                 dst[1] = src[p1];
   2387                 dst[2] = src[p2];
   2388                 src += 4;
   2389                 dst += 3;
   2390             }, width);
   2391             /* *INDENT-ON* */
   2392             src += srcskip;
   2393             dst += dstskip;
   2394         }
   2395         return;
   2396     }
   2397 
   2398 #if HAVE_FAST_WRITE_INT8
   2399     /* Blit with permutation: 3->4 */
   2400     if (srcbpp == 3 && dstbpp == 4 &&
   2401         dstfmt->format != SDL_PIXELFORMAT_ARGB2101010) {
   2402 
   2403         /* Find the appropriate permutation */
   2404         int alpha_channel, p0, p1, p2, p3;
   2405         get_permutation(srcfmt, dstfmt, &p0, &p1, &p2, &p3, &alpha_channel);
   2406 
   2407         while (height--) {
   2408             /* *INDENT-OFF* */
   2409             DUFFS_LOOP(
   2410             {
   2411                 dst[0] = src[p0];
   2412                 dst[1] = src[p1];
   2413                 dst[2] = src[p2];
   2414                 dst[3] = src[p3];
   2415                 dst[alpha_channel] = alpha;
   2416                 src += 3;
   2417                 dst += 4;
   2418             }, width);
   2419             /* *INDENT-ON* */
   2420             src += srcskip;
   2421             dst += dstskip;
   2422         }
   2423         return;
   2424     }
   2425 #endif
   2426 
   2427     while (height--) {
   2428         /* *INDENT-OFF* */
   2429         DUFFS_LOOP(
   2430         {
   2431             Uint32 Pixel;
   2432             unsigned sR;
   2433             unsigned sG;
   2434             unsigned sB;
   2435             DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB);
   2436             ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, alpha);
   2437             dst += dstbpp;
   2438             src += srcbpp;
   2439         },
   2440         width);
   2441         /* *INDENT-ON* */
   2442         src += srcskip;
   2443         dst += dstskip;
   2444     }
   2445 }
   2446 
   2447 static void
   2448 BlitNtoNCopyAlpha(SDL_BlitInfo * info)
   2449 {
   2450     int width = info->dst_w;
   2451     int height = info->dst_h;
   2452     Uint8 *src = info->src;
   2453     int srcskip = info->src_skip;
   2454     Uint8 *dst = info->dst;
   2455     int dstskip = info->dst_skip;
   2456     SDL_PixelFormat *srcfmt = info->src_fmt;
   2457     int srcbpp = srcfmt->BytesPerPixel;
   2458     SDL_PixelFormat *dstfmt = info->dst_fmt;
   2459     int dstbpp = dstfmt->BytesPerPixel;
   2460     int c;
   2461 
   2462 #if HAVE_FAST_WRITE_INT8
   2463     /* Blit with permutation: 4->4 */
   2464     if (srcbpp == 4 && dstbpp == 4 &&
   2465         srcfmt->format != SDL_PIXELFORMAT_ARGB2101010 &&
   2466         dstfmt->format != SDL_PIXELFORMAT_ARGB2101010) {
   2467 
   2468         /* Find the appropriate permutation */
   2469         int p0, p1, p2, p3;
   2470         get_permutation(srcfmt, dstfmt, &p0, &p1, &p2, &p3, NULL);
   2471 
   2472         while (height--) {
   2473             /* *INDENT-OFF* */
   2474             DUFFS_LOOP(
   2475             {
   2476                 dst[0] = src[p0];
   2477                 dst[1] = src[p1];
   2478                 dst[2] = src[p2];
   2479                 dst[3] = src[p3];
   2480                 src += 4;
   2481                 dst += 4;
   2482             }, width);
   2483             /* *INDENT-ON* */
   2484             src += srcskip;
   2485             dst += dstskip;
   2486         }
   2487         return;
   2488     }
   2489 #endif
   2490 
   2491     while (height--) {
   2492         for (c = width; c; --c) {
   2493             Uint32 Pixel;
   2494             unsigned sR, sG, sB, sA;
   2495             DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel, sR, sG, sB, sA);
   2496             ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, sA);
   2497             dst += dstbpp;
   2498             src += srcbpp;
   2499         }
   2500         src += srcskip;
   2501         dst += dstskip;
   2502     }
   2503 }
   2504 
   2505 static void
   2506 BlitNto1Key(SDL_BlitInfo * info)
   2507 {
   2508     int width = info->dst_w;
   2509     int height = info->dst_h;
   2510     Uint8 *src = info->src;
   2511     int srcskip = info->src_skip;
   2512     Uint8 *dst = info->dst;
   2513     int dstskip = info->dst_skip;
   2514     SDL_PixelFormat *srcfmt = info->src_fmt;
   2515     const Uint8 *palmap = info->table;
   2516     Uint32 ckey = info->colorkey;
   2517     Uint32 rgbmask = ~srcfmt->Amask;
   2518     int srcbpp;
   2519     Uint32 Pixel;
   2520     unsigned sR, sG, sB;
   2521 
   2522     /* Set up some basic variables */
   2523     srcbpp = srcfmt->BytesPerPixel;
   2524     ckey &= rgbmask;
   2525 
   2526     if (palmap == NULL) {
   2527         while (height--) {
   2528             /* *INDENT-OFF* */
   2529             DUFFS_LOOP(
   2530             {
   2531                 DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
   2532                                 sR, sG, sB);
   2533                 if ( (Pixel & rgbmask) != ckey ) {
   2534                     /* Pack RGB into 8bit pixel */
   2535                     *dst = (Uint8)(((sR>>5)<<(3+2))|
   2536                                    ((sG>>5)<<(2)) |
   2537                                    ((sB>>6)<<(0)));
   2538                 }
   2539                 dst++;
   2540                 src += srcbpp;
   2541             },
   2542             width);
   2543             /* *INDENT-ON* */
   2544             src += srcskip;
   2545             dst += dstskip;
   2546         }
   2547     } else {
   2548         while (height--) {
   2549             /* *INDENT-OFF* */
   2550             DUFFS_LOOP(
   2551             {
   2552                 DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
   2553                                 sR, sG, sB);
   2554                 if ( (Pixel & rgbmask) != ckey ) {
   2555                     /* Pack RGB into 8bit pixel */
   2556                     *dst = (Uint8)palmap[((sR>>5)<<(3+2))|
   2557                                          ((sG>>5)<<(2))  |
   2558                                          ((sB>>6)<<(0))  ];
   2559                 }
   2560                 dst++;
   2561                 src += srcbpp;
   2562             },
   2563             width);
   2564             /* *INDENT-ON* */
   2565             src += srcskip;
   2566             dst += dstskip;
   2567         }
   2568     }
   2569 }
   2570 
   2571 static void
   2572 Blit2to2Key(SDL_BlitInfo * info)
   2573 {
   2574     int width = info->dst_w;
   2575     int height = info->dst_h;
   2576     Uint16 *srcp = (Uint16 *) info->src;
   2577     int srcskip = info->src_skip;
   2578     Uint16 *dstp = (Uint16 *) info->dst;
   2579     int dstskip = info->dst_skip;
   2580     Uint32 ckey = info->colorkey;
   2581     Uint32 rgbmask = ~info->src_fmt->Amask;
   2582 
   2583     /* Set up some basic variables */
   2584     srcskip /= 2;
   2585     dstskip /= 2;
   2586     ckey &= rgbmask;
   2587 
   2588     while (height--) {
   2589         /* *INDENT-OFF* */
   2590         DUFFS_LOOP(
   2591         {
   2592             if ( (*srcp & rgbmask) != ckey ) {
   2593                 *dstp = *srcp;
   2594             }
   2595             dstp++;
   2596             srcp++;
   2597         },
   2598         width);
   2599         /* *INDENT-ON* */
   2600         srcp += srcskip;
   2601         dstp += dstskip;
   2602     }
   2603 }
   2604 
   2605 static void
   2606 BlitNtoNKey(SDL_BlitInfo * info)
   2607 {
   2608     int width = info->dst_w;
   2609     int height = info->dst_h;
   2610     Uint8 *src = info->src;
   2611     int srcskip = info->src_skip;
   2612     Uint8 *dst = info->dst;
   2613     int dstskip = info->dst_skip;
   2614     Uint32 ckey = info->colorkey;
   2615     SDL_PixelFormat *srcfmt = info->src_fmt;
   2616     SDL_PixelFormat *dstfmt = info->dst_fmt;
   2617     int srcbpp = srcfmt->BytesPerPixel;
   2618     int dstbpp = dstfmt->BytesPerPixel;
   2619     unsigned alpha = dstfmt->Amask ? info->a : 0;
   2620     Uint32 rgbmask = ~srcfmt->Amask;
   2621     int sfmt = srcfmt->format;
   2622     int dfmt = dstfmt->format;
   2623 
   2624     /* Set up some basic variables */
   2625     ckey &= rgbmask;
   2626 
   2627     /* BPP 4, same rgb */
   2628     if (srcbpp == 4 && dstbpp == 4 && srcfmt->Rmask == dstfmt->Rmask && srcfmt->Gmask == dstfmt->Gmask && srcfmt->Bmask == dstfmt->Bmask) {
   2629         Uint32 *src32 = (Uint32*)src;
   2630         Uint32 *dst32 = (Uint32*)dst;
   2631 
   2632         if (dstfmt->Amask) {
   2633             /* RGB->RGBA, SET_ALPHA */
   2634             Uint32 mask = ((Uint32)info->a) << dstfmt->Ashift;
   2635             while (height--) {
   2636                 /* *INDENT-OFF* */
   2637                 DUFFS_LOOP(
   2638                 {
   2639                     if ((*src32 & rgbmask) != ckey) {
   2640                         *dst32 = *src32 | mask;
   2641                     }
   2642                     ++dst32;
   2643                     ++src32;
   2644                 }, width);
   2645                 /* *INDENT-ON* */
   2646                 src32 = (Uint32 *) ((Uint8 *) src32 + srcskip);
   2647                 dst32 = (Uint32 *) ((Uint8 *) dst32 + dstskip);
   2648             }
   2649             return;
   2650         } else {
   2651             /* RGBA->RGB, NO_ALPHA */
   2652             Uint32 mask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
   2653             while (height--) {
   2654                 /* *INDENT-OFF* */
   2655                 DUFFS_LOOP(
   2656                 {
   2657                     if ((*src32 & rgbmask) != ckey) {
   2658                         *dst32 = *src32 & mask;
   2659                     }
   2660                     ++dst32;
   2661                     ++src32;
   2662                 }, width);
   2663                 /* *INDENT-ON* */
   2664                 src32 = (Uint32 *) ((Uint8 *) src32 + srcskip);
   2665                 dst32 = (Uint32 *) ((Uint8 *) dst32 + dstskip);
   2666             }
   2667             return;
   2668         }
   2669     }
   2670 
   2671 #if HAVE_FAST_WRITE_INT8
   2672     /* Blit with permutation: 4->4 */
   2673     if (srcbpp == 4 && dstbpp == 4 &&
   2674         srcfmt->format != SDL_PIXELFORMAT_ARGB2101010 &&
   2675         dstfmt->format != SDL_PIXELFORMAT_ARGB2101010) {
   2676 
   2677         /* Find the appropriate permutation */
   2678         int alpha_channel, p0, p1, p2, p3;
   2679         get_permutation(srcfmt, dstfmt, &p0, &p1, &p2, &p3, &alpha_channel);
   2680 
   2681         while (height--) {
   2682             /* *INDENT-OFF* */
   2683             DUFFS_LOOP(
   2684             {
   2685                 Uint32 *src32 = (Uint32*)src;
   2686 
   2687                 if ((*src32 & rgbmask) != ckey) {
   2688                     dst[0] = src[p0];
   2689                     dst[1] = src[p1];
   2690                     dst[2] = src[p2];
   2691                     dst[3] = src[p3];
   2692                     dst[alpha_channel] = alpha;
   2693                 }
   2694                 src += 4;
   2695                 dst += 4;
   2696             }, width);
   2697             /* *INDENT-ON* */
   2698             src += srcskip;
   2699             dst += dstskip;
   2700         }
   2701         return;
   2702     }
   2703 #endif
   2704 
   2705     /* BPP 3, same rgb triplet */
   2706     if ((sfmt == SDL_PIXELFORMAT_RGB24 && dfmt == SDL_PIXELFORMAT_RGB24) ||
   2707         (sfmt == SDL_PIXELFORMAT_BGR24 && dfmt == SDL_PIXELFORMAT_BGR24)) {
   2708 
   2709 #if SDL_BYTEORDER == SDL_LIL_ENDIAN
   2710         Uint8 k0 = ckey & 0xFF;
   2711         Uint8 k1 = (ckey >> 8)  & 0xFF;
   2712         Uint8 k2 = (ckey >> 16) & 0xFF;
   2713 #else
   2714         Uint8 k0 = (ckey >> 16) & 0xFF;
   2715         Uint8 k1 = (ckey >> 8) & 0xFF;
   2716         Uint8 k2 = ckey & 0xFF;
   2717 #endif
   2718 
   2719         while (height--) {
   2720             /* *INDENT-OFF* */
   2721             DUFFS_LOOP(
   2722             {
   2723                 Uint8 s0 = src[0];
   2724                 Uint8 s1 = src[1];
   2725                 Uint8 s2 = src[2];
   2726 
   2727                 if (k0 != s0 || k1 != s1 || k2 != s2) {
   2728                     dst[0] = s0;
   2729                     dst[1] = s1;
   2730                     dst[2] = s2;
   2731                 }
   2732                 src += 3;
   2733                 dst += 3;
   2734             },
   2735             width);
   2736             /* *INDENT-ON* */
   2737             src += srcskip;
   2738             dst += dstskip;
   2739         }
   2740         return;
   2741     }
   2742 
   2743     /* BPP 3, inversed rgb triplet */
   2744     if ((sfmt == SDL_PIXELFORMAT_RGB24 && dfmt == SDL_PIXELFORMAT_BGR24) ||
   2745         (sfmt == SDL_PIXELFORMAT_BGR24 && dfmt == SDL_PIXELFORMAT_RGB24)) {
   2746 
   2747 #if SDL_BYTEORDER == SDL_LIL_ENDIAN
   2748         Uint8 k0 = ckey & 0xFF;
   2749         Uint8 k1 = (ckey >> 8)  & 0xFF;
   2750         Uint8 k2 = (ckey >> 16) & 0xFF;
   2751 #else
   2752         Uint8 k0 = (ckey >> 16) & 0xFF;
   2753         Uint8 k1 = (ckey >> 8) & 0xFF;
   2754         Uint8 k2 = ckey & 0xFF;
   2755 #endif
   2756 
   2757         while (height--) {
   2758             /* *INDENT-OFF* */
   2759             DUFFS_LOOP(
   2760             {
   2761                 Uint8 s0 = src[0];
   2762                 Uint8 s1 = src[1];
   2763                 Uint8 s2 = src[2];
   2764                 if (k0 != s0 || k1 != s1 || k2 != s2) {
   2765                     /* Inversed RGB */
   2766                     dst[0] = s2;
   2767                     dst[1] = s1;
   2768                     dst[2] = s0;
   2769                 }
   2770                 src += 3;
   2771                 dst += 3;
   2772             },
   2773             width);
   2774             /* *INDENT-ON* */
   2775             src += srcskip;
   2776             dst += dstskip;
   2777         }
   2778         return;
   2779     }
   2780 
   2781     /* Blit with permutation: 4->3 */
   2782     if (srcbpp == 4 && dstbpp == 3 &&
   2783         srcfmt->format != SDL_PIXELFORMAT_ARGB2101010) {
   2784 
   2785         /* Find the appropriate permutation */
   2786         int p0, p1, p2, p3;
   2787         get_permutation(srcfmt, dstfmt, &p0, &p1, &p2, &p3, NULL);
   2788 
   2789         while (height--) {
   2790             /* *INDENT-OFF* */
   2791             DUFFS_LOOP(
   2792             {
   2793                 Uint32 *src32 = (Uint32*)src;
   2794                 if ((*src32 & rgbmask) != ckey) {
   2795                     dst[0] = src[p0];
   2796                     dst[1] = src[p1];
   2797                     dst[2] = src[p2];
   2798                 }
   2799                 src += 4;
   2800                 dst += 3;
   2801             }, width);
   2802             /* *INDENT-ON* */
   2803             src += srcskip;
   2804             dst += dstskip;
   2805         }
   2806         return;
   2807     }
   2808 
   2809 #if HAVE_FAST_WRITE_INT8
   2810     /* Blit with permutation: 3->4 */
   2811     if (srcbpp == 3 && dstbpp == 4 &&
   2812         dstfmt->format != SDL_PIXELFORMAT_ARGB2101010) {
   2813 
   2814 #if SDL_BYTEORDER == SDL_LIL_ENDIAN
   2815         Uint8 k0 = ckey & 0xFF;
   2816         Uint8 k1 = (ckey >> 8)  & 0xFF;
   2817         Uint8 k2 = (ckey >> 16) & 0xFF;
   2818 #else
   2819         Uint8 k0 = (ckey >> 16) & 0xFF;
   2820         Uint8 k1 = (ckey >> 8) & 0xFF;
   2821         Uint8 k2 = ckey  & 0xFF;
   2822 #endif
   2823 
   2824         /* Find the appropriate permutation */
   2825         int alpha_channel, p0, p1, p2, p3;
   2826         get_permutation(srcfmt, dstfmt, &p0, &p1, &p2, &p3, &alpha_channel);
   2827 
   2828         while (height--) {
   2829             /* *INDENT-OFF* */
   2830             DUFFS_LOOP(
   2831             {
   2832                 Uint8 s0 = src[0];
   2833                 Uint8 s1 = src[1];
   2834                 Uint8 s2 = src[2];
   2835 
   2836                 if (k0 != s0 || k1 != s1 || k2 != s2) {
   2837                     dst[0] = src[p0];
   2838                     dst[1] = src[p1];
   2839                     dst[2] = src[p2];
   2840                     dst[3] = src[p3];
   2841                     dst[alpha_channel] = alpha;
   2842                 }
   2843                 src += 3;
   2844                 dst += 4;
   2845             }, width);
   2846             /* *INDENT-ON* */
   2847             src += srcskip;
   2848             dst += dstskip;
   2849         }
   2850         return;
   2851     }
   2852 #endif
   2853 
   2854     while (height--) {
   2855         /* *INDENT-OFF* */
   2856         DUFFS_LOOP(
   2857         {
   2858             Uint32 Pixel;
   2859             unsigned sR;
   2860             unsigned sG;
   2861             unsigned sB;
   2862             RETRIEVE_RGB_PIXEL(src, srcbpp, Pixel);
   2863             if ( (Pixel & rgbmask) != ckey ) {
   2864                 RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB);
   2865                 ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, alpha);
   2866             }
   2867             dst += dstbpp;
   2868             src += srcbpp;
   2869         },
   2870         width);
   2871         /* *INDENT-ON* */
   2872         src += srcskip;
   2873         dst += dstskip;
   2874     }
   2875 }
   2876 
   2877 static void
   2878 BlitNtoNKeyCopyAlpha(SDL_BlitInfo * info)
   2879 {
   2880     int width = info->dst_w;
   2881     int height = info->dst_h;
   2882     Uint8 *src = info->src;
   2883     int srcskip = info->src_skip;
   2884     Uint8 *dst = info->dst;
   2885     int dstskip = info->dst_skip;
   2886     Uint32 ckey = info->colorkey;
   2887     SDL_PixelFormat *srcfmt = info->src_fmt;
   2888     SDL_PixelFormat *dstfmt = info->dst_fmt;
   2889     Uint32 rgbmask = ~srcfmt->Amask;
   2890 
   2891     Uint8 srcbpp;
   2892     Uint8 dstbpp;
   2893     Uint32 Pixel;
   2894     unsigned sR, sG, sB, sA;
   2895 
   2896     /* Set up some basic variables */
   2897     srcbpp = srcfmt->BytesPerPixel;
   2898     dstbpp = dstfmt->BytesPerPixel;
   2899     ckey &= rgbmask;
   2900 
   2901     /* Fastpath: same source/destination format, with Amask, bpp 32, loop is vectorized. ~10x faster */
   2902     if (srcfmt->format == dstfmt->format) {
   2903 
   2904         if (srcfmt->format == SDL_PIXELFORMAT_ARGB8888 ||
   2905             srcfmt->format == SDL_PIXELFORMAT_ABGR8888 ||
   2906             srcfmt->format == SDL_PIXELFORMAT_BGRA8888 ||
   2907             srcfmt->format == SDL_PIXELFORMAT_RGBA8888) {
   2908 
   2909             Uint32 *src32 = (Uint32*)src;
   2910             Uint32 *dst32 = (Uint32*)dst;
   2911             while (height--) {
   2912                 /* *INDENT-OFF* */
   2913                 DUFFS_LOOP(
   2914                 {
   2915                     if ((*src32 & rgbmask) != ckey) {
   2916                         *dst32 = *src32;
   2917                     }
   2918                     ++src32;
   2919                     ++dst32;
   2920                 },
   2921                 width);
   2922                 /* *INDENT-ON* */
   2923                 src32 = (Uint32 *)((Uint8 *)src32 + srcskip);
   2924                 dst32 = (Uint32 *)((Uint8 *)dst32 + dstskip);
   2925             }
   2926         }
   2927         return;
   2928     }
   2929 
   2930 #if HAVE_FAST_WRITE_INT8
   2931     /* Blit with permutation: 4->4 */
   2932     if (srcbpp == 4 && dstbpp == 4 &&
   2933         srcfmt->format != SDL_PIXELFORMAT_ARGB2101010 &&
   2934         dstfmt->format != SDL_PIXELFORMAT_ARGB2101010) {
   2935 
   2936         /* Find the appropriate permutation */
   2937         int p0, p1, p2, p3;
   2938         get_permutation(srcfmt, dstfmt, &p0, &p1, &p2, &p3, NULL);
   2939 
   2940         while (height--) {
   2941             /* *INDENT-OFF* */
   2942             DUFFS_LOOP(
   2943             {
   2944                 Uint32 *src32 = (Uint32*)src;
   2945                 if ((*src32 & rgbmask) != ckey) {
   2946                     dst[0] = src[p0];
   2947                     dst[1] = src[p1];
   2948                     dst[2] = src[p2];
   2949                     dst[3] = src[p3];
   2950                 }
   2951                 src += 4;
   2952                 dst += 4;
   2953             }, width);
   2954             /* *INDENT-ON* */
   2955             src += srcskip;
   2956             dst += dstskip;
   2957         }
   2958         return;
   2959     }
   2960 #endif
   2961 
   2962     while (height--) {
   2963         /* *INDENT-OFF* */
   2964         DUFFS_LOOP(
   2965         {
   2966             DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel, sR, sG, sB, sA);
   2967             if ( (Pixel & rgbmask) != ckey ) {
   2968                   ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, sA);
   2969             }
   2970             dst += dstbpp;
   2971             src += srcbpp;
   2972         },
   2973         width);
   2974         /* *INDENT-ON* */
   2975         src += srcskip;
   2976         dst += dstskip;
   2977     }
   2978 }
   2979 
   2980 /* Special optimized blit for ARGB 2-10-10-10 --> RGBA */
   2981 static void
   2982 Blit2101010toN(SDL_BlitInfo * info)
   2983 {
   2984     int width = info->dst_w;
   2985     int height = info->dst_h;
   2986     Uint8 *src = info->src;
   2987     int srcskip = info->src_skip;
   2988     Uint8 *dst = info->dst;
   2989     int dstskip = info->dst_skip;
   2990     SDL_PixelFormat *dstfmt = info->dst_fmt;
   2991     int dstbpp = dstfmt->BytesPerPixel;
   2992     Uint32 Pixel;
   2993     unsigned sR, sG, sB, sA;
   2994 
   2995     while (height--) {
   2996         /* *INDENT-OFF* */
   2997         DUFFS_LOOP(
   2998         {
   2999             Pixel = *(Uint32 *)src;
   3000             RGBA_FROM_ARGB2101010(Pixel, sR, sG, sB, sA);
   3001             ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, sA);
   3002             dst += dstbpp;
   3003             src += 4;
   3004         },
   3005         width);
   3006         /* *INDENT-ON* */
   3007         src += srcskip;
   3008         dst += dstskip;
   3009     }
   3010 }
   3011 
   3012 /* Special optimized blit for RGBA --> ARGB 2-10-10-10 */
   3013 static void
   3014 BlitNto2101010(SDL_BlitInfo * info)
   3015 {
   3016     int width = info->dst_w;
   3017     int height = info->dst_h;
   3018     Uint8 *src = info->src;
   3019     int srcskip = info->src_skip;
   3020     Uint8 *dst = info->dst;
   3021     int dstskip = info->dst_skip;
   3022     SDL_PixelFormat *srcfmt = info->src_fmt;
   3023     int srcbpp = srcfmt->BytesPerPixel;
   3024     Uint32 Pixel;
   3025     unsigned sR, sG, sB, sA;
   3026 
   3027     while (height--) {
   3028         /* *INDENT-OFF* */
   3029         DUFFS_LOOP(
   3030         {
   3031             DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel, sR, sG, sB, sA);
   3032             ARGB2101010_FROM_RGBA(Pixel, sR, sG, sB, sA);
   3033             *(Uint32 *)dst = Pixel;
   3034             dst += 4;
   3035             src += srcbpp;
   3036         },
   3037         width);
   3038         /* *INDENT-ON* */
   3039         src += srcskip;
   3040         dst += dstskip;
   3041     }
   3042 }
   3043 
   3044 /* Blit_3or4_to_3or4__same_rgb: 3 or 4 bpp, same RGB triplet */
   3045 static void
   3046 Blit_3or4_to_3or4__same_rgb(SDL_BlitInfo * info)
   3047 {
   3048     int width = info->dst_w;
   3049     int height = info->dst_h;
   3050     Uint8 *src = info->src;
   3051     int srcskip = info->src_skip;
   3052     Uint8 *dst = info->dst;
   3053     int dstskip = info->dst_skip;
   3054     SDL_PixelFormat *srcfmt = info->src_fmt;
   3055     int srcbpp = srcfmt->BytesPerPixel;
   3056     SDL_PixelFormat *dstfmt = info->dst_fmt;
   3057     int dstbpp = dstfmt->BytesPerPixel;
   3058 
   3059     if (dstfmt->Amask) {
   3060         /* SET_ALPHA */
   3061         Uint32 mask = ((Uint32)info->a) << dstfmt->Ashift;
   3062 #if SDL_BYTEORDER == SDL_LIL_ENDIAN
   3063         int i0 = 0, i1 = 1, i2 = 2;
   3064 #else
   3065         int i0 = srcbpp - 1 - 0;
   3066         int i1 = srcbpp - 1 - 1;
   3067         int i2 = srcbpp - 1 - 2;
   3068 #endif
   3069         while (height--) {
   3070             /* *INDENT-OFF* */
   3071             DUFFS_LOOP(
   3072             {
   3073                 Uint32 *dst32 = (Uint32*)dst;
   3074                 Uint8 s0 = src[i0];
   3075                 Uint8 s1 = src[i1];
   3076                 Uint8 s2 = src[i2];
   3077                 *dst32 = (s0) | (s1 << 8) | (s2 << 16) | mask;
   3078                 dst += 4;
   3079                 src += srcbpp;
   3080             }, width);
   3081             /* *INDENT-ON* */
   3082             src += srcskip;
   3083             dst += dstskip;
   3084         }
   3085     } else {
   3086         /* NO_ALPHA */
   3087 #if SDL_BYTEORDER == SDL_LIL_ENDIAN
   3088         int i0 = 0, i1 = 1, i2 = 2;
   3089         int j0 = 0, j1 = 1, j2 = 2;
   3090 #else
   3091         int i0 = srcbpp - 1 - 0;
   3092         int i1 = srcbpp - 1 - 1;
   3093         int i2 = srcbpp - 1 - 2;
   3094         int j0 = dstbpp - 1 - 0;
   3095         int j1 = dstbpp - 1 - 1;
   3096         int j2 = dstbpp - 1 - 2;
   3097 #endif
   3098         while (height--) {
   3099             /* *INDENT-OFF* */
   3100             DUFFS_LOOP(
   3101             {
   3102                 Uint8 s0 = src[i0];
   3103                 Uint8 s1 = src[i1];
   3104                 Uint8 s2 = src[i2];
   3105                 dst[j0] = s0;
   3106                 dst[j1] = s1;
   3107                 dst[j2] = s2;
   3108                 dst += dstbpp;
   3109                 src += srcbpp;
   3110             }, width);
   3111             /* *INDENT-ON* */
   3112             src += srcskip;
   3113             dst += dstskip;
   3114         }
   3115     }
   3116 }
   3117 
   3118 /* Blit_3or4_to_3or4__inversed_rgb: 3 or 4 bpp, inversed RGB triplet */
   3119 static void
   3120 Blit_3or4_to_3or4__inversed_rgb(SDL_BlitInfo * info)
   3121 {
   3122     int width = info->dst_w;
   3123     int height = info->dst_h;
   3124     Uint8 *src = info->src;
   3125     int srcskip = info->src_skip;
   3126     Uint8 *dst = info->dst;
   3127     int dstskip = info->dst_skip;
   3128     SDL_PixelFormat *srcfmt = info->src_fmt;
   3129     int srcbpp = srcfmt->BytesPerPixel;
   3130     SDL_PixelFormat *dstfmt = info->dst_fmt;
   3131     int dstbpp = dstfmt->BytesPerPixel;
   3132 
   3133     if (dstfmt->Amask) {
   3134         if (srcfmt->Amask) {
   3135             /* COPY_ALPHA */
   3136             /* Only to switch ABGR8888 <-> ARGB8888 */
   3137             while (height--) {
   3138 #if SDL_BYTEORDER == SDL_LIL_ENDIAN
   3139                 int i0 = 0, i1 = 1, i2 = 2, i3 = 3;
   3140 #else
   3141                 int i0 = 3, i1 = 2, i2 = 1, i3 = 0;
   3142 #endif
   3143                 /* *INDENT-OFF* */
   3144                 DUFFS_LOOP(
   3145                 {
   3146                     Uint32 *dst32 = (Uint32*)dst;
   3147                     Uint8 s0 = src[i0];
   3148                     Uint8 s1 = src[i1];
   3149                     Uint8 s2 = src[i2];
   3150                     Uint32 alphashift = ((Uint32)src[i3]) << dstfmt->Ashift;
   3151                     /* inversed, compared to Blit_3or4_to_3or4__same_rgb */
   3152                     *dst32 = (s0 << 16) | (s1 << 8) | (s2) | alphashift;
   3153                     dst += 4;
   3154                     src += 4;
   3155                 }, width);
   3156                 /* *INDENT-ON* */
   3157                 src += srcskip;
   3158                 dst += dstskip;
   3159             }
   3160         } else {
   3161             /* SET_ALPHA */
   3162             Uint32 mask = ((Uint32)info->a) << dstfmt->Ashift;
   3163 #if SDL_BYTEORDER == SDL_LIL_ENDIAN
   3164             int i0 = 0, i1 = 1, i2 = 2;
   3165 #else
   3166             int i0 = srcbpp - 1 - 0;
   3167             int i1 = srcbpp - 1 - 1;
   3168             int i2 = srcbpp - 1 - 2;
   3169 #endif
   3170             while (height--) {
   3171                 /* *INDENT-OFF* */
   3172                 DUFFS_LOOP(
   3173                 {
   3174                     Uint32 *dst32 = (Uint32*)dst;
   3175                     Uint8 s0 = src[i0];
   3176                     Uint8 s1 = src[i1];
   3177                     Uint8 s2 = src[i2];
   3178                     /* inversed, compared to Blit_3or4_to_3or4__same_rgb */
   3179                     *dst32 = (s0 << 16) | (s1 << 8) | (s2) | mask;
   3180                     dst += 4;
   3181                     src += srcbpp;
   3182                 }, width);
   3183                 /* *INDENT-ON* */
   3184                 src += srcskip;
   3185                 dst += dstskip;
   3186             }
   3187         }
   3188     } else {
   3189         /* NO_ALPHA */
   3190 #if SDL_BYTEORDER == SDL_LIL_ENDIAN
   3191         int i0 = 0, i1 = 1, i2 = 2;
   3192         int j0 = 2, j1 = 1, j2 = 0;
   3193 #else
   3194         int i0 = srcbpp - 1 - 0;
   3195         int i1 = srcbpp - 1 - 1;
   3196         int i2 = srcbpp - 1 - 2;
   3197         int j0 = dstbpp - 1 - 2;
   3198         int j1 = dstbpp - 1 - 1;
   3199         int j2 = dstbpp - 1 - 0;
   3200 #endif
   3201         while (height--) {
   3202             /* *INDENT-OFF* */
   3203             DUFFS_LOOP(
   3204             {
   3205                 Uint8 s0 = src[i0];
   3206                 Uint8 s1 = src[i1];
   3207                 Uint8 s2 = src[i2];
   3208                 /* inversed, compared to Blit_3or4_to_3or4__same_rgb */
   3209                 dst[j0] = s0;
   3210                 dst[j1] = s1;
   3211                 dst[j2] = s2;
   3212                 dst += dstbpp;
   3213                 src += srcbpp;
   3214             }, width);
   3215             /* *INDENT-ON* */
   3216             src += srcskip;
   3217             dst += dstskip;
   3218         }
   3219     }
   3220 }
   3221 
   3222 /* Normal N to N optimized blitters */
   3223 #define NO_ALPHA   1
   3224 #define SET_ALPHA  2
   3225 #define COPY_ALPHA 4
   3226 struct blit_table
   3227 {
   3228     Uint32 srcR, srcG, srcB;
   3229     int dstbpp;
   3230     Uint32 dstR, dstG, dstB;
   3231     enum blit_features blit_features;
   3232     SDL_BlitFunc blitfunc;
   3233     Uint32 alpha;  /* bitwise NO_ALPHA, SET_ALPHA, COPY_ALPHA */
   3234 };
   3235 static const struct blit_table normal_blit_1[] = {
   3236     /* Default for 8-bit RGB source, never optimized */
   3237     {0, 0, 0, 0, 0, 0, 0, 0, BlitNtoN, 0}
   3238 };
   3239 
   3240 static const struct blit_table normal_blit_2[] = {
   3241 #if SDL_ALTIVEC_BLITTERS
   3242     /* has-altivec */
   3243     {0x0000F800, 0x000007E0, 0x0000001F, 4, 0x00000000, 0x00000000, 0x00000000,
   3244      BLIT_FEATURE_HAS_ALTIVEC, Blit_RGB565_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
   3245     {0x00007C00, 0x000003E0, 0x0000001F, 4, 0x00000000, 0x00000000, 0x00000000,
   3246      BLIT_FEATURE_HAS_ALTIVEC, Blit_RGB555_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
   3247 #endif
   3248 #if SDL_ARM_SIMD_BLITTERS
   3249     {0x00000F00, 0x000000F0, 0x0000000F, 4, 0x00FF0000, 0x0000FF00, 0x000000FF,
   3250      BLIT_FEATURE_HAS_ARM_SIMD, Blit_RGB444_RGB888ARMSIMD, NO_ALPHA | COPY_ALPHA},
   3251 #endif
   3252 #if SDL_HAVE_BLIT_N_RGB565
   3253     {0x0000F800, 0x000007E0, 0x0000001F, 4, 0x00FF0000, 0x0000FF00, 0x000000FF,
   3254      0, Blit_RGB565_ARGB8888, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
   3255     {0x0000F800, 0x000007E0, 0x0000001F, 4, 0x000000FF, 0x0000FF00, 0x00FF0000,
   3256      0, Blit_RGB565_ABGR8888, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
   3257     {0x0000F800, 0x000007E0, 0x0000001F, 4, 0xFF000000, 0x00FF0000, 0x0000FF00,
   3258      0, Blit_RGB565_RGBA8888, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
   3259     {0x0000F800, 0x000007E0, 0x0000001F, 4, 0x0000FF00, 0x00FF0000, 0xFF000000,
   3260      0, Blit_RGB565_BGRA8888, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
   3261 #endif
   3262 
   3263     /* Default for 16-bit RGB source, used if no other blitter matches */
   3264     {0, 0, 0, 0, 0, 0, 0, 0, BlitNtoN, 0}
   3265 };
   3266 
   3267 static const struct blit_table normal_blit_3[] = {
   3268     /* 3->4 with same rgb triplet */
   3269     {0x000000FF, 0x0000FF00, 0x00FF0000, 4, 0x000000FF, 0x0000FF00, 0x00FF0000,
   3270      0, Blit_3or4_to_3or4__same_rgb,
   3271 #if HAVE_FAST_WRITE_INT8
   3272         NO_ALPHA |
   3273 #endif
   3274         SET_ALPHA},
   3275     {0x00FF0000, 0x0000FF00, 0x000000FF, 4, 0x00FF0000, 0x0000FF00, 0x000000FF,
   3276      0, Blit_3or4_to_3or4__same_rgb,
   3277 #if HAVE_FAST_WRITE_INT8
   3278         NO_ALPHA |
   3279 #endif
   3280         SET_ALPHA},
   3281     /* 3->4 with inversed rgb triplet */
   3282     {0x000000FF, 0x0000FF00, 0x00FF0000, 4, 0x00FF0000, 0x0000FF00, 0x000000FF,
   3283      0, Blit_3or4_to_3or4__inversed_rgb,
   3284 #if HAVE_FAST_WRITE_INT8
   3285         NO_ALPHA |
   3286 #endif
   3287         SET_ALPHA},
   3288     {0x00FF0000, 0x0000FF00, 0x000000FF, 4, 0x000000FF, 0x0000FF00, 0x00FF0000,
   3289      0, Blit_3or4_to_3or4__inversed_rgb,
   3290 #if HAVE_FAST_WRITE_INT8
   3291         NO_ALPHA |
   3292 #endif
   3293         SET_ALPHA},
   3294     /* 3->3 to switch RGB 24 <-> BGR 24 */
   3295     {0x000000FF, 0x0000FF00, 0x00FF0000, 3, 0x00FF0000, 0x0000FF00, 0x000000FF,
   3296      0, Blit_3or4_to_3or4__inversed_rgb, NO_ALPHA },
   3297     {0x00FF0000, 0x0000FF00, 0x000000FF, 3, 0x000000FF, 0x0000FF00, 0x00FF0000,
   3298      0, Blit_3or4_to_3or4__inversed_rgb, NO_ALPHA },
   3299     /* Default for 24-bit RGB source, never optimized */
   3300     {0, 0, 0, 0, 0, 0, 0, 0, BlitNtoN, 0}
   3301 };
   3302 
   3303 static const struct blit_table normal_blit_4[] = {
   3304 #if SDL_ALTIVEC_BLITTERS
   3305     /* has-altivec | dont-use-prefetch */
   3306     {0x00000000, 0x00000000, 0x00000000, 4, 0x00000000, 0x00000000, 0x00000000,
   3307      BLIT_FEATURE_HAS_ALTIVEC | BLIT_FEATURE_ALTIVEC_DONT_USE_PREFETCH, ConvertAltivec32to32_noprefetch, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
   3308     /* has-altivec */
   3309     {0x00000000, 0x00000000, 0x00000000, 4, 0x00000000, 0x00000000, 0x00000000,
   3310      BLIT_FEATURE_HAS_ALTIVEC, ConvertAltivec32to32_prefetch, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
   3311     /* has-altivec */
   3312     {0x00000000, 0x00000000, 0x00000000, 2, 0x0000F800, 0x000007E0, 0x0000001F,
   3313      BLIT_FEATURE_HAS_ALTIVEC, Blit_RGB888_RGB565Altivec, NO_ALPHA},
   3314 #endif
   3315 #if SDL_ARM_SIMD_BLITTERS
   3316     {0x000000FF, 0x0000FF00, 0x00FF0000, 4, 0x00FF0000, 0x0000FF00, 0x000000FF,
   3317      BLIT_FEATURE_HAS_ARM_SIMD, Blit_BGR888_RGB888ARMSIMD, NO_ALPHA | COPY_ALPHA },
   3318 #endif
   3319     /* 4->3 with same rgb triplet */
   3320     {0x000000FF, 0x0000FF00, 0x00FF0000, 3, 0x000000FF, 0x0000FF00, 0x00FF0000,
   3321      0, Blit_3or4_to_3or4__same_rgb, NO_ALPHA | SET_ALPHA},
   3322     {0x00FF0000, 0x0000FF00, 0x000000FF, 3, 0x00FF0000, 0x0000FF00, 0x000000FF,
   3323      0, Blit_3or4_to_3or4__same_rgb, NO_ALPHA | SET_ALPHA},
   3324     /* 4->3 with inversed rgb triplet */
   3325     {0x000000FF, 0x0000FF00, 0x00FF0000, 3, 0x00FF0000, 0x0000FF00, 0x000000FF,
   3326      0, Blit_3or4_to_3or4__inversed_rgb, NO_ALPHA | SET_ALPHA},
   3327     {0x00FF0000, 0x0000FF00, 0x000000FF, 3, 0x000000FF, 0x0000FF00, 0x00FF0000,
   3328      0, Blit_3or4_to_3or4__inversed_rgb, NO_ALPHA | SET_ALPHA},
   3329     /* 4->4 with inversed rgb triplet, and COPY_ALPHA to switch ABGR8888 <-> ARGB8888 */
   3330     {0x000000FF, 0x0000FF00, 0x00FF0000, 4, 0x00FF0000, 0x0000FF00, 0x000000FF,
   3331      0, Blit_3or4_to_3or4__inversed_rgb,
   3332 #if HAVE_FAST_WRITE_INT8
   3333         NO_ALPHA |
   3334 #endif
   3335         SET_ALPHA | COPY_ALPHA},
   3336     {0x00FF0000, 0x0000FF00, 0x000000FF, 4, 0x000000FF, 0x0000FF00, 0x00FF0000,
   3337      0, Blit_3or4_to_3or4__inversed_rgb,
   3338 #if HAVE_FAST_WRITE_INT8
   3339         NO_ALPHA |
   3340 #endif
   3341         SET_ALPHA | COPY_ALPHA},
   3342     /* RGB 888 and RGB 565 */
   3343     {0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x0000F800, 0x000007E0, 0x0000001F,
   3344      0, Blit_RGB888_RGB565, NO_ALPHA},
   3345     {0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x00007C00, 0x000003E0, 0x0000001F,
   3346      0, Blit_RGB888_RGB555, NO_ALPHA},
   3347     /* Default for 32-bit RGB source, used if no other blitter matches */
   3348     {0, 0, 0, 0, 0, 0, 0, 0, BlitNtoN, 0}
   3349 };
   3350 
   3351 static const struct blit_table *const normal_blit[] = {
   3352     normal_blit_1, normal_blit_2, normal_blit_3, normal_blit_4
   3353 };
   3354 
   3355 /* Mask matches table, or table entry is zero */
   3356 #define MASKOK(x, y) (((x) == (y)) || ((y) == 0x00000000))
   3357 
   3358 SDL_BlitFunc
   3359 SDL_CalculateBlitN(SDL_Surface * surface)
   3360 {
   3361     SDL_PixelFormat *srcfmt;
   3362     SDL_PixelFormat *dstfmt;
   3363     const struct blit_table *table;
   3364     int which;
   3365     SDL_BlitFunc blitfun;
   3366 
   3367     /* Set up data for choosing the blit */
   3368     srcfmt = surface->format;
   3369     dstfmt = surface->map->dst->format;
   3370 
   3371     /* We don't support destinations less than 8-bits */
   3372     if (dstfmt->BitsPerPixel < 8) {
   3373         return (NULL);
   3374     }
   3375 
   3376     switch (surface->map->info.flags & ~SDL_COPY_RLE_MASK) {
   3377     case 0:
   3378         blitfun = NULL;
   3379         if (dstfmt->BitsPerPixel == 8) {
   3380             if ((srcfmt->BytesPerPixel == 4) &&
   3381                 (srcfmt->Rmask == 0x00FF0000) &&
   3382                 (srcfmt->Gmask == 0x0000FF00) &&
   3383                 (srcfmt->Bmask == 0x000000FF)) {
   3384                 blitfun = Blit_RGB888_index8;
   3385             } else if ((srcfmt->BytesPerPixel == 4) &&
   3386                 (srcfmt->Rmask == 0x3FF00000) &&
   3387                 (srcfmt->Gmask == 0x000FFC00) &&
   3388                 (srcfmt->Bmask == 0x000003FF)) {
   3389                 blitfun = Blit_RGB101010_index8;
   3390             } else {
   3391                 blitfun = BlitNto1;
   3392             }
   3393         } else {
   3394             /* Now the meat, choose the blitter we want */
   3395             Uint32 a_need = NO_ALPHA;
   3396             if (dstfmt->Amask)
   3397                 a_need = srcfmt->Amask ? COPY_ALPHA : SET_ALPHA;
   3398             table = normal_blit[srcfmt->BytesPerPixel - 1];
   3399             for (which = 0; table[which].dstbpp; ++which) {
   3400                 if (MASKOK(srcfmt->Rmask, table[which].srcR) &&
   3401                     MASKOK(srcfmt->Gmask, table[which].srcG) &&
   3402                     MASKOK(srcfmt->Bmask, table[which].srcB) &&
   3403                     MASKOK(dstfmt->Rmask, table[which].dstR) &&
   3404                     MASKOK(dstfmt->Gmask, table[which].dstG) &&
   3405                     MASKOK(dstfmt->Bmask, table[which].dstB) &&
   3406                     dstfmt->BytesPerPixel == table[which].dstbpp &&
   3407                     (a_need & table[which].alpha) == a_need &&
   3408                     ((table[which].blit_features & GetBlitFeatures()) ==
   3409                      table[which].blit_features))
   3410                     break;
   3411             }
   3412             blitfun = table[which].blitfunc;
   3413 
   3414             if (blitfun == BlitNtoN) {  /* default C fallback catch-all. Slow! */
   3415                 if (srcfmt->format == SDL_PIXELFORMAT_ARGB2101010) {
   3416                     blitfun = Blit2101010toN;
   3417                 } else if (dstfmt->format == SDL_PIXELFORMAT_ARGB2101010) {
   3418                     blitfun = BlitNto2101010;
   3419                 } else if (srcfmt->BytesPerPixel == 4 &&
   3420                             dstfmt->BytesPerPixel == 4 &&
   3421                             srcfmt->Rmask == dstfmt->Rmask &&
   3422                             srcfmt->Gmask == dstfmt->Gmask &&
   3423                             srcfmt->Bmask == dstfmt->Bmask) {
   3424                     if (a_need == COPY_ALPHA) {
   3425                         if (srcfmt->Amask == dstfmt->Amask) {
   3426                             /* Fastpath C fallback: 32bit RGBA<->RGBA blit with matching RGBA */
   3427                             blitfun = Blit4to4CopyAlpha;
   3428                         } else {
   3429                             blitfun = BlitNtoNCopyAlpha;
   3430                         }
   3431                     } else {
   3432                         /* Fastpath C fallback: 32bit RGB<->RGBA blit with matching RGB */
   3433                         blitfun = Blit4to4MaskAlpha;
   3434                     }
   3435                 } else if (a_need == COPY_ALPHA) {
   3436                     blitfun = BlitNtoNCopyAlpha;
   3437                 }
   3438             }
   3439         }
   3440         return (blitfun);
   3441 
   3442     case SDL_COPY_COLORKEY:
   3443         /* colorkey blit: Here we don't have too many options, mostly
   3444            because RLE is the preferred fast way to deal with this.
   3445            If a particular case turns out to be useful we'll add it. */
   3446 
   3447         if (srcfmt->BytesPerPixel == 2 && surface->map->identity)
   3448             return Blit2to2Key;
   3449         else if (dstfmt->BytesPerPixel == 1)
   3450             return BlitNto1Key;
   3451         else {
   3452 #if SDL_ALTIVEC_BLITTERS
   3453             if ((srcfmt->BytesPerPixel == 4) && (dstfmt->BytesPerPixel == 4)
   3454                 && SDL_HasAltiVec()) {
   3455                 return Blit32to32KeyAltivec;
   3456             } else
   3457 #endif
   3458             if (srcfmt->Amask && dstfmt->Amask) {
   3459                 return BlitNtoNKeyCopyAlpha;
   3460             } else {
   3461                 return BlitNtoNKey;
   3462             }
   3463         }
   3464     }
   3465 
   3466     return NULL;
   3467 }
   3468 
   3469 #endif /* SDL_HAVE_BLIT_N */
   3470 
   3471 /* vi: set ts=4 sw=4 expandtab: */