sdl

FORK: Simple DirectMedia Layer
git clone https://git.neptards.moe/neptards/sdl.git
Log | Files | Refs

SDL_yuv.c (64750B)


      1 /*
      2   Simple DirectMedia Layer
      3   Copyright (C) 1997-2020 Sam Lantinga <slouken@libsdl.org>
      4 
      5   This software is provided 'as-is', without any express or implied
      6   warranty.  In no event will the authors be held liable for any damages
      7   arising from the use of this software.
      8 
      9   Permission is granted to anyone to use this software for any purpose,
     10   including commercial applications, and to alter it and redistribute it
     11   freely, subject to the following restrictions:
     12 
     13   1. The origin of this software must not be misrepresented; you must not
     14      claim that you wrote the original software. If you use this software
     15      in a product, an acknowledgment in the product documentation would be
     16      appreciated but is not required.
     17   2. Altered source versions must be plainly marked as such, and must not be
     18      misrepresented as being the original software.
     19   3. This notice may not be removed or altered from any source distribution.
     20 */
     21 #include "../SDL_internal.h"
     22 
     23 #include "SDL_endian.h"
     24 #include "SDL_video.h"
     25 #include "SDL_pixels_c.h"
     26 #include "SDL_yuv_c.h"
     27 
     28 #include "yuv2rgb/yuv_rgb.h"
     29 
     30 #define SDL_YUV_SD_THRESHOLD    576
     31 
     32 
     33 static SDL_YUV_CONVERSION_MODE SDL_YUV_ConversionMode = SDL_YUV_CONVERSION_BT601;
     34 
     35 
     36 void SDL_SetYUVConversionMode(SDL_YUV_CONVERSION_MODE mode)
     37 {
     38     SDL_YUV_ConversionMode = mode;
     39 }
     40 
     41 SDL_YUV_CONVERSION_MODE SDL_GetYUVConversionMode()
     42 {
     43     return SDL_YUV_ConversionMode;
     44 }
     45 
     46 SDL_YUV_CONVERSION_MODE SDL_GetYUVConversionModeForResolution(int width, int height)
     47 {
     48     SDL_YUV_CONVERSION_MODE mode = SDL_GetYUVConversionMode();
     49     if (mode == SDL_YUV_CONVERSION_AUTOMATIC) {
     50         if (height <= SDL_YUV_SD_THRESHOLD) {
     51             mode = SDL_YUV_CONVERSION_BT601;
     52         } else {
     53             mode = SDL_YUV_CONVERSION_BT709;
     54         }
     55     }
     56     return mode;
     57 }
     58 
     59 #if SDL_HAVE_YUV
     60 
     61 static int GetYUVConversionType(int width, int height, YCbCrType *yuv_type)
     62 {
     63     switch (SDL_GetYUVConversionModeForResolution(width, height)) {
     64     case SDL_YUV_CONVERSION_JPEG:
     65         *yuv_type = YCBCR_JPEG;
     66         break;
     67     case SDL_YUV_CONVERSION_BT601:
     68         *yuv_type = YCBCR_601;
     69         break;
     70     case SDL_YUV_CONVERSION_BT709:
     71         *yuv_type = YCBCR_709;
     72         break;
     73     default:
     74         return SDL_SetError("Unexpected YUV conversion mode");
     75     }
     76     return 0;
     77 }
     78 
     79 static SDL_bool IsPlanar2x2Format(Uint32 format)
     80 {
     81     return (format == SDL_PIXELFORMAT_YV12 ||
     82             format == SDL_PIXELFORMAT_IYUV ||
     83             format == SDL_PIXELFORMAT_NV12 ||
     84             format == SDL_PIXELFORMAT_NV21);
     85 }
     86 
     87 static SDL_bool IsPacked4Format(Uint32 format)
     88 {
     89     return (format == SDL_PIXELFORMAT_YUY2 ||
     90             format == SDL_PIXELFORMAT_UYVY ||
     91             format == SDL_PIXELFORMAT_YVYU);
     92 }
     93 
     94 static int GetYUVPlanes(int width, int height, Uint32 format, const void *yuv, int yuv_pitch,
     95                         const Uint8 **y, const Uint8 **u, const Uint8 **v, Uint32 *y_stride, Uint32 *uv_stride)
     96 {
     97     const Uint8 *planes[3] = { NULL, NULL, NULL };
     98     int pitches[3] = { 0, 0, 0 };
     99 
    100     switch (format) {
    101     case SDL_PIXELFORMAT_YV12:
    102     case SDL_PIXELFORMAT_IYUV:
    103         pitches[0] = yuv_pitch;
    104         pitches[1] = (pitches[0] + 1) / 2;
    105         pitches[2] = (pitches[0] + 1) / 2;
    106         planes[0] = (const Uint8 *)yuv;
    107         planes[1] = planes[0] + pitches[0] * height;
    108         planes[2] = planes[1] + pitches[1] * ((height + 1) / 2);
    109         break;
    110     case SDL_PIXELFORMAT_YUY2:
    111     case SDL_PIXELFORMAT_UYVY:
    112     case SDL_PIXELFORMAT_YVYU:
    113         pitches[0] = yuv_pitch;
    114         planes[0] = (const Uint8 *)yuv;
    115         break;
    116     case SDL_PIXELFORMAT_NV12:
    117     case SDL_PIXELFORMAT_NV21:
    118         pitches[0] = yuv_pitch;
    119         pitches[1] = 2 * ((pitches[0] + 1) / 2);
    120         planes[0] = (const Uint8 *)yuv;
    121         planes[1] = planes[0] + pitches[0] * height;
    122         break;
    123     default:
    124         return SDL_SetError("GetYUVPlanes(): Unsupported YUV format: %s", SDL_GetPixelFormatName(format));
    125     }
    126 
    127     switch (format) {
    128     case SDL_PIXELFORMAT_YV12:
    129         *y = planes[0];
    130         *y_stride = pitches[0];
    131         *v = planes[1];
    132         *u = planes[2];
    133         *uv_stride = pitches[1];
    134         break;
    135     case SDL_PIXELFORMAT_IYUV:
    136         *y = planes[0];
    137         *y_stride = pitches[0];
    138         *v = planes[2];
    139         *u = planes[1];
    140         *uv_stride = pitches[1];
    141         break;
    142     case SDL_PIXELFORMAT_YUY2:
    143         *y = planes[0];
    144         *y_stride = pitches[0];
    145         *v = *y + 3;
    146         *u = *y + 1;
    147         *uv_stride = pitches[0];
    148         break;
    149     case SDL_PIXELFORMAT_UYVY:
    150         *y = planes[0] + 1;
    151         *y_stride = pitches[0];
    152         *v = *y + 1;
    153         *u = *y - 1;
    154         *uv_stride = pitches[0];
    155         break;
    156     case SDL_PIXELFORMAT_YVYU:
    157         *y = planes[0];
    158         *y_stride = pitches[0];
    159         *v = *y + 1;
    160         *u = *y + 3;
    161         *uv_stride = pitches[0];
    162         break;
    163     case SDL_PIXELFORMAT_NV12:
    164         *y = planes[0];
    165         *y_stride = pitches[0];
    166         *u = planes[1];
    167         *v = *u + 1;
    168         *uv_stride = pitches[1];
    169         break;
    170     case SDL_PIXELFORMAT_NV21:
    171         *y = planes[0];
    172         *y_stride = pitches[0];
    173         *v = planes[1];
    174         *u = *v + 1;
    175         *uv_stride = pitches[1];
    176         break;
    177     default:
    178         /* Should have caught this above */
    179         return SDL_SetError("GetYUVPlanes[2]: Unsupported YUV format: %s", SDL_GetPixelFormatName(format));
    180     }
    181     return 0;
    182 }
    183 
    184 static SDL_bool yuv_rgb_sse(
    185     Uint32 src_format, Uint32 dst_format,
    186     Uint32 width, Uint32 height, 
    187     const Uint8 *y, const Uint8 *u, const Uint8 *v, Uint32 y_stride, Uint32 uv_stride, 
    188     Uint8 *rgb, Uint32 rgb_stride, 
    189     YCbCrType yuv_type)
    190 {
    191 #ifdef __SSE2__
    192     if (!SDL_HasSSE2()) {
    193         return SDL_FALSE;
    194     }
    195 
    196     if (src_format == SDL_PIXELFORMAT_YV12 ||
    197         src_format == SDL_PIXELFORMAT_IYUV) {
    198 
    199         switch (dst_format) {
    200         case SDL_PIXELFORMAT_RGB565:
    201             yuv420_rgb565_sseu(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
    202             return SDL_TRUE;
    203         case SDL_PIXELFORMAT_RGB24:
    204             yuv420_rgb24_sseu(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
    205             return SDL_TRUE;
    206         case SDL_PIXELFORMAT_RGBX8888:
    207         case SDL_PIXELFORMAT_RGBA8888:
    208             yuv420_rgba_sseu(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
    209             return SDL_TRUE;
    210         case SDL_PIXELFORMAT_BGRX8888:
    211         case SDL_PIXELFORMAT_BGRA8888:
    212             yuv420_bgra_sseu(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
    213             return SDL_TRUE;
    214         case SDL_PIXELFORMAT_RGB888:
    215         case SDL_PIXELFORMAT_ARGB8888:
    216             yuv420_argb_sseu(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
    217             return SDL_TRUE;
    218         case SDL_PIXELFORMAT_BGR888:
    219         case SDL_PIXELFORMAT_ABGR8888:
    220             yuv420_abgr_sseu(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
    221             return SDL_TRUE;
    222         default:
    223             break;
    224         }
    225     }
    226 
    227     if (src_format == SDL_PIXELFORMAT_YUY2 ||
    228         src_format == SDL_PIXELFORMAT_UYVY ||
    229         src_format == SDL_PIXELFORMAT_YVYU) {
    230 
    231         switch (dst_format) {
    232         case SDL_PIXELFORMAT_RGB565:
    233             yuv422_rgb565_sseu(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
    234             return SDL_TRUE;
    235         case SDL_PIXELFORMAT_RGB24:
    236             yuv422_rgb24_sseu(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
    237             return SDL_TRUE;
    238         case SDL_PIXELFORMAT_RGBX8888:
    239         case SDL_PIXELFORMAT_RGBA8888:
    240             yuv422_rgba_sseu(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
    241             return SDL_TRUE;
    242         case SDL_PIXELFORMAT_BGRX8888:
    243         case SDL_PIXELFORMAT_BGRA8888:
    244             yuv422_bgra_sseu(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
    245             return SDL_TRUE;
    246         case SDL_PIXELFORMAT_RGB888:
    247         case SDL_PIXELFORMAT_ARGB8888:
    248             yuv422_argb_sseu(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
    249             return SDL_TRUE;
    250         case SDL_PIXELFORMAT_BGR888:
    251         case SDL_PIXELFORMAT_ABGR8888:
    252             yuv422_abgr_sseu(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
    253             return SDL_TRUE;
    254         default:
    255             break;
    256         }
    257     }
    258 
    259     if (src_format == SDL_PIXELFORMAT_NV12 ||
    260         src_format == SDL_PIXELFORMAT_NV21) {
    261 
    262         switch (dst_format) {
    263         case SDL_PIXELFORMAT_RGB565:
    264             yuvnv12_rgb565_sseu(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
    265             return SDL_TRUE;
    266         case SDL_PIXELFORMAT_RGB24:
    267             yuvnv12_rgb24_sseu(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
    268             return SDL_TRUE;
    269         case SDL_PIXELFORMAT_RGBX8888:
    270         case SDL_PIXELFORMAT_RGBA8888:
    271             yuvnv12_rgba_sseu(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
    272             return SDL_TRUE;
    273         case SDL_PIXELFORMAT_BGRX8888:
    274         case SDL_PIXELFORMAT_BGRA8888:
    275             yuvnv12_bgra_sseu(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
    276             return SDL_TRUE;
    277         case SDL_PIXELFORMAT_RGB888:
    278         case SDL_PIXELFORMAT_ARGB8888:
    279             yuvnv12_argb_sseu(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
    280             return SDL_TRUE;
    281         case SDL_PIXELFORMAT_BGR888:
    282         case SDL_PIXELFORMAT_ABGR8888:
    283             yuvnv12_abgr_sseu(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
    284             return SDL_TRUE;
    285         default:
    286             break;
    287         }
    288     }
    289 #endif
    290     return SDL_FALSE;
    291 }
    292 
    293 static SDL_bool yuv_rgb_std(
    294     Uint32 src_format, Uint32 dst_format,
    295     Uint32 width, Uint32 height, 
    296     const Uint8 *y, const Uint8 *u, const Uint8 *v, Uint32 y_stride, Uint32 uv_stride, 
    297     Uint8 *rgb, Uint32 rgb_stride, 
    298     YCbCrType yuv_type)
    299 {
    300     if (src_format == SDL_PIXELFORMAT_YV12 ||
    301         src_format == SDL_PIXELFORMAT_IYUV) {
    302 
    303         switch (dst_format) {
    304         case SDL_PIXELFORMAT_RGB565:
    305             yuv420_rgb565_std(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
    306             return SDL_TRUE;
    307         case SDL_PIXELFORMAT_RGB24:
    308             yuv420_rgb24_std(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
    309             return SDL_TRUE;
    310         case SDL_PIXELFORMAT_RGBX8888:
    311         case SDL_PIXELFORMAT_RGBA8888:
    312             yuv420_rgba_std(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
    313             return SDL_TRUE;
    314         case SDL_PIXELFORMAT_BGRX8888:
    315         case SDL_PIXELFORMAT_BGRA8888:
    316             yuv420_bgra_std(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
    317             return SDL_TRUE;
    318         case SDL_PIXELFORMAT_RGB888:
    319         case SDL_PIXELFORMAT_ARGB8888:
    320             yuv420_argb_std(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
    321             return SDL_TRUE;
    322         case SDL_PIXELFORMAT_BGR888:
    323         case SDL_PIXELFORMAT_ABGR8888:
    324             yuv420_abgr_std(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
    325             return SDL_TRUE;
    326         default:
    327             break;
    328         }
    329     }
    330 
    331     if (src_format == SDL_PIXELFORMAT_YUY2 ||
    332         src_format == SDL_PIXELFORMAT_UYVY ||
    333         src_format == SDL_PIXELFORMAT_YVYU) {
    334 
    335         switch (dst_format) {
    336         case SDL_PIXELFORMAT_RGB565:
    337             yuv422_rgb565_std(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
    338             return SDL_TRUE;
    339         case SDL_PIXELFORMAT_RGB24:
    340             yuv422_rgb24_std(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
    341             return SDL_TRUE;
    342         case SDL_PIXELFORMAT_RGBX8888:
    343         case SDL_PIXELFORMAT_RGBA8888:
    344             yuv422_rgba_std(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
    345             return SDL_TRUE;
    346         case SDL_PIXELFORMAT_BGRX8888:
    347         case SDL_PIXELFORMAT_BGRA8888:
    348             yuv422_bgra_std(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
    349             return SDL_TRUE;
    350         case SDL_PIXELFORMAT_RGB888:
    351         case SDL_PIXELFORMAT_ARGB8888:
    352             yuv422_argb_std(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
    353             return SDL_TRUE;
    354         case SDL_PIXELFORMAT_BGR888:
    355         case SDL_PIXELFORMAT_ABGR8888:
    356             yuv422_abgr_std(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
    357             return SDL_TRUE;
    358         default:
    359             break;
    360         }
    361     }
    362 
    363     if (src_format == SDL_PIXELFORMAT_NV12 ||
    364         src_format == SDL_PIXELFORMAT_NV21) {
    365 
    366         switch (dst_format) {
    367         case SDL_PIXELFORMAT_RGB565:
    368             yuvnv12_rgb565_std(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
    369             return SDL_TRUE;
    370         case SDL_PIXELFORMAT_RGB24:
    371             yuvnv12_rgb24_std(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
    372             return SDL_TRUE;
    373         case SDL_PIXELFORMAT_RGBX8888:
    374         case SDL_PIXELFORMAT_RGBA8888:
    375             yuvnv12_rgba_std(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
    376             return SDL_TRUE;
    377         case SDL_PIXELFORMAT_BGRX8888:
    378         case SDL_PIXELFORMAT_BGRA8888:
    379             yuvnv12_bgra_std(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
    380             return SDL_TRUE;
    381         case SDL_PIXELFORMAT_RGB888:
    382         case SDL_PIXELFORMAT_ARGB8888:
    383             yuvnv12_argb_std(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
    384             return SDL_TRUE;
    385         case SDL_PIXELFORMAT_BGR888:
    386         case SDL_PIXELFORMAT_ABGR8888:
    387             yuvnv12_abgr_std(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
    388             return SDL_TRUE;
    389         default:
    390             break;
    391         }
    392     }
    393     return SDL_FALSE;
    394 }
    395 
    396 int
    397 SDL_ConvertPixels_YUV_to_RGB(int width, int height,
    398          Uint32 src_format, const void *src, int src_pitch,
    399          Uint32 dst_format, void *dst, int dst_pitch)
    400 {
    401     const Uint8 *y = NULL;
    402     const Uint8 *u = NULL;
    403     const Uint8 *v = NULL;
    404     Uint32 y_stride = 0;
    405     Uint32 uv_stride = 0;
    406     YCbCrType yuv_type = YCBCR_601;
    407 
    408     if (GetYUVPlanes(width, height, src_format, src, src_pitch, &y, &u, &v, &y_stride, &uv_stride) < 0) {
    409         return -1;
    410     }
    411 
    412     if (GetYUVConversionType(width, height, &yuv_type) < 0) {
    413         return -1;
    414     }
    415 
    416     if (yuv_rgb_sse(src_format, dst_format, width, height, y, u, v, y_stride, uv_stride, (Uint8*)dst, dst_pitch, yuv_type)) {
    417         return 0;
    418     }
    419 
    420     if (yuv_rgb_std(src_format, dst_format, width, height, y, u, v, y_stride, uv_stride, (Uint8*)dst, dst_pitch, yuv_type)) {
    421         return 0;
    422     }
    423 
    424     /* No fast path for the RGB format, instead convert using an intermediate buffer */
    425     if (dst_format != SDL_PIXELFORMAT_ARGB8888) {
    426         int ret;
    427         void *tmp;
    428         int tmp_pitch = (width * sizeof(Uint32));
    429 
    430         tmp = SDL_malloc(tmp_pitch * height);
    431         if (tmp == NULL) {
    432             return SDL_OutOfMemory();
    433         }
    434 
    435         /* convert src/src_format to tmp/ARGB8888 */
    436         ret = SDL_ConvertPixels_YUV_to_RGB(width, height, src_format, src, src_pitch, SDL_PIXELFORMAT_ARGB8888, tmp, tmp_pitch);
    437         if (ret < 0) {
    438             SDL_free(tmp);
    439             return ret;
    440         }
    441 
    442         /* convert tmp/ARGB8888 to dst/RGB */
    443         ret = SDL_ConvertPixels(width, height, SDL_PIXELFORMAT_ARGB8888, tmp, tmp_pitch, dst_format, dst, dst_pitch);
    444         SDL_free(tmp);
    445         return ret;
    446     }
    447 
    448     return SDL_SetError("Unsupported YUV conversion");
    449 }
    450 
    451 struct RGB2YUVFactors
    452 {
    453     int y_offset;
    454     float y[3]; /* Rfactor, Gfactor, Bfactor */
    455     float u[3]; /* Rfactor, Gfactor, Bfactor */
    456     float v[3]; /* Rfactor, Gfactor, Bfactor */
    457 };
    458 
    459 static int
    460 SDL_ConvertPixels_ARGB8888_to_YUV(int width, int height, const void *src, int src_pitch, Uint32 dst_format, void *dst, int dst_pitch)
    461 {
    462     const int src_pitch_x_2    = src_pitch * 2;
    463     const int height_half      = height / 2;
    464     const int height_remainder = (height & 0x1);
    465     const int width_half       = width / 2;
    466     const int width_remainder  = (width & 0x1);
    467     int i, j;
    468  
    469     static struct RGB2YUVFactors RGB2YUVFactorTables[SDL_YUV_CONVERSION_BT709 + 1] =
    470     {
    471         /* ITU-T T.871 (JPEG) */
    472         {
    473             0,
    474             {  0.2990f,  0.5870f,  0.1140f },
    475             { -0.1687f, -0.3313f,  0.5000f },
    476             {  0.5000f, -0.4187f, -0.0813f },
    477         },
    478         /* ITU-R BT.601-7 */
    479         {
    480             16,
    481             {  0.2568f,  0.5041f,  0.0979f },
    482             { -0.1482f, -0.2910f,  0.4392f },
    483             {  0.4392f, -0.3678f, -0.0714f },
    484         },
    485         /* ITU-R BT.709-6 */
    486         {
    487             16,
    488             { 0.1826f,  0.6142f,  0.0620f },
    489             {-0.1006f, -0.3386f,  0.4392f },
    490             { 0.4392f, -0.3989f, -0.0403f },
    491         },
    492     };
    493     const struct RGB2YUVFactors *cvt = &RGB2YUVFactorTables[SDL_GetYUVConversionModeForResolution(width, height)];
    494 
    495 #define MAKE_Y(r, g, b) (Uint8)((int)(cvt->y[0] * (r) + cvt->y[1] * (g) + cvt->y[2] * (b) + 0.5f) + cvt->y_offset)
    496 #define MAKE_U(r, g, b) (Uint8)((int)(cvt->u[0] * (r) + cvt->u[1] * (g) + cvt->u[2] * (b) + 0.5f) + 128)
    497 #define MAKE_V(r, g, b) (Uint8)((int)(cvt->v[0] * (r) + cvt->v[1] * (g) + cvt->v[2] * (b) + 0.5f) + 128)
    498 
    499 #define READ_2x2_PIXELS                                                                                         \
    500         const Uint32 p1 = ((const Uint32 *)curr_row)[2 * i];                                                    \
    501         const Uint32 p2 = ((const Uint32 *)curr_row)[2 * i + 1];                                                \
    502         const Uint32 p3 = ((const Uint32 *)next_row)[2 * i];                                                    \
    503         const Uint32 p4 = ((const Uint32 *)next_row)[2 * i + 1];                                                \
    504         const Uint32 r = ((p1 & 0x00ff0000) + (p2 & 0x00ff0000) + (p3 & 0x00ff0000) + (p4 & 0x00ff0000)) >> 18; \
    505         const Uint32 g = ((p1 & 0x0000ff00) + (p2 & 0x0000ff00) + (p3 & 0x0000ff00) + (p4 & 0x0000ff00)) >> 10; \
    506         const Uint32 b = ((p1 & 0x000000ff) + (p2 & 0x000000ff) + (p3 & 0x000000ff) + (p4 & 0x000000ff)) >> 2;  \
    507 
    508 #define READ_2x1_PIXELS                                                                                         \
    509         const Uint32 p1 = ((const Uint32 *)curr_row)[2 * i];                                                    \
    510         const Uint32 p2 = ((const Uint32 *)next_row)[2 * i];                                                    \
    511         const Uint32 r = ((p1 & 0x00ff0000) + (p2 & 0x00ff0000)) >> 17;                                         \
    512         const Uint32 g = ((p1 & 0x0000ff00) + (p2 & 0x0000ff00)) >> 9;                                          \
    513         const Uint32 b = ((p1 & 0x000000ff) + (p2 & 0x000000ff)) >> 1;                                          \
    514 
    515 #define READ_1x2_PIXELS                                                                                         \
    516         const Uint32 p1 = ((const Uint32 *)curr_row)[2 * i];                                                    \
    517         const Uint32 p2 = ((const Uint32 *)curr_row)[2 * i + 1];                                                \
    518         const Uint32 r = ((p1 & 0x00ff0000) + (p2 & 0x00ff0000)) >> 17;                                         \
    519         const Uint32 g = ((p1 & 0x0000ff00) + (p2 & 0x0000ff00)) >> 9;                                          \
    520         const Uint32 b = ((p1 & 0x000000ff) + (p2 & 0x000000ff)) >> 1;                                          \
    521 
    522 #define READ_1x1_PIXEL                                                                                          \
    523         const Uint32 p = ((const Uint32 *)curr_row)[2 * i];                                                     \
    524         const Uint32 r = (p & 0x00ff0000) >> 16;                                                                \
    525         const Uint32 g = (p & 0x0000ff00) >> 8;                                                                 \
    526         const Uint32 b = (p & 0x000000ff);                                                                      \
    527 
    528 #define READ_TWO_RGB_PIXELS                                                                                     \
    529         const Uint32 p = ((const Uint32 *)curr_row)[2 * i];                                                     \
    530         const Uint32 r = (p & 0x00ff0000) >> 16;                                                                \
    531         const Uint32 g = (p & 0x0000ff00) >> 8;                                                                 \
    532         const Uint32 b = (p & 0x000000ff);                                                                      \
    533         const Uint32 p1 = ((const Uint32 *)curr_row)[2 * i + 1];                                                \
    534         const Uint32 r1 = (p1 & 0x00ff0000) >> 16;                                                              \
    535         const Uint32 g1 = (p1 & 0x0000ff00) >> 8;                                                               \
    536         const Uint32 b1 = (p1 & 0x000000ff);                                                                    \
    537         const Uint32 R = (r + r1)/2;                                                                            \
    538         const Uint32 G = (g + g1)/2;                                                                            \
    539         const Uint32 B = (b + b1)/2;                                                                            \
    540 
    541 #define READ_ONE_RGB_PIXEL  READ_1x1_PIXEL
    542 
    543     switch (dst_format) 
    544     {
    545     case SDL_PIXELFORMAT_YV12:
    546     case SDL_PIXELFORMAT_IYUV:
    547     case SDL_PIXELFORMAT_NV12:
    548     case SDL_PIXELFORMAT_NV21:
    549         {
    550             const Uint8 *curr_row, *next_row;
    551             
    552             Uint8 *plane_y;
    553             Uint8 *plane_u;
    554             Uint8 *plane_v;
    555             Uint8 *plane_interleaved_uv;
    556             Uint32 y_stride, uv_stride, y_skip, uv_skip;
    557 
    558             GetYUVPlanes(width, height, dst_format, dst, dst_pitch,
    559                          (const Uint8 **)&plane_y, (const Uint8 **)&plane_u, (const Uint8 **)&plane_v,
    560                          &y_stride, &uv_stride);
    561             plane_interleaved_uv = (plane_y + height * y_stride);
    562             y_skip = (y_stride - width);
    563 
    564             curr_row = (const Uint8*)src;
    565 
    566             /* Write Y plane */
    567             for (j = 0; j < height; j++) {
    568                 for (i = 0; i < width; i++) {
    569                     const Uint32 p1 = ((const Uint32 *)curr_row)[i];
    570                     const Uint32 r = (p1 & 0x00ff0000) >> 16;
    571                     const Uint32 g = (p1 & 0x0000ff00) >> 8;
    572                     const Uint32 b = (p1 & 0x000000ff);
    573                     *plane_y++ = MAKE_Y(r, g, b);
    574                 }
    575                 plane_y += y_skip;
    576                 curr_row += src_pitch;
    577             }
    578 
    579             curr_row = (const Uint8*)src;
    580             next_row = (const Uint8*)src;
    581             next_row += src_pitch;
    582 
    583             if (dst_format == SDL_PIXELFORMAT_YV12 || dst_format == SDL_PIXELFORMAT_IYUV)
    584             {
    585                 /* Write UV planes, not interleaved */
    586                 uv_skip = (uv_stride - (width + 1)/2);
    587                 for (j = 0; j < height_half; j++) {
    588                     for (i = 0; i < width_half; i++) {
    589                         READ_2x2_PIXELS;
    590                         *plane_u++ = MAKE_U(r, g, b);
    591                         *plane_v++ = MAKE_V(r, g, b);
    592                     }
    593                     if (width_remainder) {
    594                         READ_2x1_PIXELS;
    595                         *plane_u++ = MAKE_U(r, g, b);
    596                         *plane_v++ = MAKE_V(r, g, b);
    597                     }
    598                     plane_u += uv_skip;
    599                     plane_v += uv_skip;
    600                     curr_row += src_pitch_x_2;
    601                     next_row += src_pitch_x_2;
    602                 }
    603                 if (height_remainder) {
    604                     for (i = 0; i < width_half; i++) {
    605                         READ_1x2_PIXELS;
    606                         *plane_u++ = MAKE_U(r, g, b);
    607                         *plane_v++ = MAKE_V(r, g, b);
    608                     }
    609                     if (width_remainder) {
    610                         READ_1x1_PIXEL;
    611                         *plane_u++ = MAKE_U(r, g, b);
    612                         *plane_v++ = MAKE_V(r, g, b);
    613                     }
    614                     plane_u += uv_skip;
    615                     plane_v += uv_skip;
    616                 }
    617             }
    618             else if (dst_format == SDL_PIXELFORMAT_NV12)
    619             {
    620                 uv_skip = (uv_stride - ((width + 1)/2)*2);
    621                 for (j = 0; j < height_half; j++) {
    622                     for (i = 0; i < width_half; i++) {
    623                         READ_2x2_PIXELS;
    624                         *plane_interleaved_uv++ = MAKE_U(r, g, b);
    625                         *plane_interleaved_uv++ = MAKE_V(r, g, b);
    626                     }
    627                     if (width_remainder) {
    628                         READ_2x1_PIXELS;
    629                         *plane_interleaved_uv++ = MAKE_U(r, g, b);
    630                         *plane_interleaved_uv++ = MAKE_V(r, g, b);
    631                     }
    632                     plane_interleaved_uv += uv_skip;
    633                     curr_row += src_pitch_x_2;
    634                     next_row += src_pitch_x_2;
    635                 }
    636                 if (height_remainder) {
    637                     for (i = 0; i < width_half; i++) {
    638                         READ_1x2_PIXELS;
    639                         *plane_interleaved_uv++ = MAKE_U(r, g, b);
    640                         *plane_interleaved_uv++ = MAKE_V(r, g, b);
    641                     }
    642                     if (width_remainder) {
    643                         READ_1x1_PIXEL;
    644                         *plane_interleaved_uv++ = MAKE_U(r, g, b);
    645                         *plane_interleaved_uv++ = MAKE_V(r, g, b);
    646                     }
    647                 }
    648             } 
    649             else /* dst_format == SDL_PIXELFORMAT_NV21 */
    650             {
    651                 uv_skip = (uv_stride - ((width + 1)/2)*2);
    652                 for (j = 0; j < height_half; j++) {
    653                     for (i = 0; i < width_half; i++) {
    654                         READ_2x2_PIXELS;
    655                         *plane_interleaved_uv++ = MAKE_V(r, g, b);
    656                         *plane_interleaved_uv++ = MAKE_U(r, g, b);
    657                     }
    658                     if (width_remainder) {
    659                         READ_2x1_PIXELS;
    660                         *plane_interleaved_uv++ = MAKE_V(r, g, b);
    661                         *plane_interleaved_uv++ = MAKE_U(r, g, b);
    662                     }
    663                     plane_interleaved_uv += uv_skip;
    664                     curr_row += src_pitch_x_2;
    665                     next_row += src_pitch_x_2;
    666                 }
    667                 if (height_remainder) {
    668                     for (i = 0; i < width_half; i++) {
    669                         READ_1x2_PIXELS;
    670                         *plane_interleaved_uv++ = MAKE_V(r, g, b);
    671                         *plane_interleaved_uv++ = MAKE_U(r, g, b);
    672                     }
    673                     if (width_remainder) {
    674                         READ_1x1_PIXEL;
    675                         *plane_interleaved_uv++ = MAKE_V(r, g, b);
    676                         *plane_interleaved_uv++ = MAKE_U(r, g, b);
    677                     }
    678                 }
    679             }
    680         }
    681         break;
    682 
    683     case SDL_PIXELFORMAT_YUY2:
    684     case SDL_PIXELFORMAT_UYVY:
    685     case SDL_PIXELFORMAT_YVYU:
    686         {
    687             const Uint8 *curr_row = (const Uint8*) src;
    688             Uint8 *plane           = (Uint8*) dst;
    689             const int row_size = (4 * ((width + 1) / 2));
    690             int plane_skip;
    691 
    692             if (dst_pitch < row_size) {
    693                 return SDL_SetError("Destination pitch is too small, expected at least %d\n", row_size);
    694             }
    695             plane_skip = (dst_pitch - row_size);
    696 
    697             /* Write YUV plane, packed */
    698             if (dst_format == SDL_PIXELFORMAT_YUY2) 
    699             {
    700                 for (j = 0; j < height; j++) {
    701                     for (i = 0; i < width_half; i++) {
    702                         READ_TWO_RGB_PIXELS;
    703                         /* Y U Y1 V */
    704                         *plane++ = MAKE_Y(r, g, b);
    705                         *plane++ = MAKE_U(R, G, B);
    706                         *plane++ = MAKE_Y(r1, g1, b1);
    707                         *plane++ = MAKE_V(R, G, B);
    708                     }
    709                     if (width_remainder) {
    710                         READ_ONE_RGB_PIXEL;
    711                         /* Y U Y V */
    712                         *plane++ = MAKE_Y(r, g, b);
    713                         *plane++ = MAKE_U(r, g, b);
    714                         *plane++ = MAKE_Y(r, g, b);
    715                         *plane++ = MAKE_V(r, g, b);
    716                     }
    717                     plane += plane_skip;
    718                     curr_row += src_pitch;
    719                 }
    720             } 
    721             else if (dst_format == SDL_PIXELFORMAT_UYVY)
    722             {
    723                 for (j = 0; j < height; j++) {
    724                     for (i = 0; i < width_half; i++) {
    725                         READ_TWO_RGB_PIXELS;
    726                         /* U Y V Y1 */
    727                         *plane++ = MAKE_U(R, G, B);
    728                         *plane++ = MAKE_Y(r, g, b);
    729                         *plane++ = MAKE_V(R, G, B);
    730                         *plane++ = MAKE_Y(r1, g1, b1);
    731                     }
    732                     if (width_remainder) {
    733                         READ_ONE_RGB_PIXEL;
    734                         /* U Y V Y */
    735                         *plane++ = MAKE_U(r, g, b);
    736                         *plane++ = MAKE_Y(r, g, b);
    737                         *plane++ = MAKE_V(r, g, b);
    738                         *plane++ = MAKE_Y(r, g, b);
    739                     }
    740                     plane += plane_skip;
    741                     curr_row += src_pitch;
    742                 }
    743             }
    744             else if (dst_format == SDL_PIXELFORMAT_YVYU)
    745             {
    746                 for (j = 0; j < height; j++) {
    747                     for (i = 0; i < width_half; i++) {
    748                         READ_TWO_RGB_PIXELS;
    749                         /* Y V Y1 U */
    750                         *plane++ = MAKE_Y(r, g, b);
    751                         *plane++ = MAKE_V(R, G, B);
    752                         *plane++ = MAKE_Y(r1, g1, b1);
    753                         *plane++ = MAKE_U(R, G, B);
    754                     }
    755                     if (width_remainder) {
    756                         READ_ONE_RGB_PIXEL;
    757                         /* Y V Y U */
    758                         *plane++ = MAKE_Y(r, g, b);
    759                         *plane++ = MAKE_V(r, g, b);
    760                         *plane++ = MAKE_Y(r, g, b);
    761                         *plane++ = MAKE_U(r, g, b);
    762                     }
    763                     plane += plane_skip;
    764                     curr_row += src_pitch;
    765                 }
    766             }
    767         }
    768         break;
    769 
    770     default:
    771         return SDL_SetError("Unsupported YUV destination format: %s", SDL_GetPixelFormatName(dst_format));
    772     }
    773 #undef MAKE_Y
    774 #undef MAKE_U
    775 #undef MAKE_V
    776 #undef READ_2x2_PIXELS
    777 #undef READ_2x1_PIXELS
    778 #undef READ_1x2_PIXELS
    779 #undef READ_1x1_PIXEL
    780 #undef READ_TWO_RGB_PIXELS
    781 #undef READ_ONE_RGB_PIXEL
    782     return 0;
    783 }
    784 
    785 int
    786 SDL_ConvertPixels_RGB_to_YUV(int width, int height,
    787          Uint32 src_format, const void *src, int src_pitch,
    788          Uint32 dst_format, void *dst, int dst_pitch)
    789 {
    790 #if 0 /* Doesn't handle odd widths */
    791     /* RGB24 to FOURCC */
    792     if (src_format == SDL_PIXELFORMAT_RGB24) {
    793         Uint8 *y;
    794         Uint8 *u;
    795         Uint8 *v;
    796         Uint32 y_stride;
    797         Uint32 uv_stride;
    798         YCbCrType yuv_type;
    799 
    800         if (GetYUVPlanes(width, height, dst_format, dst, dst_pitch, (const Uint8 **)&y, (const Uint8 **)&u, (const Uint8 **)&v, &y_stride, &uv_stride) < 0) {
    801             return -1;
    802         }
    803 
    804         if (GetYUVConversionType(width, height, &yuv_type) < 0) {
    805             return -1;
    806         }
    807 
    808         rgb24_yuv420_std(width, height, src, src_pitch, y, u, v, y_stride, uv_stride, yuv_type);
    809         return 0;
    810     }
    811 #endif
    812 
    813     /* ARGB8888 to FOURCC */
    814     if (src_format == SDL_PIXELFORMAT_ARGB8888) {
    815         return SDL_ConvertPixels_ARGB8888_to_YUV(width, height, src, src_pitch, dst_format, dst, dst_pitch);
    816     }
    817 
    818     /* not ARGB8888 to FOURCC : need an intermediate conversion */
    819     {
    820         int ret;
    821         void *tmp;
    822         int tmp_pitch = (width * sizeof(Uint32));
    823 
    824         tmp = SDL_malloc(tmp_pitch * height);
    825         if (tmp == NULL) {
    826             return SDL_OutOfMemory();
    827         }
    828 
    829         /* convert src/src_format to tmp/ARGB8888 */
    830         ret = SDL_ConvertPixels(width, height, src_format, src, src_pitch, SDL_PIXELFORMAT_ARGB8888, tmp, tmp_pitch);
    831         if (ret == -1) {
    832             SDL_free(tmp);
    833             return ret;
    834         }
    835 
    836         /* convert tmp/ARGB8888 to dst/FOURCC */
    837         ret = SDL_ConvertPixels_ARGB8888_to_YUV(width, height, tmp, tmp_pitch, dst_format, dst, dst_pitch);
    838         SDL_free(tmp);
    839         return ret;
    840     }
    841 }
    842 
    843 static int
    844 SDL_ConvertPixels_YUV_to_YUV_Copy(int width, int height, Uint32 format,
    845         const void *src, int src_pitch, void *dst, int dst_pitch)
    846 {
    847     int i;
    848 
    849     if (IsPlanar2x2Format(format)) {
    850         /* Y plane */
    851         for (i = height; i--;) {
    852             SDL_memcpy(dst, src, width);
    853             src = (const Uint8*)src + src_pitch;
    854             dst = (Uint8*)dst + dst_pitch;
    855         }
    856 
    857         if (format == SDL_PIXELFORMAT_YV12 || format == SDL_PIXELFORMAT_IYUV) {
    858             /* U and V planes are a quarter the size of the Y plane, rounded up */
    859             width = (width + 1) / 2;
    860             height = (height + 1) / 2;
    861             src_pitch = (src_pitch + 1) / 2;
    862             dst_pitch = (dst_pitch + 1) / 2;
    863             for (i = height * 2; i--;) {
    864                 SDL_memcpy(dst, src, width);
    865                 src = (const Uint8*)src + src_pitch;
    866                 dst = (Uint8*)dst + dst_pitch;
    867             }
    868         } else if (format == SDL_PIXELFORMAT_NV12 || format == SDL_PIXELFORMAT_NV21) {
    869             /* U/V plane is half the height of the Y plane, rounded up */
    870             height = (height + 1) / 2;
    871             width = ((width + 1) / 2)*2;
    872             src_pitch = ((src_pitch + 1) / 2)*2;
    873             dst_pitch = ((dst_pitch + 1) / 2)*2;
    874             for (i = height; i--;) {
    875                 SDL_memcpy(dst, src, width);
    876                 src = (const Uint8*)src + src_pitch;
    877                 dst = (Uint8*)dst + dst_pitch;
    878             }
    879         }
    880         return 0;
    881     }
    882 
    883     if (IsPacked4Format(format)) {
    884         /* Packed planes */
    885         width = 4 * ((width + 1) / 2);
    886         for (i = height; i--;) {
    887             SDL_memcpy(dst, src, width);
    888             src = (const Uint8*)src + src_pitch;
    889             dst = (Uint8*)dst + dst_pitch;
    890         }
    891         return 0;
    892     }
    893 
    894     return SDL_SetError("SDL_ConvertPixels_YUV_to_YUV_Copy: Unsupported YUV format: %s", SDL_GetPixelFormatName(format));
    895 }
    896 
    897 static int
    898 SDL_ConvertPixels_SwapUVPlanes(int width, int height, const void *src, int src_pitch, void *dst, int dst_pitch)
    899 {
    900     int y;
    901     const int UVwidth = (width + 1)/2;
    902     const int UVheight = (height + 1)/2;
    903 
    904     /* Skip the Y plane */
    905     src = (const Uint8 *)src + height * src_pitch;
    906     dst = (Uint8 *)dst + height * dst_pitch;
    907 
    908     if (src == dst) {
    909         int UVpitch = (dst_pitch + 1)/2;
    910         Uint8 *tmp;
    911         Uint8 *row1 = dst;
    912         Uint8 *row2 = (Uint8 *)dst + UVheight * UVpitch;
    913 
    914         /* Allocate a temporary row for the swap */
    915         tmp = (Uint8 *)SDL_malloc(UVwidth);
    916         if (!tmp) {
    917             return SDL_OutOfMemory();
    918         }
    919         for (y = 0; y < UVheight; ++y) {
    920             SDL_memcpy(tmp, row1, UVwidth);
    921             SDL_memcpy(row1, row2, UVwidth);
    922             SDL_memcpy(row2, tmp, UVwidth);
    923             row1 += UVpitch;
    924             row2 += UVpitch;
    925         }
    926         SDL_free(tmp);
    927     } else {
    928         const Uint8 *srcUV;
    929         Uint8 *dstUV;
    930         int srcUVPitch = ((src_pitch + 1)/2);
    931         int dstUVPitch = ((dst_pitch + 1)/2);
    932 
    933         /* Copy the first plane */
    934         srcUV = (const Uint8 *)src;
    935         dstUV = (Uint8 *)dst + UVheight * dstUVPitch;
    936         for (y = 0; y < UVheight; ++y) {
    937             SDL_memcpy(dstUV, srcUV, UVwidth);
    938             srcUV += srcUVPitch;
    939             dstUV += dstUVPitch;
    940         }
    941 
    942         /* Copy the second plane */
    943         dstUV = (Uint8 *)dst;
    944         for (y = 0; y < UVheight; ++y) {
    945             SDL_memcpy(dstUV, srcUV, UVwidth);
    946             srcUV += srcUVPitch;
    947             dstUV += dstUVPitch;
    948         }
    949     }
    950     return 0;
    951 }
    952 
    953 static int
    954 SDL_ConvertPixels_PackUVPlanes_to_NV(int width, int height, const void *src, int src_pitch, void *dst, int dst_pitch, SDL_bool reverseUV)
    955 {
    956     int x, y;
    957     const int UVwidth = (width + 1)/2;
    958     const int UVheight = (height + 1)/2;
    959     const int srcUVPitch = ((src_pitch + 1)/2);
    960     const int srcUVPitchLeft = srcUVPitch - UVwidth;
    961     const int dstUVPitch = ((dst_pitch + 1)/2)*2;
    962     const int dstUVPitchLeft = dstUVPitch - UVwidth*2;
    963     const Uint8 *src1, *src2;
    964     Uint8 *dstUV;
    965     Uint8 *tmp = NULL;
    966 #ifdef __SSE2__
    967     const SDL_bool use_SSE2 = SDL_HasSSE2();
    968 #endif
    969 
    970     /* Skip the Y plane */
    971     src = (const Uint8 *)src + height * src_pitch;
    972     dst = (Uint8 *)dst + height * dst_pitch;
    973 
    974     if (src == dst) {
    975         /* Need to make a copy of the buffer so we don't clobber it while converting */
    976         tmp = (Uint8 *)SDL_malloc(2*UVheight*srcUVPitch);
    977         if (!tmp) {
    978             return SDL_OutOfMemory();
    979         }
    980         SDL_memcpy(tmp, src, 2*UVheight*srcUVPitch);
    981         src = tmp;
    982     }
    983 
    984     if (reverseUV) {
    985         src2 = (const Uint8 *)src;
    986         src1 = src2 + UVheight * srcUVPitch;
    987     } else {
    988         src1 = (const Uint8 *)src;
    989         src2 = src1 + UVheight * srcUVPitch;
    990     }
    991     dstUV = (Uint8 *)dst;
    992 
    993     y = UVheight;
    994     while (y--) {
    995         x = UVwidth;
    996 #ifdef __SSE2__
    997         if (use_SSE2) {
    998             while (x >= 16) {
    999                 __m128i u = _mm_loadu_si128((__m128i *)src1);
   1000                 __m128i v = _mm_loadu_si128((__m128i *)src2);
   1001                 __m128i uv1 = _mm_unpacklo_epi8(u, v);
   1002                 __m128i uv2 = _mm_unpackhi_epi8(u, v);
   1003                 _mm_storeu_si128((__m128i*)dstUV, uv1);
   1004                 _mm_storeu_si128((__m128i*)(dstUV + 16), uv2);
   1005                 src1 += 16;
   1006                 src2 += 16;
   1007                 dstUV += 32;
   1008                 x -= 16;
   1009             }
   1010         }
   1011 #endif
   1012         while (x--) {
   1013             *dstUV++ = *src1++;
   1014             *dstUV++ = *src2++;
   1015         }
   1016         src1 += srcUVPitchLeft;
   1017         src2 += srcUVPitchLeft;
   1018         dstUV += dstUVPitchLeft;
   1019     }
   1020 
   1021     if (tmp) {
   1022         SDL_free(tmp);
   1023     }
   1024     return 0;
   1025 }
   1026 
   1027 static int
   1028 SDL_ConvertPixels_SplitNV_to_UVPlanes(int width, int height, const void *src, int src_pitch, void *dst, int dst_pitch, SDL_bool reverseUV)
   1029 {
   1030     int x, y;
   1031     const int UVwidth = (width + 1)/2;
   1032     const int UVheight = (height + 1)/2;
   1033     const int srcUVPitch = ((src_pitch + 1)/2)*2;
   1034     const int srcUVPitchLeft = srcUVPitch - UVwidth*2;
   1035     const int dstUVPitch = ((dst_pitch + 1)/2);
   1036     const int dstUVPitchLeft = dstUVPitch - UVwidth;
   1037     const Uint8 *srcUV;
   1038     Uint8 *dst1, *dst2;
   1039     Uint8 *tmp = NULL;
   1040 #ifdef __SSE2__
   1041     const SDL_bool use_SSE2 = SDL_HasSSE2();
   1042 #endif
   1043 
   1044     /* Skip the Y plane */
   1045     src = (const Uint8 *)src + height * src_pitch;
   1046     dst = (Uint8 *)dst + height * dst_pitch;
   1047 
   1048     if (src == dst) {
   1049         /* Need to make a copy of the buffer so we don't clobber it while converting */
   1050         tmp = (Uint8 *)SDL_malloc(UVheight*srcUVPitch);
   1051         if (!tmp) {
   1052             return SDL_OutOfMemory();
   1053         }
   1054         SDL_memcpy(tmp, src, UVheight*srcUVPitch);
   1055         src = tmp;
   1056     }
   1057 
   1058     if (reverseUV) {
   1059         dst2 = (Uint8 *)dst;
   1060         dst1 = dst2 + UVheight * dstUVPitch;
   1061     } else {
   1062         dst1 = (Uint8 *)dst;
   1063         dst2 = dst1 + UVheight * dstUVPitch;
   1064     }
   1065     srcUV = (const Uint8 *)src;
   1066 
   1067     y = UVheight;
   1068     while (y--) {
   1069         x = UVwidth;
   1070 #ifdef __SSE2__
   1071         if (use_SSE2) {
   1072             __m128i mask = _mm_set1_epi16(0x00FF);
   1073             while (x >= 16) {
   1074                 __m128i uv1 = _mm_loadu_si128((__m128i*)srcUV);
   1075                 __m128i uv2 = _mm_loadu_si128((__m128i*)(srcUV+16));
   1076                 __m128i u1 = _mm_and_si128(uv1, mask);
   1077                 __m128i u2 = _mm_and_si128(uv2, mask);
   1078                 __m128i u = _mm_packus_epi16(u1, u2);
   1079                 __m128i v1 = _mm_srli_epi16(uv1, 8);
   1080                 __m128i v2 = _mm_srli_epi16(uv2, 8);
   1081                 __m128i v = _mm_packus_epi16(v1, v2);
   1082                 _mm_storeu_si128((__m128i*)dst1, u);
   1083                 _mm_storeu_si128((__m128i*)dst2, v);
   1084                 srcUV += 32;
   1085                 dst1 += 16;
   1086                 dst2 += 16;
   1087                 x -= 16;
   1088             }
   1089         }
   1090 #endif
   1091         while (x--) {
   1092             *dst1++ = *srcUV++;
   1093             *dst2++ = *srcUV++;
   1094         }
   1095         srcUV += srcUVPitchLeft;
   1096         dst1 += dstUVPitchLeft;
   1097         dst2 += dstUVPitchLeft;
   1098     }
   1099 
   1100     if (tmp) {
   1101         SDL_free(tmp);
   1102     }
   1103     return 0;
   1104 }
   1105 
   1106 static int
   1107 SDL_ConvertPixels_SwapNV(int width, int height, const void *src, int src_pitch, void *dst, int dst_pitch)
   1108 {
   1109     int x, y;
   1110     const int UVwidth = (width + 1)/2;
   1111     const int UVheight = (height + 1)/2;
   1112     const int srcUVPitch = ((src_pitch + 1)/2)*2;
   1113     const int srcUVPitchLeft = (srcUVPitch - UVwidth*2)/sizeof(Uint16);
   1114     const int dstUVPitch = ((dst_pitch + 1)/2)*2;
   1115     const int dstUVPitchLeft = (dstUVPitch - UVwidth*2)/sizeof(Uint16);
   1116     const Uint16 *srcUV;
   1117     Uint16 *dstUV;
   1118 #ifdef __SSE2__
   1119     const SDL_bool use_SSE2 = SDL_HasSSE2();
   1120 #endif
   1121 
   1122     /* Skip the Y plane */
   1123     src = (const Uint8 *)src + height * src_pitch;
   1124     dst = (Uint8 *)dst + height * dst_pitch;
   1125 
   1126     srcUV = (const Uint16 *)src;
   1127     dstUV = (Uint16 *)dst;
   1128     y = UVheight;
   1129     while (y--) {
   1130         x = UVwidth;
   1131 #ifdef __SSE2__
   1132         if (use_SSE2) {
   1133             while (x >= 8) {
   1134                 __m128i uv = _mm_loadu_si128((__m128i*)srcUV);
   1135                 __m128i v = _mm_slli_epi16(uv, 8);
   1136                 __m128i u = _mm_srli_epi16(uv, 8);
   1137                 __m128i vu = _mm_or_si128(v, u);
   1138                 _mm_storeu_si128((__m128i*)dstUV, vu);
   1139                 srcUV += 8;
   1140                 dstUV += 8;
   1141                 x -= 8;
   1142             }
   1143         }
   1144 #endif
   1145         while (x--) {
   1146             *dstUV++ = SDL_Swap16(*srcUV++);
   1147         }
   1148         srcUV += srcUVPitchLeft;
   1149         dstUV += dstUVPitchLeft;
   1150     }
   1151     return 0;
   1152 }
   1153 
   1154 static int
   1155 SDL_ConvertPixels_Planar2x2_to_Planar2x2(int width, int height,
   1156          Uint32 src_format, const void *src, int src_pitch,
   1157          Uint32 dst_format, void *dst, int dst_pitch)
   1158 {
   1159     if (src != dst) {
   1160         /* Copy Y plane */
   1161         int i;
   1162         const Uint8 *srcY = (const Uint8 *)src;
   1163         Uint8 *dstY = (Uint8 *)dst;
   1164         for (i = height; i--; ) {
   1165             SDL_memcpy(dstY, srcY, width);
   1166             srcY += src_pitch;
   1167             dstY += dst_pitch;
   1168         }
   1169     }
   1170 
   1171     switch (src_format) {
   1172     case SDL_PIXELFORMAT_YV12:
   1173         switch (dst_format) {
   1174         case SDL_PIXELFORMAT_IYUV:
   1175             return SDL_ConvertPixels_SwapUVPlanes(width, height, src, src_pitch, dst, dst_pitch);
   1176         case SDL_PIXELFORMAT_NV12:
   1177             return SDL_ConvertPixels_PackUVPlanes_to_NV(width, height, src, src_pitch, dst, dst_pitch, SDL_TRUE);
   1178         case SDL_PIXELFORMAT_NV21:
   1179             return SDL_ConvertPixels_PackUVPlanes_to_NV(width, height, src, src_pitch, dst, dst_pitch, SDL_FALSE);
   1180         default:
   1181             break;
   1182         }
   1183         break;
   1184     case SDL_PIXELFORMAT_IYUV:
   1185         switch (dst_format) {
   1186         case SDL_PIXELFORMAT_YV12:
   1187             return SDL_ConvertPixels_SwapUVPlanes(width, height, src, src_pitch, dst, dst_pitch);
   1188         case SDL_PIXELFORMAT_NV12:
   1189             return SDL_ConvertPixels_PackUVPlanes_to_NV(width, height, src, src_pitch, dst, dst_pitch, SDL_FALSE);
   1190         case SDL_PIXELFORMAT_NV21:
   1191             return SDL_ConvertPixels_PackUVPlanes_to_NV(width, height, src, src_pitch, dst, dst_pitch, SDL_TRUE);
   1192         default:
   1193             break;
   1194         }
   1195         break;
   1196     case SDL_PIXELFORMAT_NV12:
   1197         switch (dst_format) {
   1198         case SDL_PIXELFORMAT_YV12:
   1199             return SDL_ConvertPixels_SplitNV_to_UVPlanes(width, height, src, src_pitch, dst, dst_pitch, SDL_TRUE);
   1200         case SDL_PIXELFORMAT_IYUV:
   1201             return SDL_ConvertPixels_SplitNV_to_UVPlanes(width, height, src, src_pitch, dst, dst_pitch, SDL_FALSE);
   1202         case SDL_PIXELFORMAT_NV21:
   1203             return SDL_ConvertPixels_SwapNV(width, height, src, src_pitch, dst, dst_pitch);
   1204         default:
   1205             break;
   1206         }
   1207         break;
   1208     case SDL_PIXELFORMAT_NV21:
   1209         switch (dst_format) {
   1210         case SDL_PIXELFORMAT_YV12:
   1211             return SDL_ConvertPixels_SplitNV_to_UVPlanes(width, height, src, src_pitch, dst, dst_pitch, SDL_FALSE);
   1212         case SDL_PIXELFORMAT_IYUV:
   1213             return SDL_ConvertPixels_SplitNV_to_UVPlanes(width, height, src, src_pitch, dst, dst_pitch, SDL_TRUE);
   1214         case SDL_PIXELFORMAT_NV12:
   1215             return SDL_ConvertPixels_SwapNV(width, height, src, src_pitch, dst, dst_pitch);
   1216         default:
   1217             break;
   1218         }
   1219         break;
   1220     default:
   1221         break;
   1222     }
   1223     return SDL_SetError("SDL_ConvertPixels_Planar2x2_to_Planar2x2: Unsupported YUV conversion: %s -> %s", SDL_GetPixelFormatName(src_format), SDL_GetPixelFormatName(dst_format));
   1224 }
   1225 
   1226 #ifdef __SSE2__
   1227 #define PACKED4_TO_PACKED4_ROW_SSE2(shuffle)                                                        \
   1228     while (x >= 4) {                                                                                \
   1229         __m128i yuv = _mm_loadu_si128((__m128i*)srcYUV);                                            \
   1230         __m128i lo = _mm_unpacklo_epi8(yuv, _mm_setzero_si128());                                   \
   1231         __m128i hi = _mm_unpackhi_epi8(yuv, _mm_setzero_si128());                                   \
   1232         lo = _mm_shufflelo_epi16(lo, shuffle);                                                      \
   1233         lo = _mm_shufflehi_epi16(lo, shuffle);                                                      \
   1234         hi = _mm_shufflelo_epi16(hi, shuffle);                                                      \
   1235         hi = _mm_shufflehi_epi16(hi, shuffle);                                                      \
   1236         yuv = _mm_packus_epi16(lo, hi);                                                             \
   1237         _mm_storeu_si128((__m128i*)dstYUV, yuv);                                                    \
   1238         srcYUV += 16;                                                                               \
   1239         dstYUV += 16;                                                                               \
   1240         x -= 4;                                                                                     \
   1241     }                                                                                               \
   1242 
   1243 #endif
   1244 
   1245 static int
   1246 SDL_ConvertPixels_YUY2_to_UYVY(int width, int height, const void *src, int src_pitch, void *dst, int dst_pitch)
   1247 {
   1248     int x, y;
   1249     const int YUVwidth = (width + 1)/2;
   1250     const int srcYUVPitchLeft = (src_pitch - YUVwidth*4);
   1251     const int dstYUVPitchLeft = (dst_pitch - YUVwidth*4);
   1252     const Uint8 *srcYUV = (const Uint8 *)src;
   1253     Uint8 *dstYUV = (Uint8 *)dst;
   1254 #ifdef __SSE2__
   1255     const SDL_bool use_SSE2 = SDL_HasSSE2();
   1256 #endif
   1257 
   1258     y = height;
   1259     while (y--) {
   1260         x = YUVwidth;
   1261 #ifdef __SSE2__
   1262         if (use_SSE2) {
   1263             PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(2, 3, 0, 1));
   1264         }
   1265 #endif
   1266         while (x--) {
   1267             Uint8 Y1, U, Y2, V;
   1268 
   1269             Y1 = srcYUV[0];
   1270             U = srcYUV[1];
   1271             Y2 = srcYUV[2];
   1272             V = srcYUV[3];
   1273             srcYUV += 4;
   1274 
   1275             dstYUV[0] = U;
   1276             dstYUV[1] = Y1;
   1277             dstYUV[2] = V;
   1278             dstYUV[3] = Y2;
   1279             dstYUV += 4;
   1280         }
   1281         srcYUV += srcYUVPitchLeft;
   1282         dstYUV += dstYUVPitchLeft;
   1283     }
   1284     return 0;
   1285 }
   1286 
   1287 static int
   1288 SDL_ConvertPixels_YUY2_to_YVYU(int width, int height, const void *src, int src_pitch, void *dst, int dst_pitch)
   1289 {
   1290     int x, y;
   1291     const int YUVwidth = (width + 1)/2;
   1292     const int srcYUVPitchLeft = (src_pitch - YUVwidth*4);
   1293     const int dstYUVPitchLeft = (dst_pitch - YUVwidth*4);
   1294     const Uint8 *srcYUV = (const Uint8 *)src;
   1295     Uint8 *dstYUV = (Uint8 *)dst;
   1296 #ifdef __SSE2__
   1297     const SDL_bool use_SSE2 = SDL_HasSSE2();
   1298 #endif
   1299 
   1300     y = height;
   1301     while (y--) {
   1302         x = YUVwidth;
   1303 #ifdef __SSE2__
   1304         if (use_SSE2) {
   1305             PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(1, 2, 3, 0));
   1306         }
   1307 #endif
   1308         while (x--) {
   1309             Uint8 Y1, U, Y2, V;
   1310 
   1311             Y1 = srcYUV[0];
   1312             U = srcYUV[1];
   1313             Y2 = srcYUV[2];
   1314             V = srcYUV[3];
   1315             srcYUV += 4;
   1316 
   1317             dstYUV[0] = Y1;
   1318             dstYUV[1] = V;
   1319             dstYUV[2] = Y2;
   1320             dstYUV[3] = U;
   1321             dstYUV += 4;
   1322         }
   1323         srcYUV += srcYUVPitchLeft;
   1324         dstYUV += dstYUVPitchLeft;
   1325     }
   1326     return 0;
   1327 }
   1328 
   1329 static int
   1330 SDL_ConvertPixels_UYVY_to_YUY2(int width, int height, const void *src, int src_pitch, void *dst, int dst_pitch)
   1331 {
   1332     int x, y;
   1333     const int YUVwidth = (width + 1)/2;
   1334     const int srcYUVPitchLeft = (src_pitch - YUVwidth*4);
   1335     const int dstYUVPitchLeft = (dst_pitch - YUVwidth*4);
   1336     const Uint8 *srcYUV = (const Uint8 *)src;
   1337     Uint8 *dstYUV = (Uint8 *)dst;
   1338 #ifdef __SSE2__
   1339     const SDL_bool use_SSE2 = SDL_HasSSE2();
   1340 #endif
   1341 
   1342     y = height;
   1343     while (y--) {
   1344         x = YUVwidth;
   1345 #ifdef __SSE2__
   1346         if (use_SSE2) {
   1347             PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(2, 3, 0, 1));
   1348         }
   1349 #endif
   1350         while (x--) {
   1351             Uint8 Y1, U, Y2, V;
   1352 
   1353             U = srcYUV[0];
   1354             Y1 = srcYUV[1];
   1355             V = srcYUV[2];
   1356             Y2 = srcYUV[3];
   1357             srcYUV += 4;
   1358 
   1359             dstYUV[0] = Y1;
   1360             dstYUV[1] = U;
   1361             dstYUV[2] = Y2;
   1362             dstYUV[3] = V;
   1363             dstYUV += 4;
   1364         }
   1365         srcYUV += srcYUVPitchLeft;
   1366         dstYUV += dstYUVPitchLeft;
   1367     }
   1368     return 0;
   1369 }
   1370 
   1371 static int
   1372 SDL_ConvertPixels_UYVY_to_YVYU(int width, int height, const void *src, int src_pitch, void *dst, int dst_pitch)
   1373 {
   1374     int x, y;
   1375     const int YUVwidth = (width + 1)/2;
   1376     const int srcYUVPitchLeft = (src_pitch - YUVwidth*4);
   1377     const int dstYUVPitchLeft = (dst_pitch - YUVwidth*4);
   1378     const Uint8 *srcYUV = (const Uint8 *)src;
   1379     Uint8 *dstYUV = (Uint8 *)dst;
   1380 #ifdef __SSE2__
   1381     const SDL_bool use_SSE2 = SDL_HasSSE2();
   1382 #endif
   1383 
   1384     y = height;
   1385     while (y--) {
   1386         x = YUVwidth;
   1387 #ifdef __SSE2__
   1388         if (use_SSE2) {
   1389             PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(0, 3, 2, 1));
   1390         }
   1391 #endif
   1392         while (x--) {
   1393             Uint8 Y1, U, Y2, V;
   1394 
   1395             U = srcYUV[0];
   1396             Y1 = srcYUV[1];
   1397             V = srcYUV[2];
   1398             Y2 = srcYUV[3];
   1399             srcYUV += 4;
   1400 
   1401             dstYUV[0] = Y1;
   1402             dstYUV[1] = V;
   1403             dstYUV[2] = Y2;
   1404             dstYUV[3] = U;
   1405             dstYUV += 4;
   1406         }
   1407         srcYUV += srcYUVPitchLeft;
   1408         dstYUV += dstYUVPitchLeft;
   1409     }
   1410     return 0;
   1411 }
   1412 
   1413 static int
   1414 SDL_ConvertPixels_YVYU_to_YUY2(int width, int height, const void *src, int src_pitch, void *dst, int dst_pitch)
   1415 {
   1416     int x, y;
   1417     const int YUVwidth = (width + 1)/2;
   1418     const int srcYUVPitchLeft = (src_pitch - YUVwidth*4);
   1419     const int dstYUVPitchLeft = (dst_pitch - YUVwidth*4);
   1420     const Uint8 *srcYUV = (const Uint8 *)src;
   1421     Uint8 *dstYUV = (Uint8 *)dst;
   1422 #ifdef __SSE2__
   1423     const SDL_bool use_SSE2 = SDL_HasSSE2();
   1424 #endif
   1425 
   1426     y = height;
   1427     while (y--) {
   1428         x = YUVwidth;
   1429 #ifdef __SSE2__
   1430         if (use_SSE2) {
   1431             PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(1, 2, 3, 0));
   1432         }
   1433 #endif
   1434         while (x--) {
   1435             Uint8 Y1, U, Y2, V;
   1436 
   1437             Y1 = srcYUV[0];
   1438             V = srcYUV[1];
   1439             Y2 = srcYUV[2];
   1440             U = srcYUV[3];
   1441             srcYUV += 4;
   1442 
   1443             dstYUV[0] = Y1;
   1444             dstYUV[1] = U;
   1445             dstYUV[2] = Y2;
   1446             dstYUV[3] = V;
   1447             dstYUV += 4;
   1448         }
   1449         srcYUV += srcYUVPitchLeft;
   1450         dstYUV += dstYUVPitchLeft;
   1451     }
   1452     return 0;
   1453 }
   1454 
   1455 static int
   1456 SDL_ConvertPixels_YVYU_to_UYVY(int width, int height, const void *src, int src_pitch, void *dst, int dst_pitch)
   1457 {
   1458     int x, y;
   1459     const int YUVwidth = (width + 1)/2;
   1460     const int srcYUVPitchLeft = (src_pitch - YUVwidth*4);
   1461     const int dstYUVPitchLeft = (dst_pitch - YUVwidth*4);
   1462     const Uint8 *srcYUV = (const Uint8 *)src;
   1463     Uint8 *dstYUV = (Uint8 *)dst;
   1464 #ifdef __SSE2__
   1465     const SDL_bool use_SSE2 = SDL_HasSSE2();
   1466 #endif
   1467 
   1468     y = height;
   1469     while (y--) {
   1470         x = YUVwidth;
   1471 #ifdef __SSE2__
   1472         if (use_SSE2) {
   1473             PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(2, 1, 0, 3));
   1474         }
   1475 #endif
   1476         while (x--) {
   1477             Uint8 Y1, U, Y2, V;
   1478 
   1479             Y1 = srcYUV[0];
   1480             V = srcYUV[1];
   1481             Y2 = srcYUV[2];
   1482             U = srcYUV[3];
   1483             srcYUV += 4;
   1484 
   1485             dstYUV[0] = U;
   1486             dstYUV[1] = Y1;
   1487             dstYUV[2] = V;
   1488             dstYUV[3] = Y2;
   1489             dstYUV += 4;
   1490         }
   1491         srcYUV += srcYUVPitchLeft;
   1492         dstYUV += dstYUVPitchLeft;
   1493     }
   1494     return 0;
   1495 }
   1496 
   1497 static int
   1498 SDL_ConvertPixels_Packed4_to_Packed4(int width, int height,
   1499          Uint32 src_format, const void *src, int src_pitch,
   1500          Uint32 dst_format, void *dst, int dst_pitch)
   1501 {
   1502     switch (src_format) {
   1503     case SDL_PIXELFORMAT_YUY2:
   1504         switch (dst_format) {
   1505         case SDL_PIXELFORMAT_UYVY:
   1506             return SDL_ConvertPixels_YUY2_to_UYVY(width, height, src, src_pitch, dst, dst_pitch);
   1507         case SDL_PIXELFORMAT_YVYU:
   1508             return SDL_ConvertPixels_YUY2_to_YVYU(width, height, src, src_pitch, dst, dst_pitch);
   1509         default:
   1510             break;
   1511         }
   1512         break;
   1513     case SDL_PIXELFORMAT_UYVY:
   1514         switch (dst_format) {
   1515         case SDL_PIXELFORMAT_YUY2:
   1516             return SDL_ConvertPixels_UYVY_to_YUY2(width, height, src, src_pitch, dst, dst_pitch);
   1517         case SDL_PIXELFORMAT_YVYU:
   1518             return SDL_ConvertPixels_UYVY_to_YVYU(width, height, src, src_pitch, dst, dst_pitch);
   1519         default:
   1520             break;
   1521         }
   1522         break;
   1523     case SDL_PIXELFORMAT_YVYU:
   1524         switch (dst_format) {
   1525         case SDL_PIXELFORMAT_YUY2:
   1526             return SDL_ConvertPixels_YVYU_to_YUY2(width, height, src, src_pitch, dst, dst_pitch);
   1527         case SDL_PIXELFORMAT_UYVY:
   1528             return SDL_ConvertPixels_YVYU_to_UYVY(width, height, src, src_pitch, dst, dst_pitch);
   1529         default:
   1530             break;
   1531         }
   1532         break;
   1533     default:
   1534         break;
   1535     }
   1536     return SDL_SetError("SDL_ConvertPixels_Packed4_to_Packed4: Unsupported YUV conversion: %s -> %s", SDL_GetPixelFormatName(src_format), SDL_GetPixelFormatName(dst_format));
   1537 }
   1538 
   1539 static int
   1540 SDL_ConvertPixels_Planar2x2_to_Packed4(int width, int height,
   1541          Uint32 src_format, const void *src, int src_pitch,
   1542          Uint32 dst_format, void *dst, int dst_pitch)
   1543 {
   1544     int x, y;
   1545     const Uint8 *srcY1, *srcY2, *srcU, *srcV;
   1546     Uint32 srcY_pitch, srcUV_pitch;
   1547     Uint32 srcY_pitch_left, srcUV_pitch_left, srcUV_pixel_stride;
   1548     Uint8 *dstY1, *dstY2, *dstU1, *dstU2, *dstV1, *dstV2;
   1549     Uint32 dstY_pitch, dstUV_pitch;
   1550     Uint32 dst_pitch_left;
   1551 
   1552     if (src == dst) {
   1553         return SDL_SetError("Can't change YUV plane types in-place");
   1554     }
   1555 
   1556     if (GetYUVPlanes(width, height, src_format, src, src_pitch,
   1557                      &srcY1, &srcU, &srcV, &srcY_pitch, &srcUV_pitch) < 0) {
   1558         return -1;
   1559     }
   1560     srcY2 = srcY1 + srcY_pitch;
   1561     srcY_pitch_left = (srcY_pitch - width);
   1562 
   1563     if (src_format == SDL_PIXELFORMAT_NV12 || src_format == SDL_PIXELFORMAT_NV21) {
   1564         srcUV_pixel_stride = 2;
   1565         srcUV_pitch_left = (srcUV_pitch - 2*((width + 1)/2));
   1566     } else {
   1567         srcUV_pixel_stride = 1;
   1568         srcUV_pitch_left = (srcUV_pitch - ((width + 1)/2));
   1569     }
   1570 
   1571     if (GetYUVPlanes(width, height, dst_format, dst, dst_pitch,
   1572                      (const Uint8 **)&dstY1, (const Uint8 **)&dstU1, (const Uint8 **)&dstV1,
   1573                      &dstY_pitch, &dstUV_pitch) < 0) {
   1574         return -1;
   1575     }
   1576     dstY2 = dstY1 + dstY_pitch;
   1577     dstU2 = dstU1 + dstUV_pitch;
   1578     dstV2 = dstV1 + dstUV_pitch;
   1579     dst_pitch_left = (dstY_pitch - 4*((width + 1)/2));
   1580 
   1581     /* Copy 2x2 blocks of pixels at a time */
   1582     for (y = 0; y < (height - 1); y += 2) {
   1583         for (x = 0; x < (width - 1); x += 2) {
   1584             /* Row 1 */
   1585             *dstY1 = *srcY1++;
   1586             dstY1 += 2;
   1587             *dstY1 = *srcY1++;
   1588             dstY1 += 2;
   1589             *dstU1 = *srcU;
   1590             *dstV1 = *srcV;
   1591 
   1592             /* Row 2 */
   1593             *dstY2 = *srcY2++;
   1594             dstY2 += 2;
   1595             *dstY2 = *srcY2++;
   1596             dstY2 += 2;
   1597             *dstU2 = *srcU;
   1598             *dstV2 = *srcV;
   1599 
   1600             srcU += srcUV_pixel_stride;
   1601             srcV += srcUV_pixel_stride;
   1602             dstU1 += 4;
   1603             dstU2 += 4;
   1604             dstV1 += 4;
   1605             dstV2 += 4;
   1606         }
   1607 
   1608         /* Last column */
   1609         if (x == (width - 1)) {
   1610             /* Row 1 */
   1611             *dstY1 = *srcY1;
   1612             dstY1 += 2;
   1613             *dstY1 = *srcY1++;
   1614             dstY1 += 2;
   1615             *dstU1 = *srcU;
   1616             *dstV1 = *srcV;
   1617 
   1618             /* Row 2 */
   1619             *dstY2 = *srcY2;
   1620             dstY2 += 2;
   1621             *dstY2 = *srcY2++;
   1622             dstY2 += 2;
   1623             *dstU2 = *srcU;
   1624             *dstV2 = *srcV;
   1625 
   1626             srcU += srcUV_pixel_stride;
   1627             srcV += srcUV_pixel_stride;
   1628             dstU1 += 4;
   1629             dstU2 += 4;
   1630             dstV1 += 4;
   1631             dstV2 += 4;
   1632         }
   1633 
   1634         srcY1 += srcY_pitch_left + srcY_pitch;
   1635         srcY2 += srcY_pitch_left + srcY_pitch;
   1636         srcU += srcUV_pitch_left;
   1637         srcV += srcUV_pitch_left;
   1638         dstY1 += dst_pitch_left + dstY_pitch;
   1639         dstY2 += dst_pitch_left + dstY_pitch;
   1640         dstU1 += dst_pitch_left + dstUV_pitch;
   1641         dstU2 += dst_pitch_left + dstUV_pitch;
   1642         dstV1 += dst_pitch_left + dstUV_pitch;
   1643         dstV2 += dst_pitch_left + dstUV_pitch;
   1644     }
   1645 
   1646     /* Last row */
   1647     if (y == (height - 1)) {
   1648         for (x = 0; x < (width - 1); x += 2) {
   1649             /* Row 1 */
   1650             *dstY1 = *srcY1++;
   1651             dstY1 += 2;
   1652             *dstY1 = *srcY1++;
   1653             dstY1 += 2;
   1654             *dstU1 = *srcU;
   1655             *dstV1 = *srcV;
   1656 
   1657             srcU += srcUV_pixel_stride;
   1658             srcV += srcUV_pixel_stride;
   1659             dstU1 += 4;
   1660             dstV1 += 4;
   1661         }
   1662 
   1663         /* Last column */
   1664         if (x == (width - 1)) {
   1665             /* Row 1 */
   1666             *dstY1 = *srcY1;
   1667             dstY1 += 2;
   1668             *dstY1 = *srcY1++;
   1669             dstY1 += 2;
   1670             *dstU1 = *srcU;
   1671             *dstV1 = *srcV;
   1672 
   1673             srcU += srcUV_pixel_stride;
   1674             srcV += srcUV_pixel_stride;
   1675             dstU1 += 4;
   1676             dstV1 += 4;
   1677         }
   1678     }
   1679     return 0;
   1680 }
   1681 
   1682 static int
   1683 SDL_ConvertPixels_Packed4_to_Planar2x2(int width, int height,
   1684          Uint32 src_format, const void *src, int src_pitch,
   1685          Uint32 dst_format, void *dst, int dst_pitch)
   1686 {
   1687     int x, y;
   1688     const Uint8 *srcY1, *srcY2, *srcU1, *srcU2, *srcV1, *srcV2;
   1689     Uint32 srcY_pitch, srcUV_pitch;
   1690     Uint32 src_pitch_left;
   1691     Uint8 *dstY1, *dstY2, *dstU, *dstV;
   1692     Uint32 dstY_pitch, dstUV_pitch;
   1693     Uint32 dstY_pitch_left, dstUV_pitch_left, dstUV_pixel_stride;
   1694 
   1695     if (src == dst) {
   1696         return SDL_SetError("Can't change YUV plane types in-place");
   1697     }
   1698 
   1699     if (GetYUVPlanes(width, height, src_format, src, src_pitch,
   1700                      &srcY1, &srcU1, &srcV1, &srcY_pitch, &srcUV_pitch) < 0) {
   1701         return -1;
   1702     }
   1703     srcY2 = srcY1 + srcY_pitch;
   1704     srcU2 = srcU1 + srcUV_pitch;
   1705     srcV2 = srcV1 + srcUV_pitch;
   1706     src_pitch_left = (srcY_pitch - 4*((width + 1)/2));
   1707 
   1708     if (GetYUVPlanes(width, height, dst_format, dst, dst_pitch,
   1709                      (const Uint8 **)&dstY1, (const Uint8 **)&dstU, (const Uint8 **)&dstV,
   1710                      &dstY_pitch, &dstUV_pitch) < 0) {
   1711         return -1;
   1712     }
   1713     dstY2 = dstY1 + dstY_pitch;
   1714     dstY_pitch_left = (dstY_pitch - width);
   1715 
   1716     if (dst_format == SDL_PIXELFORMAT_NV12 || dst_format == SDL_PIXELFORMAT_NV21) {
   1717         dstUV_pixel_stride = 2;
   1718         dstUV_pitch_left = (dstUV_pitch - 2*((width + 1)/2));
   1719     } else {
   1720         dstUV_pixel_stride = 1;
   1721         dstUV_pitch_left = (dstUV_pitch - ((width + 1)/2));
   1722     }
   1723 
   1724     /* Copy 2x2 blocks of pixels at a time */
   1725     for (y = 0; y < (height - 1); y += 2) {
   1726         for (x = 0; x < (width - 1); x += 2) {
   1727             /* Row 1 */
   1728             *dstY1++ = *srcY1;
   1729             srcY1 += 2;
   1730             *dstY1++ = *srcY1;
   1731             srcY1 += 2;
   1732 
   1733             /* Row 2 */
   1734             *dstY2++ = *srcY2;
   1735             srcY2 += 2;
   1736             *dstY2++ = *srcY2;
   1737             srcY2 += 2;
   1738 
   1739             *dstU = (Uint8)(((Uint32)*srcU1 + *srcU2)/2);
   1740             *dstV = (Uint8)(((Uint32)*srcV1 + *srcV2)/2);
   1741 
   1742             srcU1 += 4;
   1743             srcU2 += 4;
   1744             srcV1 += 4;
   1745             srcV2 += 4;
   1746             dstU += dstUV_pixel_stride;
   1747             dstV += dstUV_pixel_stride;
   1748         }
   1749 
   1750         /* Last column */
   1751         if (x == (width - 1)) {
   1752             /* Row 1 */
   1753             *dstY1 = *srcY1;
   1754             srcY1 += 2;
   1755             *dstY1++ = *srcY1;
   1756             srcY1 += 2;
   1757 
   1758             /* Row 2 */
   1759             *dstY2 = *srcY2;
   1760             srcY2 += 2;
   1761             *dstY2++ = *srcY2;
   1762             srcY2 += 2;
   1763 
   1764             *dstU = (Uint8)(((Uint32)*srcU1 + *srcU2)/2);
   1765             *dstV = (Uint8)(((Uint32)*srcV1 + *srcV2)/2);
   1766 
   1767             srcU1 += 4;
   1768             srcU2 += 4;
   1769             srcV1 += 4;
   1770             srcV2 += 4;
   1771             dstU += dstUV_pixel_stride;
   1772             dstV += dstUV_pixel_stride;
   1773         }
   1774 
   1775         srcY1 += src_pitch_left + srcY_pitch;
   1776         srcY2 += src_pitch_left + srcY_pitch;
   1777         srcU1 += src_pitch_left + srcUV_pitch;
   1778         srcU2 += src_pitch_left + srcUV_pitch;
   1779         srcV1 += src_pitch_left + srcUV_pitch;
   1780         srcV2 += src_pitch_left + srcUV_pitch;
   1781         dstY1 += dstY_pitch_left + dstY_pitch;
   1782         dstY2 += dstY_pitch_left + dstY_pitch;
   1783         dstU += dstUV_pitch_left;
   1784         dstV += dstUV_pitch_left;
   1785     }
   1786 
   1787     /* Last row */
   1788     if (y == (height - 1)) {
   1789         for (x = 0; x < (width - 1); x += 2) {
   1790             *dstY1++ = *srcY1;
   1791             srcY1 += 2;
   1792             *dstY1++ = *srcY1;
   1793             srcY1 += 2;
   1794 
   1795             *dstU = *srcU1;
   1796             *dstV = *srcV1;
   1797 
   1798             srcU1 += 4;
   1799             srcV1 += 4;
   1800             dstU += dstUV_pixel_stride;
   1801             dstV += dstUV_pixel_stride;
   1802         }
   1803 
   1804         /* Last column */
   1805         if (x == (width - 1)) {
   1806             *dstY1 = *srcY1;
   1807             *dstU = *srcU1;
   1808             *dstV = *srcV1;
   1809         }
   1810     }
   1811     return 0;
   1812 }
   1813 
   1814 #endif /* SDL_HAVE_YUV */
   1815 
   1816 int
   1817 SDL_ConvertPixels_YUV_to_YUV(int width, int height,
   1818          Uint32 src_format, const void *src, int src_pitch,
   1819          Uint32 dst_format, void *dst, int dst_pitch)
   1820 {
   1821 #if SDL_HAVE_YUV
   1822     if (src_format == dst_format) {
   1823         if (src == dst) {
   1824             /* Nothing to do */
   1825             return 0;
   1826         }
   1827         return SDL_ConvertPixels_YUV_to_YUV_Copy(width, height, src_format, src, src_pitch, dst, dst_pitch);
   1828     }
   1829 
   1830     if (IsPlanar2x2Format(src_format) && IsPlanar2x2Format(dst_format)) {
   1831         return SDL_ConvertPixels_Planar2x2_to_Planar2x2(width, height, src_format, src, src_pitch, dst_format, dst, dst_pitch);
   1832     } else if (IsPacked4Format(src_format) && IsPacked4Format(dst_format)) {
   1833         return SDL_ConvertPixels_Packed4_to_Packed4(width, height, src_format, src, src_pitch, dst_format, dst, dst_pitch);
   1834     } else if (IsPlanar2x2Format(src_format) && IsPacked4Format(dst_format)) {
   1835         return SDL_ConvertPixels_Planar2x2_to_Packed4(width, height, src_format, src, src_pitch, dst_format, dst, dst_pitch);
   1836     } else if (IsPacked4Format(src_format) && IsPlanar2x2Format(dst_format)) {
   1837         return SDL_ConvertPixels_Packed4_to_Planar2x2(width, height, src_format, src, src_pitch, dst_format, dst, dst_pitch);
   1838     } else {
   1839         return SDL_SetError("SDL_ConvertPixels_YUV_to_YUV: Unsupported YUV conversion: %s -> %s", SDL_GetPixelFormatName(src_format), SDL_GetPixelFormatName(dst_format));
   1840     }
   1841 #else
   1842 	return SDL_SetError("SDL not built with YUV support");
   1843 #endif
   1844 }
   1845 
   1846 /* vi: set ts=4 sw=4 expandtab: */