sdl

FORK: Simple Directmedia Layer
git clone https://git.neptards.moe/neptards/sdl.git
Log | Files | Refs

yuv_rgb.c (25673B)


      1 // Copyright 2016 Adrien Descamps
      2 // Distributed under BSD 3-Clause License
      3 #include "../../SDL_internal.h"
      4 
      5 #if SDL_HAVE_YUV
      6 
      7 #include "yuv_rgb.h"
      8 
      9 #include "SDL_cpuinfo.h"
     10 /*#include <x86intrin.h>*/
     11 
     12 #define PRECISION 6 /* number of fractional bits carried by the fixed-point coefficients */
     13 #define PRECISION_FACTOR (1<<PRECISION) /* fixed-point scale: V() multiplies coefficients by it, clampU8() shifts it back out */
     14 
     15 typedef struct
     16 {
        	/* offset added to the luma (Y) result, expressed in 8-bit units */
     17 	uint8_t y_shift;
        	/* 3x3 RGB->YUV coefficients, pre-multiplied by PRECISION_FACTOR (built with V()) */
     18 	int16_t matrix[3][3];
     19 } RGB2YUVParam;
        // Fixed-point RGB -> YUV transform described by one RGB2YUVParam:
     20 // |Y|   |y_shift|                        |matrix[0][0] matrix[0][1] matrix[0][2]|   |R|
     21 // |U| = |  128  | + 1/PRECISION_FACTOR * |matrix[1][0] matrix[1][1] matrix[1][2]| * |G|
     22 // |V|   |  128  |                        |matrix[2][0] matrix[2][1] matrix[2][2]|   |B|
     23 
     24 typedef struct
     25 {
        	/* offset subtracted from Y before scaling, expressed in 8-bit units */
     26 	uint8_t y_shift;
        	/* gain applied to (Y - y_shift) for all three output channels */
     27 	int16_t y_factor;
        	/* contribution of (V - 128) to R */
     28 	int16_t v_r_factor;
        	/* contribution of (U - 128) to G */
     29 	int16_t u_g_factor;
        	/* contribution of (V - 128) to G */
     30 	int16_t v_g_factor;
        	/* contribution of (U - 128) to B */
     31 	int16_t u_b_factor;
     32 } YUV2RGBParam;
        // Fixed-point YUV -> RGB transform described by one YUV2RGBParam
        // (all factors are pre-multiplied by PRECISION_FACTOR):
     33 // |R|                        |y_factor      0       v_r_factor|   |Y-y_shift|
     34 // |G| = 1/PRECISION_FACTOR * |y_factor  u_g_factor  v_g_factor| * |  U-128  |
     35 // |B|                        |y_factor  u_b_factor      0     |   |  V-128  |
     36 
     37 #define V(value) (int16_t)((value*PRECISION_FACTOR)+0.5)
     38 
     39 // for ITU-T T.871, values can be found in section 7
     40 // for ITU-R BT.601-7 values are derived from equations in sections 2.5.1-2.5.3, assuming RGB is encoded using full range ([0-1]<->[0-255])
     41 // for ITU-R BT.709-6 values are derived from equations in sections 3.2-3.4, assuming RGB is encoded using full range ([0-1]<->[0-255])
     42 // all values are rounded to the fourth decimal
     43 
     44 static const YUV2RGBParam YUV2RGB[3] = {
        	// One YUV -> RGB coefficient set per colour standard. Initialisers are positional,
        	// in YUV2RGBParam field order; the inline /*.name=*/ markers only label them.
        	// NOTE(review): presumably indexed by YCbCrType like RGB2YUV, from code not visible in this file - confirm.
     45 	// ITU-T T.871 (JPEG)
     46 	{/*.y_shift=*/ 0, /*.y_factor=*/ V(1.0), /*.v_r_factor=*/ V(1.402), /*.u_g_factor=*/ -V(0.3441), /*.v_g_factor=*/ -V(0.7141), /*.u_b_factor=*/ V(1.772)},
     47 	// ITU-R BT.601-7
     48 	{/*.y_shift=*/ 16, /*.y_factor=*/ V(1.1644), /*.v_r_factor=*/ V(1.596), /*.u_g_factor=*/ -V(0.3918), /*.v_g_factor=*/ -V(0.813), /*.u_b_factor=*/ V(2.0172)},
     49 	// ITU-R BT.709-6
     50 	{/*.y_shift=*/ 16, /*.y_factor=*/ V(1.1644), /*.v_r_factor=*/ V(1.7927), /*.u_g_factor=*/ -V(0.2132), /*.v_g_factor=*/ -V(0.5329), /*.u_b_factor=*/ V(2.1124)}
     51 };
     52 
     53 static const RGB2YUVParam RGB2YUV[3] = {
        	// One RGB -> YUV coefficient set per colour standard, indexed by the yuv_type
        	// argument of the rgb24_yuv420_* functions. Matrix rows produce Y, U and V;
        	// columns multiply R, G and B. Negative entries are written -V(x) because V()
        	// rounds by adding 0.5.
     54 	// ITU-T T.871 (JPEG)
     55 	{/*.y_shift=*/ 0, /*.matrix=*/ {{V(0.299), V(0.587), V(0.114)}, {-V(0.1687), -V(0.3313), V(0.5)}, {V(0.5), -V(0.4187), -V(0.0813)}}},
     56 	// ITU-R BT.601-7
     57 	{/*.y_shift=*/ 16, /*.matrix=*/ {{V(0.2568), V(0.5041), V(0.0979)}, {-V(0.1482), -V(0.291), V(0.4392)}, {V(0.4392), -V(0.3678), -V(0.0714)}}},
     58 	// ITU-R BT.709-6
     59 	{/*.y_shift=*/ 16, /*.matrix=*/ {{V(0.1826), V(0.6142), V(0.062)}, {-V(0.1006), -V(0.3386), V(0.4392)}, {V(0.4392), -V(0.3989), -V(0.0403)}}}
     60 };
     61 
     62 /* The various layouts of YUV data we support */
        /* One of these values is assigned to YUV_FORMAT before each inclusion of a conversion template header. */
     63 #define YUV_FORMAT_420	1
     64 #define YUV_FORMAT_422	2
     65 #define YUV_FORMAT_NV12	3
     66 
     67 /* The various formats of RGB pixel that we support */
        /* One of these values is assigned to RGB_FORMAT before each inclusion of a conversion template header. */
        /* The numbers overlap with YUV_FORMAT_*; the two sets are only meaningful for their own selector. */
     68 #define RGB_FORMAT_RGB565	1
     69 #define RGB_FORMAT_RGB24	2
     70 #define RGB_FORMAT_RGBA		3
     71 #define RGB_FORMAT_BGRA		4
     72 #define RGB_FORMAT_ARGB		5
     73 #define RGB_FORMAT_ABGR		6
     74 
     75 // divide by PRECISION_FACTOR and clamp to [0:255] interval
     76 // input must be in the [-128*PRECISION_FACTOR:384*PRECISION_FACTOR] range
     77 static uint8_t clampU8(int32_t v)
     78 {
     79 	static const uint8_t lut[512] = 
     80 	{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
     81 	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
     82 	0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,
     83 	47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,
     84 	91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,
     85 	126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,
     86 	159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
     87 	192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,
     88 	225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,
     89 	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
     90 	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
     91 	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
     92 	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255
     93 	};
     94 	return lut[(v+128*PRECISION_FACTOR)>>PRECISION];
     95 }
     96 
     97 
     98 #define STD_FUNCTION_NAME	yuv420_rgb565_std
     99 #define YUV_FORMAT			YUV_FORMAT_420
    100 #define RGB_FORMAT			RGB_FORMAT_RGB565
    101 #include "yuv_rgb_std_func.h"
    102 
    103 #define STD_FUNCTION_NAME	yuv420_rgb24_std
    104 #define YUV_FORMAT			YUV_FORMAT_420
    105 #define RGB_FORMAT			RGB_FORMAT_RGB24
    106 #include "yuv_rgb_std_func.h"
    107 
    108 #define STD_FUNCTION_NAME	yuv420_rgba_std
    109 #define YUV_FORMAT			YUV_FORMAT_420
    110 #define RGB_FORMAT			RGB_FORMAT_RGBA
    111 #include "yuv_rgb_std_func.h"
    112 
    113 #define STD_FUNCTION_NAME	yuv420_bgra_std
    114 #define YUV_FORMAT			YUV_FORMAT_420
    115 #define RGB_FORMAT			RGB_FORMAT_BGRA
    116 #include "yuv_rgb_std_func.h"
    117 
    118 #define STD_FUNCTION_NAME	yuv420_argb_std
    119 #define YUV_FORMAT			YUV_FORMAT_420
    120 #define RGB_FORMAT			RGB_FORMAT_ARGB
    121 #include "yuv_rgb_std_func.h"
    122 
    123 #define STD_FUNCTION_NAME	yuv420_abgr_std
    124 #define YUV_FORMAT			YUV_FORMAT_420
    125 #define RGB_FORMAT			RGB_FORMAT_ABGR
    126 #include "yuv_rgb_std_func.h"
    127 
    128 #define STD_FUNCTION_NAME	yuv422_rgb565_std
    129 #define YUV_FORMAT			YUV_FORMAT_422
    130 #define RGB_FORMAT			RGB_FORMAT_RGB565
    131 #include "yuv_rgb_std_func.h"
    132 
    133 #define STD_FUNCTION_NAME	yuv422_rgb24_std
    134 #define YUV_FORMAT			YUV_FORMAT_422
    135 #define RGB_FORMAT			RGB_FORMAT_RGB24
    136 #include "yuv_rgb_std_func.h"
    137 
    138 #define STD_FUNCTION_NAME	yuv422_rgba_std
    139 #define YUV_FORMAT			YUV_FORMAT_422
    140 #define RGB_FORMAT			RGB_FORMAT_RGBA
    141 #include "yuv_rgb_std_func.h"
    142 
    143 #define STD_FUNCTION_NAME	yuv422_bgra_std
    144 #define YUV_FORMAT			YUV_FORMAT_422
    145 #define RGB_FORMAT			RGB_FORMAT_BGRA
    146 #include "yuv_rgb_std_func.h"
    147 
    148 #define STD_FUNCTION_NAME	yuv422_argb_std
    149 #define YUV_FORMAT			YUV_FORMAT_422
    150 #define RGB_FORMAT			RGB_FORMAT_ARGB
    151 #include "yuv_rgb_std_func.h"
    152 
    153 #define STD_FUNCTION_NAME	yuv422_abgr_std
    154 #define YUV_FORMAT			YUV_FORMAT_422
    155 #define RGB_FORMAT			RGB_FORMAT_ABGR
    156 #include "yuv_rgb_std_func.h"
    157 
    158 #define STD_FUNCTION_NAME	yuvnv12_rgb565_std
    159 #define YUV_FORMAT			YUV_FORMAT_NV12
    160 #define RGB_FORMAT			RGB_FORMAT_RGB565
    161 #include "yuv_rgb_std_func.h"
    162 
    163 #define STD_FUNCTION_NAME	yuvnv12_rgb24_std
    164 #define YUV_FORMAT			YUV_FORMAT_NV12
    165 #define RGB_FORMAT			RGB_FORMAT_RGB24
    166 #include "yuv_rgb_std_func.h"
    167 
    168 #define STD_FUNCTION_NAME	yuvnv12_rgba_std
    169 #define YUV_FORMAT			YUV_FORMAT_NV12
    170 #define RGB_FORMAT			RGB_FORMAT_RGBA
    171 #include "yuv_rgb_std_func.h"
    172 
    173 #define STD_FUNCTION_NAME	yuvnv12_bgra_std
    174 #define YUV_FORMAT			YUV_FORMAT_NV12
    175 #define RGB_FORMAT			RGB_FORMAT_BGRA
    176 #include "yuv_rgb_std_func.h"
    177 
    178 #define STD_FUNCTION_NAME	yuvnv12_argb_std
    179 #define YUV_FORMAT			YUV_FORMAT_NV12
    180 #define RGB_FORMAT			RGB_FORMAT_ARGB
    181 #include "yuv_rgb_std_func.h"
    182 
    183 #define STD_FUNCTION_NAME	yuvnv12_abgr_std
    184 #define YUV_FORMAT			YUV_FORMAT_NV12
    185 #define RGB_FORMAT			RGB_FORMAT_ABGR
    186 #include "yuv_rgb_std_func.h"
    187 
    188 void rgb24_yuv420_std(
    189 	uint32_t width, uint32_t height, 
    190 	const uint8_t *RGB, uint32_t RGB_stride, 
    191 	uint8_t *Y, uint8_t *U, uint8_t *V, uint32_t Y_stride, uint32_t UV_stride, 
    192 	YCbCrType yuv_type)
    193 {
    194 	const RGB2YUVParam *const param = &(RGB2YUV[yuv_type]);
    195 	
    196 	uint32_t x, y;
    197 	for(y=0; y<(height-1); y+=2)
    198 	{
    199 		const uint8_t *rgb_ptr1=RGB+y*RGB_stride,
    200 			*rgb_ptr2=RGB+(y+1)*RGB_stride;
    201 			
    202 		uint8_t *y_ptr1=Y+y*Y_stride,
    203 			*y_ptr2=Y+(y+1)*Y_stride,
    204 			*u_ptr=U+(y/2)*UV_stride,
    205 			*v_ptr=V+(y/2)*UV_stride;
    206 		
    207 		for(x=0; x<(width-1); x+=2)
    208 		{
    209 			// compute yuv for the four pixels, u and v values are summed
    210 			int32_t y_tmp, u_tmp, v_tmp;
    211 			
    212 			y_tmp = param->matrix[0][0]*rgb_ptr1[0] + param->matrix[0][1]*rgb_ptr1[1] + param->matrix[0][2]*rgb_ptr1[2];
    213 			u_tmp = param->matrix[1][0]*rgb_ptr1[0] + param->matrix[1][1]*rgb_ptr1[1] + param->matrix[1][2]*rgb_ptr1[2];
    214 			v_tmp = param->matrix[2][0]*rgb_ptr1[0] + param->matrix[2][1]*rgb_ptr1[1] + param->matrix[2][2]*rgb_ptr1[2];
    215 			y_ptr1[0]=clampU8(y_tmp+((param->y_shift)<<PRECISION));
    216 			
    217 			y_tmp = param->matrix[0][0]*rgb_ptr1[3] + param->matrix[0][1]*rgb_ptr1[4] + param->matrix[0][2]*rgb_ptr1[5];
    218 			u_tmp += param->matrix[1][0]*rgb_ptr1[3] + param->matrix[1][1]*rgb_ptr1[4] + param->matrix[1][2]*rgb_ptr1[5];
    219 			v_tmp += param->matrix[2][0]*rgb_ptr1[3] + param->matrix[2][1]*rgb_ptr1[4] + param->matrix[2][2]*rgb_ptr1[5];
    220 			y_ptr1[1]=clampU8(y_tmp+((param->y_shift)<<PRECISION));
    221 			
    222 			y_tmp = param->matrix[0][0]*rgb_ptr2[0] + param->matrix[0][1]*rgb_ptr2[1] + param->matrix[0][2]*rgb_ptr2[2];
    223 			u_tmp += param->matrix[1][0]*rgb_ptr2[0] + param->matrix[1][1]*rgb_ptr2[1] + param->matrix[1][2]*rgb_ptr2[2];
    224 			v_tmp += param->matrix[2][0]*rgb_ptr2[0] + param->matrix[2][1]*rgb_ptr2[1] + param->matrix[2][2]*rgb_ptr2[2];
    225 			y_ptr2[0]=clampU8(y_tmp+((param->y_shift)<<PRECISION));
    226 			
    227 			y_tmp = param->matrix[0][0]*rgb_ptr2[3] + param->matrix[0][1]*rgb_ptr2[4] + param->matrix[0][2]*rgb_ptr2[5];
    228 			u_tmp += param->matrix[1][0]*rgb_ptr2[3] + param->matrix[1][1]*rgb_ptr2[4] + param->matrix[1][2]*rgb_ptr2[5];
    229 			v_tmp += param->matrix[2][0]*rgb_ptr2[3] + param->matrix[2][1]*rgb_ptr2[4] + param->matrix[2][2]*rgb_ptr2[5];
    230 			y_ptr2[1]=clampU8(y_tmp+((param->y_shift)<<PRECISION));
    231 			
    232 			u_ptr[0] = clampU8(u_tmp/4+(128<<PRECISION));
    233 			v_ptr[0] = clampU8(v_tmp/4+(128<<PRECISION));
    234 			
    235 			rgb_ptr1 += 6;
    236 			rgb_ptr2 += 6;
    237 			y_ptr1 += 2;
    238 			y_ptr2 += 2;
    239 			u_ptr += 1;
    240 			v_ptr += 1;
    241 		}
    242 	}
    243 }
    244 
    245 #ifdef __SSE2__
    246 
    247 #define SSE_FUNCTION_NAME	yuv420_rgb565_sse
    248 #define STD_FUNCTION_NAME	yuv420_rgb565_std
    249 #define YUV_FORMAT			YUV_FORMAT_420
    250 #define RGB_FORMAT			RGB_FORMAT_RGB565
    251 #define SSE_ALIGNED
    252 #include "yuv_rgb_sse_func.h"
    253 
    254 #define SSE_FUNCTION_NAME	yuv420_rgb565_sseu
    255 #define STD_FUNCTION_NAME	yuv420_rgb565_std
    256 #define YUV_FORMAT			YUV_FORMAT_420
    257 #define RGB_FORMAT			RGB_FORMAT_RGB565
    258 #include "yuv_rgb_sse_func.h"
    259 
    260 #define SSE_FUNCTION_NAME	yuv420_rgb24_sse
    261 #define STD_FUNCTION_NAME	yuv420_rgb24_std
    262 #define YUV_FORMAT			YUV_FORMAT_420
    263 #define RGB_FORMAT			RGB_FORMAT_RGB24
    264 #define SSE_ALIGNED
    265 #include "yuv_rgb_sse_func.h"
    266 
    267 #define SSE_FUNCTION_NAME	yuv420_rgb24_sseu
    268 #define STD_FUNCTION_NAME	yuv420_rgb24_std
    269 #define YUV_FORMAT			YUV_FORMAT_420
    270 #define RGB_FORMAT			RGB_FORMAT_RGB24
    271 #include "yuv_rgb_sse_func.h"
    272 
    273 #define SSE_FUNCTION_NAME	yuv420_rgba_sse
    274 #define STD_FUNCTION_NAME	yuv420_rgba_std
    275 #define YUV_FORMAT			YUV_FORMAT_420
    276 #define RGB_FORMAT			RGB_FORMAT_RGBA
    277 #define SSE_ALIGNED
    278 #include "yuv_rgb_sse_func.h"
    279 
    280 #define SSE_FUNCTION_NAME	yuv420_rgba_sseu
    281 #define STD_FUNCTION_NAME	yuv420_rgba_std
    282 #define YUV_FORMAT			YUV_FORMAT_420
    283 #define RGB_FORMAT			RGB_FORMAT_RGBA
    284 #include "yuv_rgb_sse_func.h"
    285 
    286 #define SSE_FUNCTION_NAME	yuv420_bgra_sse
    287 #define STD_FUNCTION_NAME	yuv420_bgra_std
    288 #define YUV_FORMAT			YUV_FORMAT_420
    289 #define RGB_FORMAT			RGB_FORMAT_BGRA
    290 #define SSE_ALIGNED
    291 #include "yuv_rgb_sse_func.h"
    292 
    293 #define SSE_FUNCTION_NAME	yuv420_bgra_sseu
    294 #define STD_FUNCTION_NAME	yuv420_bgra_std
    295 #define YUV_FORMAT			YUV_FORMAT_420
    296 #define RGB_FORMAT			RGB_FORMAT_BGRA
    297 #include "yuv_rgb_sse_func.h"
    298 
    299 #define SSE_FUNCTION_NAME	yuv420_argb_sse
    300 #define STD_FUNCTION_NAME	yuv420_argb_std
    301 #define YUV_FORMAT			YUV_FORMAT_420
    302 #define RGB_FORMAT			RGB_FORMAT_ARGB
    303 #define SSE_ALIGNED
    304 #include "yuv_rgb_sse_func.h"
    305 
    306 #define SSE_FUNCTION_NAME	yuv420_argb_sseu
    307 #define STD_FUNCTION_NAME	yuv420_argb_std
    308 #define YUV_FORMAT			YUV_FORMAT_420
    309 #define RGB_FORMAT			RGB_FORMAT_ARGB
    310 #include "yuv_rgb_sse_func.h"
    311 
    312 #define SSE_FUNCTION_NAME	yuv420_abgr_sse
    313 #define STD_FUNCTION_NAME	yuv420_abgr_std
    314 #define YUV_FORMAT			YUV_FORMAT_420
    315 #define RGB_FORMAT			RGB_FORMAT_ABGR
    316 #define SSE_ALIGNED
    317 #include "yuv_rgb_sse_func.h"
    318 
    319 #define SSE_FUNCTION_NAME	yuv420_abgr_sseu
    320 #define STD_FUNCTION_NAME	yuv420_abgr_std
    321 #define YUV_FORMAT			YUV_FORMAT_420
    322 #define RGB_FORMAT			RGB_FORMAT_ABGR
    323 #include "yuv_rgb_sse_func.h"
    324 
    325 #define SSE_FUNCTION_NAME	yuv422_rgb565_sse
    326 #define STD_FUNCTION_NAME	yuv422_rgb565_std
    327 #define YUV_FORMAT			YUV_FORMAT_422
    328 #define RGB_FORMAT			RGB_FORMAT_RGB565
    329 #define SSE_ALIGNED
    330 #include "yuv_rgb_sse_func.h"
    331 
    332 #define SSE_FUNCTION_NAME	yuv422_rgb565_sseu
    333 #define STD_FUNCTION_NAME	yuv422_rgb565_std
    334 #define YUV_FORMAT			YUV_FORMAT_422
    335 #define RGB_FORMAT			RGB_FORMAT_RGB565
    336 #include "yuv_rgb_sse_func.h"
    337 
    338 #define SSE_FUNCTION_NAME	yuv422_rgb24_sse
    339 #define STD_FUNCTION_NAME	yuv422_rgb24_std
    340 #define YUV_FORMAT			YUV_FORMAT_422
    341 #define RGB_FORMAT			RGB_FORMAT_RGB24
    342 #define SSE_ALIGNED
    343 #include "yuv_rgb_sse_func.h"
    344 
    345 #define SSE_FUNCTION_NAME	yuv422_rgb24_sseu
    346 #define STD_FUNCTION_NAME	yuv422_rgb24_std
    347 #define YUV_FORMAT			YUV_FORMAT_422
    348 #define RGB_FORMAT			RGB_FORMAT_RGB24
    349 #include "yuv_rgb_sse_func.h"
    350 
    351 #define SSE_FUNCTION_NAME	yuv422_rgba_sse
    352 #define STD_FUNCTION_NAME	yuv422_rgba_std
    353 #define YUV_FORMAT			YUV_FORMAT_422
    354 #define RGB_FORMAT			RGB_FORMAT_RGBA
    355 #define SSE_ALIGNED
    356 #include "yuv_rgb_sse_func.h"
    357 
    358 #define SSE_FUNCTION_NAME	yuv422_rgba_sseu
    359 #define STD_FUNCTION_NAME	yuv422_rgba_std
    360 #define YUV_FORMAT			YUV_FORMAT_422
    361 #define RGB_FORMAT			RGB_FORMAT_RGBA
    362 #include "yuv_rgb_sse_func.h"
    363 
    364 #define SSE_FUNCTION_NAME	yuv422_bgra_sse
    365 #define STD_FUNCTION_NAME	yuv422_bgra_std
    366 #define YUV_FORMAT			YUV_FORMAT_422
    367 #define RGB_FORMAT			RGB_FORMAT_BGRA
    368 #define SSE_ALIGNED
    369 #include "yuv_rgb_sse_func.h"
    370 
    371 #define SSE_FUNCTION_NAME	yuv422_bgra_sseu
    372 #define STD_FUNCTION_NAME	yuv422_bgra_std
    373 #define YUV_FORMAT			YUV_FORMAT_422
    374 #define RGB_FORMAT			RGB_FORMAT_BGRA
    375 #include "yuv_rgb_sse_func.h"
    376 
    377 #define SSE_FUNCTION_NAME	yuv422_argb_sse
    378 #define STD_FUNCTION_NAME	yuv422_argb_std
    379 #define YUV_FORMAT			YUV_FORMAT_422
    380 #define RGB_FORMAT			RGB_FORMAT_ARGB
    381 #define SSE_ALIGNED
    382 #include "yuv_rgb_sse_func.h"
    383 
    384 #define SSE_FUNCTION_NAME	yuv422_argb_sseu
    385 #define STD_FUNCTION_NAME	yuv422_argb_std
    386 #define YUV_FORMAT			YUV_FORMAT_422
    387 #define RGB_FORMAT			RGB_FORMAT_ARGB
    388 #include "yuv_rgb_sse_func.h"
    389 
    390 #define SSE_FUNCTION_NAME	yuv422_abgr_sse
    391 #define STD_FUNCTION_NAME	yuv422_abgr_std
    392 #define YUV_FORMAT			YUV_FORMAT_422
    393 #define RGB_FORMAT			RGB_FORMAT_ABGR
    394 #define SSE_ALIGNED
    395 #include "yuv_rgb_sse_func.h"
    396 
    397 #define SSE_FUNCTION_NAME	yuv422_abgr_sseu
    398 #define STD_FUNCTION_NAME	yuv422_abgr_std
    399 #define YUV_FORMAT			YUV_FORMAT_422
    400 #define RGB_FORMAT			RGB_FORMAT_ABGR
    401 #include "yuv_rgb_sse_func.h"
    402 
    403 #define SSE_FUNCTION_NAME	yuvnv12_rgb565_sse
    404 #define STD_FUNCTION_NAME	yuvnv12_rgb565_std
    405 #define YUV_FORMAT			YUV_FORMAT_NV12
    406 #define RGB_FORMAT			RGB_FORMAT_RGB565
    407 #define SSE_ALIGNED
    408 #include "yuv_rgb_sse_func.h"
    409 
    410 #define SSE_FUNCTION_NAME	yuvnv12_rgb565_sseu
    411 #define STD_FUNCTION_NAME	yuvnv12_rgb565_std
    412 #define YUV_FORMAT			YUV_FORMAT_NV12
    413 #define RGB_FORMAT			RGB_FORMAT_RGB565
    414 #include "yuv_rgb_sse_func.h"
    415 
    416 #define SSE_FUNCTION_NAME	yuvnv12_rgb24_sse
    417 #define STD_FUNCTION_NAME	yuvnv12_rgb24_std
    418 #define YUV_FORMAT			YUV_FORMAT_NV12
    419 #define RGB_FORMAT			RGB_FORMAT_RGB24
    420 #define SSE_ALIGNED
    421 #include "yuv_rgb_sse_func.h"
    422 
    423 #define SSE_FUNCTION_NAME	yuvnv12_rgb24_sseu
    424 #define STD_FUNCTION_NAME	yuvnv12_rgb24_std
    425 #define YUV_FORMAT			YUV_FORMAT_NV12
    426 #define RGB_FORMAT			RGB_FORMAT_RGB24
    427 #include "yuv_rgb_sse_func.h"
    428 
    429 #define SSE_FUNCTION_NAME	yuvnv12_rgba_sse
    430 #define STD_FUNCTION_NAME	yuvnv12_rgba_std
    431 #define YUV_FORMAT			YUV_FORMAT_NV12
    432 #define RGB_FORMAT			RGB_FORMAT_RGBA
    433 #define SSE_ALIGNED
    434 #include "yuv_rgb_sse_func.h"
    435 
    436 #define SSE_FUNCTION_NAME	yuvnv12_rgba_sseu
    437 #define STD_FUNCTION_NAME	yuvnv12_rgba_std
    438 #define YUV_FORMAT			YUV_FORMAT_NV12
    439 #define RGB_FORMAT			RGB_FORMAT_RGBA
    440 #include "yuv_rgb_sse_func.h"
    441 
    442 #define SSE_FUNCTION_NAME	yuvnv12_bgra_sse
    443 #define STD_FUNCTION_NAME	yuvnv12_bgra_std
    444 #define YUV_FORMAT			YUV_FORMAT_NV12
    445 #define RGB_FORMAT			RGB_FORMAT_BGRA
    446 #define SSE_ALIGNED
    447 #include "yuv_rgb_sse_func.h"
    448 
    449 #define SSE_FUNCTION_NAME	yuvnv12_bgra_sseu
    450 #define STD_FUNCTION_NAME	yuvnv12_bgra_std
    451 #define YUV_FORMAT			YUV_FORMAT_NV12
    452 #define RGB_FORMAT			RGB_FORMAT_BGRA
    453 #include "yuv_rgb_sse_func.h"
    454 
    455 #define SSE_FUNCTION_NAME	yuvnv12_argb_sse
    456 #define STD_FUNCTION_NAME	yuvnv12_argb_std
    457 #define YUV_FORMAT			YUV_FORMAT_NV12
    458 #define RGB_FORMAT			RGB_FORMAT_ARGB
    459 #define SSE_ALIGNED
    460 #include "yuv_rgb_sse_func.h"
    461 
    462 #define SSE_FUNCTION_NAME	yuvnv12_argb_sseu
    463 #define STD_FUNCTION_NAME	yuvnv12_argb_std
    464 #define YUV_FORMAT			YUV_FORMAT_NV12
    465 #define RGB_FORMAT			RGB_FORMAT_ARGB
    466 #include "yuv_rgb_sse_func.h"
    467 
    468 #define SSE_FUNCTION_NAME	yuvnv12_abgr_sse
    469 #define STD_FUNCTION_NAME	yuvnv12_abgr_std
    470 #define YUV_FORMAT			YUV_FORMAT_NV12
    471 #define RGB_FORMAT			RGB_FORMAT_ABGR
    472 #define SSE_ALIGNED
    473 #include "yuv_rgb_sse_func.h"
    474 
    475 #define SSE_FUNCTION_NAME	yuvnv12_abgr_sseu
    476 #define STD_FUNCTION_NAME	yuvnv12_abgr_std
    477 #define YUV_FORMAT			YUV_FORMAT_NV12
    478 #define RGB_FORMAT			RGB_FORMAT_ABGR
    479 #include "yuv_rgb_sse_func.h"
    480 
    481 
    /* One byte-interleave pass of the RGB24 de-interleaving network: pairs each of the */
    /* first three input registers with the register three positions later (RGB1/RGB4, */
    /* RGB2/RGB5, RGB3/RGB6) and writes the low/high interleaves to R1..B2. */
    /* The register names describe the final contents only after the full */
    /* UNPACK_RGB24_32 sequence, not after a single pass. */
    482 #define UNPACK_RGB24_32_STEP1(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \
    483 R1 = _mm_unpacklo_epi8(RGB1, RGB4); \
    484 R2 = _mm_unpackhi_epi8(RGB1, RGB4); \
    485 G1 = _mm_unpacklo_epi8(RGB2, RGB5); \
    486 G2 = _mm_unpackhi_epi8(RGB2, RGB5); \
    487 B1 = _mm_unpacklo_epi8(RGB3, RGB6); \
    488 B2 = _mm_unpackhi_epi8(RGB3, RGB6);
    489 
    /* Counterpart of UNPACK_RGB24_32_STEP1: interleaves the six intermediate registers */
    /* (R1/G2, R2/B1, G1/B2) back into RGB1..RGB6 so the next STEP1 pass can consume them. */
    /* The last line ends with a line continuation, so the macro body extends over the */
    /* following blank line; that line must stay blank. */
    490 #define UNPACK_RGB24_32_STEP2(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \
    491 RGB1 = _mm_unpacklo_epi8(R1, G2); \
    492 RGB2 = _mm_unpackhi_epi8(R1, G2); \
    493 RGB3 = _mm_unpacklo_epi8(R2, B1); \
    494 RGB4 = _mm_unpackhi_epi8(R2, B1); \
    495 RGB5 = _mm_unpacklo_epi8(G1, B2); \
    496 RGB6 = _mm_unpackhi_epi8(G1, B2); \
    497 
    /* Full de-interleave of two rows of packed RGB24: five alternating STEP1/STEP2 */
    /* interleave passes, ending on STEP1 so the results land in R1..B2. */
    /* RGB1..RGB3 hold 48 bytes of the first row and RGB4..RGB6 48 bytes of the second */
    /* row (see the loads in RGB2YUV_32); RGB1..RGB6 are clobbered. */
    /* RGB2YUV_32 then treats R1/G1/B1 as the first row's channels and R2/G2/B2 as the */
    /* second row's. NOTE(review): the exact byte permutation was not re-derived here. */
    /* The last line ends with a line continuation; the following line must stay blank. */
    498 #define UNPACK_RGB24_32(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \
    499 UNPACK_RGB24_32_STEP1(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \
    500 UNPACK_RGB24_32_STEP2(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \
    501 UNPACK_RGB24_32_STEP1(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \
    502 UNPACK_RGB24_32_STEP2(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \
    503 UNPACK_RGB24_32_STEP1(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \
    504 
    /* Convert eight pixels from R, G, B (one 16-bit lane per pixel) to Y, U, V, also as */
    /* 16-bit lanes. Each output is the matrix row applied to (R, G, B), plus the offset */
    /* (y_shift for Y, 128 for U and V) scaled by PRECISION_FACTOR, then arithmetically */
    /* shifted right by PRECISION. All arithmetic is done in 16-bit lanes. */
    /* Requires a `param` pointer to an RGB2YUVParam in scope at the expansion site. */
    /* Y, U and V are assigned several times, so they must be plain lvalues. */
    505 #define RGB2YUV_16(R, G, B, Y, U, V) \
    506 Y = _mm_add_epi16(_mm_mullo_epi16(R, _mm_set1_epi16(param->matrix[0][0])), \
    507 		_mm_mullo_epi16(G, _mm_set1_epi16(param->matrix[0][1]))); \
    508 Y = _mm_add_epi16(Y, _mm_mullo_epi16(B, _mm_set1_epi16(param->matrix[0][2]))); \
    509 Y = _mm_add_epi16(Y, _mm_set1_epi16((param->y_shift)<<PRECISION)); \
    510 Y = _mm_srai_epi16(Y, PRECISION); \
    511 U = _mm_add_epi16(_mm_mullo_epi16(R, _mm_set1_epi16(param->matrix[1][0])), \
    512 		_mm_mullo_epi16(G, _mm_set1_epi16(param->matrix[1][1]))); \
    513 U = _mm_add_epi16(U, _mm_mullo_epi16(B, _mm_set1_epi16(param->matrix[1][2]))); \
    514 U = _mm_add_epi16(U, _mm_set1_epi16(128<<PRECISION)); \
    515 U = _mm_srai_epi16(U, PRECISION); \
    516 V = _mm_add_epi16(_mm_mullo_epi16(R, _mm_set1_epi16(param->matrix[2][0])), \
    517 		_mm_mullo_epi16(G, _mm_set1_epi16(param->matrix[2][1]))); \
    518 V = _mm_add_epi16(V, _mm_mullo_epi16(B, _mm_set1_epi16(param->matrix[2][2]))); \
    519 V = _mm_add_epi16(V, _mm_set1_epi16(128<<PRECISION)); \
    520 V = _mm_srai_epi16(V, PRECISION);
    521 
    /* Loop body shared by rgb24_yuv420_sse and rgb24_yuv420_sseu: converts a block of */
    /* 32 pixels on each of two adjacent rows from packed RGB24 to planar YUV 4:2:0. */
    /* Expects in scope: rgb_ptr1/rgb_ptr2 (source rows), y_ptr1/y_ptr2 (luma rows), */
    /* u_ptr/v_ptr (chroma row), param, and the LOAD_SI128 / SAVE_SI128 macros. */
    /* Per expansion it reads 96 bytes from each source row, writes 32 bytes to each */
    /* luma row and 16 bytes each to u_ptr and v_ptr. The pointers are not advanced. */
    /* The expansion starts with declarations, so it is used as the first thing in a block. */
    /* Chroma is reduced with _mm_avg_epu8, first vertically and then horizontally. */
    /* NOTE(review): these are two successive pairwise averages, whereas rgb24_yuv420_std */
    /* divides the four-pixel sum by 4, so the two paths may differ in the last bit - confirm. */
    522 #define RGB2YUV_32 \
    523 	__m128i r1, r2, b1, b2, g1, g2; \
    524 	__m128i r_16, g_16, b_16; \
    525 	__m128i y1_16, y2_16, u1_16, u2_16, v1_16, v2_16, y, u1, u2, v1, v2, u1_tmp, u2_tmp, v1_tmp, v2_tmp; \
    526 	__m128i rgb1 = LOAD_SI128((const __m128i*)(rgb_ptr1)), \
    527 		rgb2 = LOAD_SI128((const __m128i*)(rgb_ptr1+16)), \
    528 		rgb3 = LOAD_SI128((const __m128i*)(rgb_ptr1+32)), \
    529 		rgb4 = LOAD_SI128((const __m128i*)(rgb_ptr2)), \
    530 		rgb5 = LOAD_SI128((const __m128i*)(rgb_ptr2+16)), \
    531 		rgb6 = LOAD_SI128((const __m128i*)(rgb_ptr2+32)); \
    532 	/* unpack rgb24 data to r, g and b data in separate channels*/ \
    533 	UNPACK_RGB24_32(rgb1, rgb2, rgb3, rgb4, rgb5, rgb6, r1, r2, g1, g2, b1, b2) \
    534 	/* process pixels of first line */ \
    535 	r_16 = _mm_unpacklo_epi8(r1, _mm_setzero_si128()); \
    536 	g_16 = _mm_unpacklo_epi8(g1, _mm_setzero_si128()); \
    537 	b_16 = _mm_unpacklo_epi8(b1, _mm_setzero_si128()); \
    538 	RGB2YUV_16(r_16, g_16, b_16, y1_16, u1_16, v1_16) \
    539 	r_16 = _mm_unpackhi_epi8(r1, _mm_setzero_si128()); \
    540 	g_16 = _mm_unpackhi_epi8(g1, _mm_setzero_si128()); \
    541 	b_16 = _mm_unpackhi_epi8(b1, _mm_setzero_si128()); \
    542 	RGB2YUV_16(r_16, g_16, b_16, y2_16, u2_16, v2_16) \
    543 	y = _mm_packus_epi16(y1_16, y2_16); \
    544 	u1 = _mm_packus_epi16(u1_16, u2_16); \
    545 	v1 = _mm_packus_epi16(v1_16, v2_16); \
    546 	/* save Y values */ \
    547 	SAVE_SI128((__m128i*)(y_ptr1), y); \
    548 	/* process pixels of second line */ \
    549 	r_16 = _mm_unpacklo_epi8(r2, _mm_setzero_si128()); \
    550 	g_16 = _mm_unpacklo_epi8(g2, _mm_setzero_si128()); \
    551 	b_16 = _mm_unpacklo_epi8(b2, _mm_setzero_si128()); \
    552 	RGB2YUV_16(r_16, g_16, b_16, y1_16, u1_16, v1_16) \
    553 	r_16 = _mm_unpackhi_epi8(r2, _mm_setzero_si128()); \
    554 	g_16 = _mm_unpackhi_epi8(g2, _mm_setzero_si128()); \
    555 	b_16 = _mm_unpackhi_epi8(b2, _mm_setzero_si128()); \
    556 	RGB2YUV_16(r_16, g_16, b_16, y2_16, u2_16, v2_16) \
    557 	y = _mm_packus_epi16(y1_16, y2_16); \
    558 	u2 = _mm_packus_epi16(u1_16, u2_16); \
    559 	v2 = _mm_packus_epi16(v1_16, v2_16); \
    560 	/* save Y values */ \
    561 	SAVE_SI128((__m128i*)(y_ptr2), y); \
    562 	/* vertical subsampling of u/v values */ \
    563 	u1_tmp = _mm_avg_epu8(u1, u2); \
    564 	v1_tmp = _mm_avg_epu8(v1, v2); \
    565 	/* do the same again with next data */ \
    566 	rgb1 = LOAD_SI128((const __m128i*)(rgb_ptr1+48)); \
    567 	rgb2 = LOAD_SI128((const __m128i*)(rgb_ptr1+64)); \
    568 	rgb3 = LOAD_SI128((const __m128i*)(rgb_ptr1+80)); \
    569 	rgb4 = LOAD_SI128((const __m128i*)(rgb_ptr2+48)); \
    570 	rgb5 = LOAD_SI128((const __m128i*)(rgb_ptr2+64)); \
    571 	rgb6 = LOAD_SI128((const __m128i*)(rgb_ptr2+80)); \
    572 	/* unpack rgb24 data to r, g and b data in separate channels*/ \
    573 	UNPACK_RGB24_32(rgb1, rgb2, rgb3, rgb4, rgb5, rgb6, r1, r2, g1, g2, b1, b2) \
    574 	/* process pixels of first line */ \
    575 	r_16 = _mm_unpacklo_epi8(r1, _mm_setzero_si128()); \
    576 	g_16 = _mm_unpacklo_epi8(g1, _mm_setzero_si128()); \
    577 	b_16 = _mm_unpacklo_epi8(b1, _mm_setzero_si128()); \
    578 	RGB2YUV_16(r_16, g_16, b_16, y1_16, u1_16, v1_16) \
    579 	r_16 = _mm_unpackhi_epi8(r1, _mm_setzero_si128()); \
    580 	g_16 = _mm_unpackhi_epi8(g1, _mm_setzero_si128()); \
    581 	b_16 = _mm_unpackhi_epi8(b1, _mm_setzero_si128()); \
    582 	RGB2YUV_16(r_16, g_16, b_16, y2_16, u2_16, v2_16) \
    583 	y = _mm_packus_epi16(y1_16, y2_16); \
    584 	u1 = _mm_packus_epi16(u1_16, u2_16); \
    585 	v1 = _mm_packus_epi16(v1_16, v2_16); \
    586 	/* save Y values */ \
    587 	SAVE_SI128((__m128i*)(y_ptr1+16), y); \
    588 	/* process pixels of second line */ \
    589 	r_16 = _mm_unpacklo_epi8(r2, _mm_setzero_si128()); \
    590 	g_16 = _mm_unpacklo_epi8(g2, _mm_setzero_si128()); \
    591 	b_16 = _mm_unpacklo_epi8(b2, _mm_setzero_si128()); \
    592 	RGB2YUV_16(r_16, g_16, b_16, y1_16, u1_16, v1_16) \
    593 	r_16 = _mm_unpackhi_epi8(r2, _mm_setzero_si128()); \
    594 	g_16 = _mm_unpackhi_epi8(g2, _mm_setzero_si128()); \
    595 	b_16 = _mm_unpackhi_epi8(b2, _mm_setzero_si128()); \
    596 	RGB2YUV_16(r_16, g_16, b_16, y2_16, u2_16, v2_16) \
    597 	y = _mm_packus_epi16(y1_16, y2_16); \
    598 	u2 = _mm_packus_epi16(u1_16, u2_16); \
    599 	v2 = _mm_packus_epi16(v1_16, v2_16); \
    600 	/* save Y values */ \
    601 	SAVE_SI128((__m128i*)(y_ptr2+16), y); \
    602 	/* vertical subsampling of u/v values */ \
    603 	u2_tmp = _mm_avg_epu8(u1, u2); \
    604 	v2_tmp = _mm_avg_epu8(v1, v2); \
    605 	/* horizontal subsampling of u/v values */ \
    606 	u1 = _mm_packus_epi16(_mm_srl_epi16(u1_tmp, _mm_cvtsi32_si128(8)), _mm_srl_epi16(u2_tmp, _mm_cvtsi32_si128(8))); \
    607 	v1 = _mm_packus_epi16(_mm_srl_epi16(v1_tmp, _mm_cvtsi32_si128(8)), _mm_srl_epi16(v2_tmp, _mm_cvtsi32_si128(8))); \
    608 	u2 = _mm_packus_epi16(_mm_and_si128(u1_tmp, _mm_set1_epi16(0xFF)), _mm_and_si128(u2_tmp, _mm_set1_epi16(0xFF))); \
    609 	v2 = _mm_packus_epi16(_mm_and_si128(v1_tmp, _mm_set1_epi16(0xFF)), _mm_and_si128(v2_tmp, _mm_set1_epi16(0xFF))); \
    610 	u1 = _mm_avg_epu8(u1, u2); \
    611 	v1 = _mm_avg_epu8(v1, v2); \
    612 	SAVE_SI128((__m128i*)(u_ptr), u1); \
    613 	SAVE_SI128((__m128i*)(v_ptr), v1);
    614 
    615 void rgb24_yuv420_sse(uint32_t width, uint32_t height, 
    616 	const uint8_t *RGB, uint32_t RGB_stride, 
    617 	uint8_t *Y, uint8_t *U, uint8_t *V, uint32_t Y_stride, uint32_t UV_stride, 
    618 	YCbCrType yuv_type)
    619 {
    620 	#define LOAD_SI128 _mm_load_si128
    621 	#define SAVE_SI128 _mm_stream_si128
    622 	const RGB2YUVParam *const param = &(RGB2YUV[yuv_type]);
    623 	
    624 	uint32_t xpos, ypos;
    625 	for(ypos=0; ypos<(height-1); ypos+=2)
    626 	{
    627 		const uint8_t *rgb_ptr1=RGB+ypos*RGB_stride,
    628 			*rgb_ptr2=RGB+(ypos+1)*RGB_stride;
    629 		
    630 		uint8_t *y_ptr1=Y+ypos*Y_stride,
    631 			*y_ptr2=Y+(ypos+1)*Y_stride,
    632 			*u_ptr=U+(ypos/2)*UV_stride,
    633 			*v_ptr=V+(ypos/2)*UV_stride;
    634 		
    635 		for(xpos=0; xpos<(width-31); xpos+=32)
    636 		{
    637 			RGB2YUV_32
    638 			
    639 			rgb_ptr1+=96;
    640 			rgb_ptr2+=96;
    641 			y_ptr1+=32;
    642 			y_ptr2+=32;
    643 			u_ptr+=16; 
    644 			v_ptr+=16;
    645 		}
    646 	}
    647 	#undef LOAD_SI128
    648 	#undef SAVE_SI128
    649 }
    650 
    651 void rgb24_yuv420_sseu(uint32_t width, uint32_t height, 
    652 	const uint8_t *RGB, uint32_t RGB_stride, 
    653 	uint8_t *Y, uint8_t *U, uint8_t *V, uint32_t Y_stride, uint32_t UV_stride, 
    654 	YCbCrType yuv_type)
    655 {
    656 	#define LOAD_SI128 _mm_loadu_si128
    657 	#define SAVE_SI128 _mm_storeu_si128
    658 	const RGB2YUVParam *const param = &(RGB2YUV[yuv_type]);
    659 	
    660 	uint32_t xpos, ypos;
    661 	for(ypos=0; ypos<(height-1); ypos+=2)
    662 	{
    663 		const uint8_t *rgb_ptr1=RGB+ypos*RGB_stride,
    664 			*rgb_ptr2=RGB+(ypos+1)*RGB_stride;
    665 		
    666 		uint8_t *y_ptr1=Y+ypos*Y_stride,
    667 			*y_ptr2=Y+(ypos+1)*Y_stride,
    668 			*u_ptr=U+(ypos/2)*UV_stride,
    669 			*v_ptr=V+(ypos/2)*UV_stride;
    670 		
    671 		for(xpos=0; xpos<(width-31); xpos+=32)
    672 		{
    673 			RGB2YUV_32
    674 			
    675 			rgb_ptr1+=96;
    676 			rgb_ptr2+=96;
    677 			y_ptr1+=32;
    678 			y_ptr2+=32;
    679 			u_ptr+=16; 
    680 			v_ptr+=16;
    681 		}
    682 	}
    683 	#undef LOAD_SI128
    684 	#undef SAVE_SI128
    685 }
    686 
    687 
    688 #endif //__SSE2__
    689 
    690 #endif /* SDL_HAVE_YUV */