yuv_rgb.c (25673B)
1 // Copyright 2016 Adrien Descamps 2 // Distributed under BSD 3-Clause License 3 #include "../../SDL_internal.h" 4 5 #if SDL_HAVE_YUV 6 7 #include "yuv_rgb.h" 8 9 #include "SDL_cpuinfo.h" 10 /*#include <x86intrin.h>*/ 11 12 #define PRECISION 6 13 #define PRECISION_FACTOR (1<<PRECISION) 14 15 typedef struct 16 { 17 uint8_t y_shift; 18 int16_t matrix[3][3]; 19 } RGB2YUVParam; 20 // |Y| |y_shift| |matrix[0][0] matrix[0][1] matrix[0][2]| |R| 21 // |U| = | 128 | + 1/PRECISION_FACTOR * |matrix[1][0] matrix[1][1] matrix[1][2]| * |G| 22 // |V| | 128 | |matrix[2][0] matrix[2][1] matrix[2][2]| |B| 23 24 typedef struct 25 { 26 uint8_t y_shift; 27 int16_t y_factor; 28 int16_t v_r_factor; 29 int16_t u_g_factor; 30 int16_t v_g_factor; 31 int16_t u_b_factor; 32 } YUV2RGBParam; 33 // |R| |y_factor 0 v_r_factor| |Y-y_shift| 34 // |G| = 1/PRECISION_FACTOR * |y_factor u_g_factor v_g_factor| * | U-128 | 35 // |B| |y_factor u_b_factor 0 | | V-128 | 36 37 #define V(value) (int16_t)((value*PRECISION_FACTOR)+0.5) 38 39 // for ITU-T T.871, values can be found in section 7 40 // for ITU-R BT.601-7 values are derived from equations in sections 2.5.1-2.5.3, assuming RGB is encoded using full range ([0-1]<->[0-255]) 41 // for ITU-R BT.709-6 values are derived from equations in sections 3.2-3.4, assuming RGB is encoded using full range ([0-1]<->[0-255]) 42 // all values are rounded to the fourth decimal 43 44 static const YUV2RGBParam YUV2RGB[3] = { 45 // ITU-T T.871 (JPEG) 46 {/*.y_shift=*/ 0, /*.y_factor=*/ V(1.0), /*.v_r_factor=*/ V(1.402), /*.u_g_factor=*/ -V(0.3441), /*.v_g_factor=*/ -V(0.7141), /*.u_b_factor=*/ V(1.772)}, 47 // ITU-R BT.601-7 48 {/*.y_shift=*/ 16, /*.y_factor=*/ V(1.1644), /*.v_r_factor=*/ V(1.596), /*.u_g_factor=*/ -V(0.3918), /*.v_g_factor=*/ -V(0.813), /*.u_b_factor=*/ V(2.0172)}, 49 // ITU-R BT.709-6 50 {/*.y_shift=*/ 16, /*.y_factor=*/ V(1.1644), /*.v_r_factor=*/ V(1.7927), /*.u_g_factor=*/ -V(0.2132), /*.v_g_factor=*/ -V(0.5329), /*.u_b_factor=*/ V(2.1124)} 51 }; 52 53 static const RGB2YUVParam RGB2YUV[3] = { 54 // ITU-T T.871 (JPEG) 55 {/*.y_shift=*/ 0, /*.matrix=*/ {{V(0.299), V(0.587), V(0.114)}, {-V(0.1687), -V(0.3313), V(0.5)}, {V(0.5), -V(0.4187), -V(0.0813)}}}, 56 // ITU-R BT.601-7 57 {/*.y_shift=*/ 16, /*.matrix=*/ {{V(0.2568), V(0.5041), V(0.0979)}, {-V(0.1482), -V(0.291), V(0.4392)}, {V(0.4392), -V(0.3678), -V(0.0714)}}}, 58 // ITU-R BT.709-6 59 {/*.y_shift=*/ 16, /*.matrix=*/ {{V(0.1826), V(0.6142), V(0.062)}, {-V(0.1006), -V(0.3386), V(0.4392)}, {V(0.4392), -V(0.3989), -V(0.0403)}}} 60 }; 61 62 /* The various layouts of YUV data we support */ 63 #define YUV_FORMAT_420 1 64 #define YUV_FORMAT_422 2 65 #define YUV_FORMAT_NV12 3 66 67 /* The various formats of RGB pixel that we support */ 68 #define RGB_FORMAT_RGB565 1 69 #define RGB_FORMAT_RGB24 2 70 #define RGB_FORMAT_RGBA 3 71 #define RGB_FORMAT_BGRA 4 72 #define RGB_FORMAT_ARGB 5 73 #define RGB_FORMAT_ABGR 6 74 75 // divide by PRECISION_FACTOR and clamp to [0:255] interval 76 // input must be in the [-128*PRECISION_FACTOR:384*PRECISION_FACTOR] range 77 static uint8_t clampU8(int32_t v) 78 { 79 static const uint8_t lut[512] = 80 {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 81 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 82 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46, 83 47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90, 84 91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125, 85 126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158, 86 159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191, 87 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224, 88 225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255, 89 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, 90 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, 91 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, 92 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255 93 }; 94 return lut[(v+128*PRECISION_FACTOR)>>PRECISION]; 95 } 96 97 98 #define STD_FUNCTION_NAME yuv420_rgb565_std 99 #define YUV_FORMAT YUV_FORMAT_420 100 #define RGB_FORMAT RGB_FORMAT_RGB565 101 #include "yuv_rgb_std_func.h" 102 103 #define STD_FUNCTION_NAME yuv420_rgb24_std 104 #define YUV_FORMAT YUV_FORMAT_420 105 #define RGB_FORMAT RGB_FORMAT_RGB24 106 #include "yuv_rgb_std_func.h" 107 108 #define STD_FUNCTION_NAME yuv420_rgba_std 109 #define YUV_FORMAT YUV_FORMAT_420 110 #define RGB_FORMAT RGB_FORMAT_RGBA 111 #include "yuv_rgb_std_func.h" 112 113 #define STD_FUNCTION_NAME yuv420_bgra_std 114 #define YUV_FORMAT YUV_FORMAT_420 115 #define RGB_FORMAT RGB_FORMAT_BGRA 116 #include "yuv_rgb_std_func.h" 117 118 #define STD_FUNCTION_NAME yuv420_argb_std 119 #define YUV_FORMAT YUV_FORMAT_420 120 #define RGB_FORMAT RGB_FORMAT_ARGB 121 #include "yuv_rgb_std_func.h" 122 123 #define STD_FUNCTION_NAME yuv420_abgr_std 124 #define YUV_FORMAT YUV_FORMAT_420 125 #define RGB_FORMAT RGB_FORMAT_ABGR 126 #include "yuv_rgb_std_func.h" 127 128 #define STD_FUNCTION_NAME yuv422_rgb565_std 129 #define YUV_FORMAT YUV_FORMAT_422 130 #define RGB_FORMAT RGB_FORMAT_RGB565 131 #include "yuv_rgb_std_func.h" 132 133 #define STD_FUNCTION_NAME yuv422_rgb24_std 134 #define YUV_FORMAT YUV_FORMAT_422 135 #define RGB_FORMAT RGB_FORMAT_RGB24 136 #include "yuv_rgb_std_func.h" 137 138 #define STD_FUNCTION_NAME yuv422_rgba_std 139 #define YUV_FORMAT YUV_FORMAT_422 140 #define RGB_FORMAT RGB_FORMAT_RGBA 141 #include "yuv_rgb_std_func.h" 142 143 #define STD_FUNCTION_NAME yuv422_bgra_std 144 #define YUV_FORMAT YUV_FORMAT_422 145 #define RGB_FORMAT RGB_FORMAT_BGRA 146 #include "yuv_rgb_std_func.h" 147 148 #define STD_FUNCTION_NAME yuv422_argb_std 149 #define YUV_FORMAT YUV_FORMAT_422 150 #define RGB_FORMAT RGB_FORMAT_ARGB 151 #include "yuv_rgb_std_func.h" 152 153 #define STD_FUNCTION_NAME yuv422_abgr_std 154 #define YUV_FORMAT YUV_FORMAT_422 155 #define RGB_FORMAT RGB_FORMAT_ABGR 156 #include "yuv_rgb_std_func.h" 157 158 #define STD_FUNCTION_NAME yuvnv12_rgb565_std 159 #define YUV_FORMAT YUV_FORMAT_NV12 160 #define RGB_FORMAT RGB_FORMAT_RGB565 161 #include "yuv_rgb_std_func.h" 162 163 #define STD_FUNCTION_NAME yuvnv12_rgb24_std 164 #define YUV_FORMAT YUV_FORMAT_NV12 165 #define RGB_FORMAT RGB_FORMAT_RGB24 166 #include "yuv_rgb_std_func.h" 167 168 #define STD_FUNCTION_NAME yuvnv12_rgba_std 169 #define YUV_FORMAT YUV_FORMAT_NV12 170 #define RGB_FORMAT RGB_FORMAT_RGBA 171 #include "yuv_rgb_std_func.h" 172 173 #define STD_FUNCTION_NAME yuvnv12_bgra_std 174 #define YUV_FORMAT YUV_FORMAT_NV12 175 #define RGB_FORMAT RGB_FORMAT_BGRA 176 #include "yuv_rgb_std_func.h" 177 178 #define STD_FUNCTION_NAME yuvnv12_argb_std 179 #define YUV_FORMAT YUV_FORMAT_NV12 180 #define RGB_FORMAT RGB_FORMAT_ARGB 181 #include "yuv_rgb_std_func.h" 182 183 #define STD_FUNCTION_NAME yuvnv12_abgr_std 184 #define YUV_FORMAT YUV_FORMAT_NV12 185 #define RGB_FORMAT RGB_FORMAT_ABGR 186 #include "yuv_rgb_std_func.h" 187 188 void rgb24_yuv420_std( 189 uint32_t width, uint32_t height, 190 const uint8_t *RGB, uint32_t RGB_stride, 191 uint8_t *Y, uint8_t *U, uint8_t *V, uint32_t Y_stride, uint32_t UV_stride, 192 YCbCrType yuv_type) 193 { 194 const RGB2YUVParam *const param = &(RGB2YUV[yuv_type]); 195 196 uint32_t x, y; 197 for(y=0; y<(height-1); y+=2) 198 { 199 const uint8_t *rgb_ptr1=RGB+y*RGB_stride, 200 *rgb_ptr2=RGB+(y+1)*RGB_stride; 201 202 uint8_t *y_ptr1=Y+y*Y_stride, 203 *y_ptr2=Y+(y+1)*Y_stride, 204 *u_ptr=U+(y/2)*UV_stride, 205 *v_ptr=V+(y/2)*UV_stride; 206 207 for(x=0; x<(width-1); x+=2) 208 { 209 // compute yuv for the four pixels, u and v values are summed 210 int32_t y_tmp, u_tmp, v_tmp; 211 212 y_tmp = param->matrix[0][0]*rgb_ptr1[0] + param->matrix[0][1]*rgb_ptr1[1] + param->matrix[0][2]*rgb_ptr1[2]; 213 u_tmp = param->matrix[1][0]*rgb_ptr1[0] + param->matrix[1][1]*rgb_ptr1[1] + param->matrix[1][2]*rgb_ptr1[2]; 214 v_tmp = param->matrix[2][0]*rgb_ptr1[0] + param->matrix[2][1]*rgb_ptr1[1] + param->matrix[2][2]*rgb_ptr1[2]; 215 y_ptr1[0]=clampU8(y_tmp+((param->y_shift)<<PRECISION)); 216 217 y_tmp = param->matrix[0][0]*rgb_ptr1[3] + param->matrix[0][1]*rgb_ptr1[4] + param->matrix[0][2]*rgb_ptr1[5]; 218 u_tmp += param->matrix[1][0]*rgb_ptr1[3] + param->matrix[1][1]*rgb_ptr1[4] + param->matrix[1][2]*rgb_ptr1[5]; 219 v_tmp += param->matrix[2][0]*rgb_ptr1[3] + param->matrix[2][1]*rgb_ptr1[4] + param->matrix[2][2]*rgb_ptr1[5]; 220 y_ptr1[1]=clampU8(y_tmp+((param->y_shift)<<PRECISION)); 221 222 y_tmp = param->matrix[0][0]*rgb_ptr2[0] + param->matrix[0][1]*rgb_ptr2[1] + param->matrix[0][2]*rgb_ptr2[2]; 223 u_tmp += param->matrix[1][0]*rgb_ptr2[0] + param->matrix[1][1]*rgb_ptr2[1] + param->matrix[1][2]*rgb_ptr2[2]; 224 v_tmp += param->matrix[2][0]*rgb_ptr2[0] + param->matrix[2][1]*rgb_ptr2[1] + param->matrix[2][2]*rgb_ptr2[2]; 225 y_ptr2[0]=clampU8(y_tmp+((param->y_shift)<<PRECISION)); 226 227 y_tmp = param->matrix[0][0]*rgb_ptr2[3] + param->matrix[0][1]*rgb_ptr2[4] + param->matrix[0][2]*rgb_ptr2[5]; 228 u_tmp += param->matrix[1][0]*rgb_ptr2[3] + param->matrix[1][1]*rgb_ptr2[4] + param->matrix[1][2]*rgb_ptr2[5]; 229 v_tmp += param->matrix[2][0]*rgb_ptr2[3] + param->matrix[2][1]*rgb_ptr2[4] + param->matrix[2][2]*rgb_ptr2[5]; 230 y_ptr2[1]=clampU8(y_tmp+((param->y_shift)<<PRECISION)); 231 232 u_ptr[0] = clampU8(u_tmp/4+(128<<PRECISION)); 233 v_ptr[0] = clampU8(v_tmp/4+(128<<PRECISION)); 234 235 rgb_ptr1 += 6; 236 rgb_ptr2 += 6; 237 y_ptr1 += 2; 238 y_ptr2 += 2; 239 u_ptr += 1; 240 v_ptr += 1; 241 } 242 } 243 } 244 245 #ifdef __SSE2__ 246 247 #define SSE_FUNCTION_NAME yuv420_rgb565_sse 248 #define STD_FUNCTION_NAME yuv420_rgb565_std 249 #define YUV_FORMAT YUV_FORMAT_420 250 #define RGB_FORMAT RGB_FORMAT_RGB565 251 #define SSE_ALIGNED 252 #include "yuv_rgb_sse_func.h" 253 254 #define SSE_FUNCTION_NAME yuv420_rgb565_sseu 255 #define STD_FUNCTION_NAME yuv420_rgb565_std 256 #define YUV_FORMAT YUV_FORMAT_420 257 #define RGB_FORMAT RGB_FORMAT_RGB565 258 #include "yuv_rgb_sse_func.h" 259 260 #define SSE_FUNCTION_NAME yuv420_rgb24_sse 261 #define STD_FUNCTION_NAME yuv420_rgb24_std 262 #define YUV_FORMAT YUV_FORMAT_420 263 #define RGB_FORMAT RGB_FORMAT_RGB24 264 #define SSE_ALIGNED 265 #include "yuv_rgb_sse_func.h" 266 267 #define SSE_FUNCTION_NAME yuv420_rgb24_sseu 268 #define STD_FUNCTION_NAME yuv420_rgb24_std 269 #define YUV_FORMAT YUV_FORMAT_420 270 #define RGB_FORMAT RGB_FORMAT_RGB24 271 #include "yuv_rgb_sse_func.h" 272 273 #define SSE_FUNCTION_NAME yuv420_rgba_sse 274 #define STD_FUNCTION_NAME yuv420_rgba_std 275 #define YUV_FORMAT YUV_FORMAT_420 276 #define RGB_FORMAT RGB_FORMAT_RGBA 277 #define SSE_ALIGNED 278 #include "yuv_rgb_sse_func.h" 279 280 #define SSE_FUNCTION_NAME yuv420_rgba_sseu 281 #define STD_FUNCTION_NAME yuv420_rgba_std 282 #define YUV_FORMAT YUV_FORMAT_420 283 #define RGB_FORMAT RGB_FORMAT_RGBA 284 #include "yuv_rgb_sse_func.h" 285 286 #define SSE_FUNCTION_NAME yuv420_bgra_sse 287 #define STD_FUNCTION_NAME yuv420_bgra_std 288 #define YUV_FORMAT YUV_FORMAT_420 289 #define RGB_FORMAT RGB_FORMAT_BGRA 290 #define SSE_ALIGNED 291 #include "yuv_rgb_sse_func.h" 292 293 #define SSE_FUNCTION_NAME yuv420_bgra_sseu 294 #define STD_FUNCTION_NAME yuv420_bgra_std 295 #define YUV_FORMAT YUV_FORMAT_420 296 #define RGB_FORMAT RGB_FORMAT_BGRA 297 #include "yuv_rgb_sse_func.h" 298 299 #define SSE_FUNCTION_NAME yuv420_argb_sse 300 #define STD_FUNCTION_NAME yuv420_argb_std 301 #define YUV_FORMAT YUV_FORMAT_420 302 #define RGB_FORMAT RGB_FORMAT_ARGB 303 #define SSE_ALIGNED 304 #include "yuv_rgb_sse_func.h" 305 306 #define SSE_FUNCTION_NAME yuv420_argb_sseu 307 #define STD_FUNCTION_NAME yuv420_argb_std 308 #define YUV_FORMAT YUV_FORMAT_420 309 #define RGB_FORMAT RGB_FORMAT_ARGB 310 #include "yuv_rgb_sse_func.h" 311 312 #define SSE_FUNCTION_NAME yuv420_abgr_sse 313 #define STD_FUNCTION_NAME yuv420_abgr_std 314 #define YUV_FORMAT YUV_FORMAT_420 315 #define RGB_FORMAT RGB_FORMAT_ABGR 316 #define SSE_ALIGNED 317 #include "yuv_rgb_sse_func.h" 318 319 #define SSE_FUNCTION_NAME yuv420_abgr_sseu 320 #define STD_FUNCTION_NAME yuv420_abgr_std 321 #define YUV_FORMAT YUV_FORMAT_420 322 #define RGB_FORMAT RGB_FORMAT_ABGR 323 #include "yuv_rgb_sse_func.h" 324 325 #define SSE_FUNCTION_NAME yuv422_rgb565_sse 326 #define STD_FUNCTION_NAME yuv422_rgb565_std 327 #define YUV_FORMAT YUV_FORMAT_422 328 #define RGB_FORMAT RGB_FORMAT_RGB565 329 #define SSE_ALIGNED 330 #include "yuv_rgb_sse_func.h" 331 332 #define SSE_FUNCTION_NAME yuv422_rgb565_sseu 333 #define STD_FUNCTION_NAME yuv422_rgb565_std 334 #define YUV_FORMAT YUV_FORMAT_422 335 #define RGB_FORMAT RGB_FORMAT_RGB565 336 #include "yuv_rgb_sse_func.h" 337 338 #define SSE_FUNCTION_NAME yuv422_rgb24_sse 339 #define STD_FUNCTION_NAME yuv422_rgb24_std 340 #define YUV_FORMAT YUV_FORMAT_422 341 #define RGB_FORMAT RGB_FORMAT_RGB24 342 #define SSE_ALIGNED 343 #include "yuv_rgb_sse_func.h" 344 345 #define SSE_FUNCTION_NAME yuv422_rgb24_sseu 346 #define STD_FUNCTION_NAME yuv422_rgb24_std 347 #define YUV_FORMAT YUV_FORMAT_422 348 #define RGB_FORMAT RGB_FORMAT_RGB24 349 #include "yuv_rgb_sse_func.h" 350 351 #define SSE_FUNCTION_NAME yuv422_rgba_sse 352 #define STD_FUNCTION_NAME yuv422_rgba_std 353 #define YUV_FORMAT YUV_FORMAT_422 354 #define RGB_FORMAT RGB_FORMAT_RGBA 355 #define SSE_ALIGNED 356 #include "yuv_rgb_sse_func.h" 357 358 #define SSE_FUNCTION_NAME yuv422_rgba_sseu 359 #define STD_FUNCTION_NAME yuv422_rgba_std 360 #define YUV_FORMAT YUV_FORMAT_422 361 #define RGB_FORMAT RGB_FORMAT_RGBA 362 #include "yuv_rgb_sse_func.h" 363 364 #define SSE_FUNCTION_NAME yuv422_bgra_sse 365 #define STD_FUNCTION_NAME yuv422_bgra_std 366 #define YUV_FORMAT YUV_FORMAT_422 367 #define RGB_FORMAT RGB_FORMAT_BGRA 368 #define SSE_ALIGNED 369 #include "yuv_rgb_sse_func.h" 370 371 #define SSE_FUNCTION_NAME yuv422_bgra_sseu 372 #define STD_FUNCTION_NAME yuv422_bgra_std 373 #define YUV_FORMAT YUV_FORMAT_422 374 #define RGB_FORMAT RGB_FORMAT_BGRA 375 #include "yuv_rgb_sse_func.h" 376 377 #define SSE_FUNCTION_NAME yuv422_argb_sse 378 #define STD_FUNCTION_NAME yuv422_argb_std 379 #define YUV_FORMAT YUV_FORMAT_422 380 #define RGB_FORMAT RGB_FORMAT_ARGB 381 #define SSE_ALIGNED 382 #include "yuv_rgb_sse_func.h" 383 384 #define SSE_FUNCTION_NAME yuv422_argb_sseu 385 #define STD_FUNCTION_NAME yuv422_argb_std 386 #define YUV_FORMAT YUV_FORMAT_422 387 #define RGB_FORMAT RGB_FORMAT_ARGB 388 #include "yuv_rgb_sse_func.h" 389 390 #define SSE_FUNCTION_NAME yuv422_abgr_sse 391 #define STD_FUNCTION_NAME yuv422_abgr_std 392 #define YUV_FORMAT YUV_FORMAT_422 393 #define RGB_FORMAT RGB_FORMAT_ABGR 394 #define SSE_ALIGNED 395 #include "yuv_rgb_sse_func.h" 396 397 #define SSE_FUNCTION_NAME yuv422_abgr_sseu 398 #define STD_FUNCTION_NAME yuv422_abgr_std 399 #define YUV_FORMAT YUV_FORMAT_422 400 #define RGB_FORMAT RGB_FORMAT_ABGR 401 #include "yuv_rgb_sse_func.h" 402 403 #define SSE_FUNCTION_NAME yuvnv12_rgb565_sse 404 #define STD_FUNCTION_NAME yuvnv12_rgb565_std 405 #define YUV_FORMAT YUV_FORMAT_NV12 406 #define RGB_FORMAT RGB_FORMAT_RGB565 407 #define SSE_ALIGNED 408 #include "yuv_rgb_sse_func.h" 409 410 #define SSE_FUNCTION_NAME yuvnv12_rgb565_sseu 411 #define STD_FUNCTION_NAME yuvnv12_rgb565_std 412 #define YUV_FORMAT YUV_FORMAT_NV12 413 #define RGB_FORMAT RGB_FORMAT_RGB565 414 #include "yuv_rgb_sse_func.h" 415 416 #define SSE_FUNCTION_NAME yuvnv12_rgb24_sse 417 #define STD_FUNCTION_NAME yuvnv12_rgb24_std 418 #define YUV_FORMAT YUV_FORMAT_NV12 419 #define RGB_FORMAT RGB_FORMAT_RGB24 420 #define SSE_ALIGNED 421 #include "yuv_rgb_sse_func.h" 422 423 #define SSE_FUNCTION_NAME yuvnv12_rgb24_sseu 424 #define STD_FUNCTION_NAME yuvnv12_rgb24_std 425 #define YUV_FORMAT YUV_FORMAT_NV12 426 #define RGB_FORMAT RGB_FORMAT_RGB24 427 #include "yuv_rgb_sse_func.h" 428 429 #define SSE_FUNCTION_NAME yuvnv12_rgba_sse 430 #define STD_FUNCTION_NAME yuvnv12_rgba_std 431 #define YUV_FORMAT YUV_FORMAT_NV12 432 #define RGB_FORMAT RGB_FORMAT_RGBA 433 #define SSE_ALIGNED 434 #include "yuv_rgb_sse_func.h" 435 436 #define SSE_FUNCTION_NAME yuvnv12_rgba_sseu 437 #define STD_FUNCTION_NAME yuvnv12_rgba_std 438 #define YUV_FORMAT YUV_FORMAT_NV12 439 #define RGB_FORMAT RGB_FORMAT_RGBA 440 #include "yuv_rgb_sse_func.h" 441 442 #define SSE_FUNCTION_NAME yuvnv12_bgra_sse 443 #define STD_FUNCTION_NAME yuvnv12_bgra_std 444 #define YUV_FORMAT YUV_FORMAT_NV12 445 #define RGB_FORMAT RGB_FORMAT_BGRA 446 #define SSE_ALIGNED 447 #include "yuv_rgb_sse_func.h" 448 449 #define SSE_FUNCTION_NAME yuvnv12_bgra_sseu 450 #define STD_FUNCTION_NAME yuvnv12_bgra_std 451 #define YUV_FORMAT YUV_FORMAT_NV12 452 #define RGB_FORMAT RGB_FORMAT_BGRA 453 #include "yuv_rgb_sse_func.h" 454 455 #define SSE_FUNCTION_NAME yuvnv12_argb_sse 456 #define STD_FUNCTION_NAME yuvnv12_argb_std 457 #define YUV_FORMAT YUV_FORMAT_NV12 458 #define RGB_FORMAT RGB_FORMAT_ARGB 459 #define SSE_ALIGNED 460 #include "yuv_rgb_sse_func.h" 461 462 #define SSE_FUNCTION_NAME yuvnv12_argb_sseu 463 #define STD_FUNCTION_NAME yuvnv12_argb_std 464 #define YUV_FORMAT YUV_FORMAT_NV12 465 #define RGB_FORMAT RGB_FORMAT_ARGB 466 #include "yuv_rgb_sse_func.h" 467 468 #define SSE_FUNCTION_NAME yuvnv12_abgr_sse 469 #define STD_FUNCTION_NAME yuvnv12_abgr_std 470 #define YUV_FORMAT YUV_FORMAT_NV12 471 #define RGB_FORMAT RGB_FORMAT_ABGR 472 #define SSE_ALIGNED 473 #include "yuv_rgb_sse_func.h" 474 475 #define SSE_FUNCTION_NAME yuvnv12_abgr_sseu 476 #define STD_FUNCTION_NAME yuvnv12_abgr_std 477 #define YUV_FORMAT YUV_FORMAT_NV12 478 #define RGB_FORMAT RGB_FORMAT_ABGR 479 #include "yuv_rgb_sse_func.h" 480 481 482 #define UNPACK_RGB24_32_STEP1(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \ 483 R1 = _mm_unpacklo_epi8(RGB1, RGB4); \ 484 R2 = _mm_unpackhi_epi8(RGB1, RGB4); \ 485 G1 = _mm_unpacklo_epi8(RGB2, RGB5); \ 486 G2 = _mm_unpackhi_epi8(RGB2, RGB5); \ 487 B1 = _mm_unpacklo_epi8(RGB3, RGB6); \ 488 B2 = _mm_unpackhi_epi8(RGB3, RGB6); 489 490 #define UNPACK_RGB24_32_STEP2(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \ 491 RGB1 = _mm_unpacklo_epi8(R1, G2); \ 492 RGB2 = _mm_unpackhi_epi8(R1, G2); \ 493 RGB3 = _mm_unpacklo_epi8(R2, B1); \ 494 RGB4 = _mm_unpackhi_epi8(R2, B1); \ 495 RGB5 = _mm_unpacklo_epi8(G1, B2); \ 496 RGB6 = _mm_unpackhi_epi8(G1, B2); \ 497 498 #define UNPACK_RGB24_32(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \ 499 UNPACK_RGB24_32_STEP1(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \ 500 UNPACK_RGB24_32_STEP2(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \ 501 UNPACK_RGB24_32_STEP1(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \ 502 UNPACK_RGB24_32_STEP2(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \ 503 UNPACK_RGB24_32_STEP1(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \ 504 505 #define RGB2YUV_16(R, G, B, Y, U, V) \ 506 Y = _mm_add_epi16(_mm_mullo_epi16(R, _mm_set1_epi16(param->matrix[0][0])), \ 507 _mm_mullo_epi16(G, _mm_set1_epi16(param->matrix[0][1]))); \ 508 Y = _mm_add_epi16(Y, _mm_mullo_epi16(B, _mm_set1_epi16(param->matrix[0][2]))); \ 509 Y = _mm_add_epi16(Y, _mm_set1_epi16((param->y_shift)<<PRECISION)); \ 510 Y = _mm_srai_epi16(Y, PRECISION); \ 511 U = _mm_add_epi16(_mm_mullo_epi16(R, _mm_set1_epi16(param->matrix[1][0])), \ 512 _mm_mullo_epi16(G, _mm_set1_epi16(param->matrix[1][1]))); \ 513 U = _mm_add_epi16(U, _mm_mullo_epi16(B, _mm_set1_epi16(param->matrix[1][2]))); \ 514 U = _mm_add_epi16(U, _mm_set1_epi16(128<<PRECISION)); \ 515 U = _mm_srai_epi16(U, PRECISION); \ 516 V = _mm_add_epi16(_mm_mullo_epi16(R, _mm_set1_epi16(param->matrix[2][0])), \ 517 _mm_mullo_epi16(G, _mm_set1_epi16(param->matrix[2][1]))); \ 518 V = _mm_add_epi16(V, _mm_mullo_epi16(B, _mm_set1_epi16(param->matrix[2][2]))); \ 519 V = _mm_add_epi16(V, _mm_set1_epi16(128<<PRECISION)); \ 520 V = _mm_srai_epi16(V, PRECISION); 521 522 #define RGB2YUV_32 \ 523 __m128i r1, r2, b1, b2, g1, g2; \ 524 __m128i r_16, g_16, b_16; \ 525 __m128i y1_16, y2_16, u1_16, u2_16, v1_16, v2_16, y, u1, u2, v1, v2, u1_tmp, u2_tmp, v1_tmp, v2_tmp; \ 526 __m128i rgb1 = LOAD_SI128((const __m128i*)(rgb_ptr1)), \ 527 rgb2 = LOAD_SI128((const __m128i*)(rgb_ptr1+16)), \ 528 rgb3 = LOAD_SI128((const __m128i*)(rgb_ptr1+32)), \ 529 rgb4 = LOAD_SI128((const __m128i*)(rgb_ptr2)), \ 530 rgb5 = LOAD_SI128((const __m128i*)(rgb_ptr2+16)), \ 531 rgb6 = LOAD_SI128((const __m128i*)(rgb_ptr2+32)); \ 532 /* unpack rgb24 data to r, g and b data in separate channels*/ \ 533 UNPACK_RGB24_32(rgb1, rgb2, rgb3, rgb4, rgb5, rgb6, r1, r2, g1, g2, b1, b2) \ 534 /* process pixels of first line */ \ 535 r_16 = _mm_unpacklo_epi8(r1, _mm_setzero_si128()); \ 536 g_16 = _mm_unpacklo_epi8(g1, _mm_setzero_si128()); \ 537 b_16 = _mm_unpacklo_epi8(b1, _mm_setzero_si128()); \ 538 RGB2YUV_16(r_16, g_16, b_16, y1_16, u1_16, v1_16) \ 539 r_16 = _mm_unpackhi_epi8(r1, _mm_setzero_si128()); \ 540 g_16 = _mm_unpackhi_epi8(g1, _mm_setzero_si128()); \ 541 b_16 = _mm_unpackhi_epi8(b1, _mm_setzero_si128()); \ 542 RGB2YUV_16(r_16, g_16, b_16, y2_16, u2_16, v2_16) \ 543 y = _mm_packus_epi16(y1_16, y2_16); \ 544 u1 = _mm_packus_epi16(u1_16, u2_16); \ 545 v1 = _mm_packus_epi16(v1_16, v2_16); \ 546 /* save Y values */ \ 547 SAVE_SI128((__m128i*)(y_ptr1), y); \ 548 /* process pixels of second line */ \ 549 r_16 = _mm_unpacklo_epi8(r2, _mm_setzero_si128()); \ 550 g_16 = _mm_unpacklo_epi8(g2, _mm_setzero_si128()); \ 551 b_16 = _mm_unpacklo_epi8(b2, _mm_setzero_si128()); \ 552 RGB2YUV_16(r_16, g_16, b_16, y1_16, u1_16, v1_16) \ 553 r_16 = _mm_unpackhi_epi8(r2, _mm_setzero_si128()); \ 554 g_16 = _mm_unpackhi_epi8(g2, _mm_setzero_si128()); \ 555 b_16 = _mm_unpackhi_epi8(b2, _mm_setzero_si128()); \ 556 RGB2YUV_16(r_16, g_16, b_16, y2_16, u2_16, v2_16) \ 557 y = _mm_packus_epi16(y1_16, y2_16); \ 558 u2 = _mm_packus_epi16(u1_16, u2_16); \ 559 v2 = _mm_packus_epi16(v1_16, v2_16); \ 560 /* save Y values */ \ 561 SAVE_SI128((__m128i*)(y_ptr2), y); \ 562 /* vertical subsampling of u/v values */ \ 563 u1_tmp = _mm_avg_epu8(u1, u2); \ 564 v1_tmp = _mm_avg_epu8(v1, v2); \ 565 /* do the same again with next data */ \ 566 rgb1 = LOAD_SI128((const __m128i*)(rgb_ptr1+48)); \ 567 rgb2 = LOAD_SI128((const __m128i*)(rgb_ptr1+64)); \ 568 rgb3 = LOAD_SI128((const __m128i*)(rgb_ptr1+80)); \ 569 rgb4 = LOAD_SI128((const __m128i*)(rgb_ptr2+48)); \ 570 rgb5 = LOAD_SI128((const __m128i*)(rgb_ptr2+64)); \ 571 rgb6 = LOAD_SI128((const __m128i*)(rgb_ptr2+80)); \ 572 /* unpack rgb24 data to r, g and b data in separate channels*/ \ 573 UNPACK_RGB24_32(rgb1, rgb2, rgb3, rgb4, rgb5, rgb6, r1, r2, g1, g2, b1, b2) \ 574 /* process pixels of first line */ \ 575 r_16 = _mm_unpacklo_epi8(r1, _mm_setzero_si128()); \ 576 g_16 = _mm_unpacklo_epi8(g1, _mm_setzero_si128()); \ 577 b_16 = _mm_unpacklo_epi8(b1, _mm_setzero_si128()); \ 578 RGB2YUV_16(r_16, g_16, b_16, y1_16, u1_16, v1_16) \ 579 r_16 = _mm_unpackhi_epi8(r1, _mm_setzero_si128()); \ 580 g_16 = _mm_unpackhi_epi8(g1, _mm_setzero_si128()); \ 581 b_16 = _mm_unpackhi_epi8(b1, _mm_setzero_si128()); \ 582 RGB2YUV_16(r_16, g_16, b_16, y2_16, u2_16, v2_16) \ 583 y = _mm_packus_epi16(y1_16, y2_16); \ 584 u1 = _mm_packus_epi16(u1_16, u2_16); \ 585 v1 = _mm_packus_epi16(v1_16, v2_16); \ 586 /* save Y values */ \ 587 SAVE_SI128((__m128i*)(y_ptr1+16), y); \ 588 /* process pixels of second line */ \ 589 r_16 = _mm_unpacklo_epi8(r2, _mm_setzero_si128()); \ 590 g_16 = _mm_unpacklo_epi8(g2, _mm_setzero_si128()); \ 591 b_16 = _mm_unpacklo_epi8(b2, _mm_setzero_si128()); \ 592 RGB2YUV_16(r_16, g_16, b_16, y1_16, u1_16, v1_16) \ 593 r_16 = _mm_unpackhi_epi8(r2, _mm_setzero_si128()); \ 594 g_16 = _mm_unpackhi_epi8(g2, _mm_setzero_si128()); \ 595 b_16 = _mm_unpackhi_epi8(b2, _mm_setzero_si128()); \ 596 RGB2YUV_16(r_16, g_16, b_16, y2_16, u2_16, v2_16) \ 597 y = _mm_packus_epi16(y1_16, y2_16); \ 598 u2 = _mm_packus_epi16(u1_16, u2_16); \ 599 v2 = _mm_packus_epi16(v1_16, v2_16); \ 600 /* save Y values */ \ 601 SAVE_SI128((__m128i*)(y_ptr2+16), y); \ 602 /* vertical subsampling of u/v values */ \ 603 u2_tmp = _mm_avg_epu8(u1, u2); \ 604 v2_tmp = _mm_avg_epu8(v1, v2); \ 605 /* horizontal subsampling of u/v values */ \ 606 u1 = _mm_packus_epi16(_mm_srl_epi16(u1_tmp, _mm_cvtsi32_si128(8)), _mm_srl_epi16(u2_tmp, _mm_cvtsi32_si128(8))); \ 607 v1 = _mm_packus_epi16(_mm_srl_epi16(v1_tmp, _mm_cvtsi32_si128(8)), _mm_srl_epi16(v2_tmp, _mm_cvtsi32_si128(8))); \ 608 u2 = _mm_packus_epi16(_mm_and_si128(u1_tmp, _mm_set1_epi16(0xFF)), _mm_and_si128(u2_tmp, _mm_set1_epi16(0xFF))); \ 609 v2 = _mm_packus_epi16(_mm_and_si128(v1_tmp, _mm_set1_epi16(0xFF)), _mm_and_si128(v2_tmp, _mm_set1_epi16(0xFF))); \ 610 u1 = _mm_avg_epu8(u1, u2); \ 611 v1 = _mm_avg_epu8(v1, v2); \ 612 SAVE_SI128((__m128i*)(u_ptr), u1); \ 613 SAVE_SI128((__m128i*)(v_ptr), v1); 614 615 void rgb24_yuv420_sse(uint32_t width, uint32_t height, 616 const uint8_t *RGB, uint32_t RGB_stride, 617 uint8_t *Y, uint8_t *U, uint8_t *V, uint32_t Y_stride, uint32_t UV_stride, 618 YCbCrType yuv_type) 619 { 620 #define LOAD_SI128 _mm_load_si128 621 #define SAVE_SI128 _mm_stream_si128 622 const RGB2YUVParam *const param = &(RGB2YUV[yuv_type]); 623 624 uint32_t xpos, ypos; 625 for(ypos=0; ypos<(height-1); ypos+=2) 626 { 627 const uint8_t *rgb_ptr1=RGB+ypos*RGB_stride, 628 *rgb_ptr2=RGB+(ypos+1)*RGB_stride; 629 630 uint8_t *y_ptr1=Y+ypos*Y_stride, 631 *y_ptr2=Y+(ypos+1)*Y_stride, 632 *u_ptr=U+(ypos/2)*UV_stride, 633 *v_ptr=V+(ypos/2)*UV_stride; 634 635 for(xpos=0; xpos<(width-31); xpos+=32) 636 { 637 RGB2YUV_32 638 639 rgb_ptr1+=96; 640 rgb_ptr2+=96; 641 y_ptr1+=32; 642 y_ptr2+=32; 643 u_ptr+=16; 644 v_ptr+=16; 645 } 646 } 647 #undef LOAD_SI128 648 #undef SAVE_SI128 649 } 650 651 void rgb24_yuv420_sseu(uint32_t width, uint32_t height, 652 const uint8_t *RGB, uint32_t RGB_stride, 653 uint8_t *Y, uint8_t *U, uint8_t *V, uint32_t Y_stride, uint32_t UV_stride, 654 YCbCrType yuv_type) 655 { 656 #define LOAD_SI128 _mm_loadu_si128 657 #define SAVE_SI128 _mm_storeu_si128 658 const RGB2YUVParam *const param = &(RGB2YUV[yuv_type]); 659 660 uint32_t xpos, ypos; 661 for(ypos=0; ypos<(height-1); ypos+=2) 662 { 663 const uint8_t *rgb_ptr1=RGB+ypos*RGB_stride, 664 *rgb_ptr2=RGB+(ypos+1)*RGB_stride; 665 666 uint8_t *y_ptr1=Y+ypos*Y_stride, 667 *y_ptr2=Y+(ypos+1)*Y_stride, 668 *u_ptr=U+(ypos/2)*UV_stride, 669 *v_ptr=V+(ypos/2)*UV_stride; 670 671 for(xpos=0; xpos<(width-31); xpos+=32) 672 { 673 RGB2YUV_32 674 675 rgb_ptr1+=96; 676 rgb_ptr2+=96; 677 y_ptr1+=32; 678 y_ptr2+=32; 679 u_ptr+=16; 680 v_ptr+=16; 681 } 682 } 683 #undef LOAD_SI128 684 #undef SAVE_SI128 685 } 686 687 688 #endif //__SSE2__ 689 690 #endif /* SDL_HAVE_YUV */