SDL_fillrect.c (11406B)
1 /* 2 Simple DirectMedia Layer 3 Copyright (C) 1997-2020 Sam Lantinga <slouken@libsdl.org> 4 5 This software is provided 'as-is', without any express or implied 6 warranty. In no event will the authors be held liable for any damages 7 arising from the use of this software. 8 9 Permission is granted to anyone to use this software for any purpose, 10 including commercial applications, and to alter it and redistribute it 11 freely, subject to the following restrictions: 12 13 1. The origin of this software must not be misrepresented; you must not 14 claim that you wrote the original software. If you use this software 15 in a product, an acknowledgment in the product documentation would be 16 appreciated but is not required. 17 2. Altered source versions must be plainly marked as such, and must not be 18 misrepresented as being the original software. 19 3. This notice may not be removed or altered from any source distribution. 20 */ 21 #include "../SDL_internal.h" 22 23 #include "SDL_video.h" 24 #include "SDL_blit.h" 25 #include "SDL_cpuinfo.h" 26 27 28 #ifdef __SSE__ 29 /* *INDENT-OFF* */ 30 31 #ifdef _MSC_VER 32 #define SSE_BEGIN \ 33 __m128 c128; \ 34 c128.m128_u32[0] = color; \ 35 c128.m128_u32[1] = color; \ 36 c128.m128_u32[2] = color; \ 37 c128.m128_u32[3] = color; 38 #else 39 #define SSE_BEGIN \ 40 __m128 c128; \ 41 DECLARE_ALIGNED(Uint32, cccc[4], 16); \ 42 cccc[0] = color; \ 43 cccc[1] = color; \ 44 cccc[2] = color; \ 45 cccc[3] = color; \ 46 c128 = *(__m128 *)cccc; 47 #endif 48 49 #define SSE_WORK \ 50 for (i = n / 64; i--;) { \ 51 _mm_stream_ps((float *)(p+0), c128); \ 52 _mm_stream_ps((float *)(p+16), c128); \ 53 _mm_stream_ps((float *)(p+32), c128); \ 54 _mm_stream_ps((float *)(p+48), c128); \ 55 p += 64; \ 56 } 57 58 #define SSE_END 59 60 #define DEFINE_SSE_FILLRECT(bpp, type) \ 61 static void \ 62 SDL_FillRect##bpp##SSE(Uint8 *pixels, int pitch, Uint32 color, int w, int h) \ 63 { \ 64 int i, n; \ 65 Uint8 *p = NULL; \ 66 \ 67 SSE_BEGIN; \ 68 \ 69 while (h--) { \ 70 n = w * bpp; \ 71 p = pixels; \ 72 \ 73 if (n > 63) { \ 74 int adjust = 16 - ((uintptr_t)p & 15); \ 75 if (adjust < 16) { \ 76 n -= adjust; \ 77 adjust /= bpp; \ 78 while (adjust--) { \ 79 *((type *)p) = (type)color; \ 80 p += bpp; \ 81 } \ 82 } \ 83 SSE_WORK; \ 84 } \ 85 if (n & 63) { \ 86 int remainder = (n & 63); \ 87 remainder /= bpp; \ 88 while (remainder--) { \ 89 *((type *)p) = (type)color; \ 90 p += bpp; \ 91 } \ 92 } \ 93 pixels += pitch; \ 94 } \ 95 \ 96 SSE_END; \ 97 } 98 99 static void 100 SDL_FillRect1SSE(Uint8 *pixels, int pitch, Uint32 color, int w, int h) 101 { 102 int i, n; 103 104 SSE_BEGIN; 105 while (h--) { 106 Uint8 *p = pixels; 107 n = w; 108 109 if (n > 63) { 110 int adjust = 16 - ((uintptr_t)p & 15); 111 if (adjust) { 112 n -= adjust; 113 SDL_memset(p, color, adjust); 114 p += adjust; 115 } 116 SSE_WORK; 117 } 118 if (n & 63) { 119 int remainder = (n & 63); 120 SDL_memset(p, color, remainder); 121 } 122 pixels += pitch; 123 } 124 125 SSE_END; 126 } 127 /* DEFINE_SSE_FILLRECT(1, Uint8) */ 128 DEFINE_SSE_FILLRECT(2, Uint16) 129 DEFINE_SSE_FILLRECT(4, Uint32) 130 131 /* *INDENT-ON* */ 132 #endif /* __SSE__ */ 133 134 static void 135 SDL_FillRect1(Uint8 * pixels, int pitch, Uint32 color, int w, int h) 136 { 137 int n; 138 Uint8 *p = NULL; 139 140 while (h--) { 141 n = w; 142 p = pixels; 143 144 if (n > 3) { 145 switch ((uintptr_t) p & 3) { 146 case 1: 147 *p++ = (Uint8) color; 148 --n; /* fallthrough */ 149 case 2: 150 *p++ = (Uint8) color; 151 --n; /* fallthrough */ 152 case 3: 153 *p++ = (Uint8) color; 154 --n; /* fallthrough */ 155 } 156 SDL_memset4(p, color, (n >> 2)); 157 } 158 if (n & 3) { 159 p += (n & ~3); 160 switch (n & 3) { 161 case 3: 162 *p++ = (Uint8) color; /* fallthrough */ 163 case 2: 164 *p++ = (Uint8) color; /* fallthrough */ 165 case 1: 166 *p++ = (Uint8) color; /* fallthrough */ 167 } 168 } 169 pixels += pitch; 170 } 171 } 172 173 static void 174 SDL_FillRect2(Uint8 * pixels, int pitch, Uint32 color, int w, int h) 175 { 176 int n; 177 Uint16 *p = NULL; 178 179 while (h--) { 180 n = w; 181 p = (Uint16 *) pixels; 182 183 if (n > 1) { 184 if ((uintptr_t) p & 2) { 185 *p++ = (Uint16) color; 186 --n; 187 } 188 SDL_memset4(p, color, (n >> 1)); 189 } 190 if (n & 1) { 191 p[n - 1] = (Uint16) color; 192 } 193 pixels += pitch; 194 } 195 } 196 197 static void 198 SDL_FillRect3(Uint8 * pixels, int pitch, Uint32 color, int w, int h) 199 { 200 #if SDL_BYTEORDER == SDL_LIL_ENDIAN 201 Uint8 b1 = (Uint8) (color & 0xFF); 202 Uint8 b2 = (Uint8) ((color >> 8) & 0xFF); 203 Uint8 b3 = (Uint8) ((color >> 16) & 0xFF); 204 #elif SDL_BYTEORDER == SDL_BIG_ENDIAN 205 Uint8 b1 = (Uint8) ((color >> 16) & 0xFF); 206 Uint8 b2 = (Uint8) ((color >> 8) & 0xFF); 207 Uint8 b3 = (Uint8) (color & 0xFF); 208 #endif 209 int n; 210 Uint8 *p = NULL; 211 212 while (h--) { 213 n = w; 214 p = pixels; 215 216 while (n--) { 217 *p++ = b1; 218 *p++ = b2; 219 *p++ = b3; 220 } 221 pixels += pitch; 222 } 223 } 224 225 static void 226 SDL_FillRect4(Uint8 * pixels, int pitch, Uint32 color, int w, int h) 227 { 228 while (h--) { 229 SDL_memset4(pixels, color, w); 230 pixels += pitch; 231 } 232 } 233 234 /* 235 * This function performs a fast fill of the given rectangle with 'color' 236 */ 237 int 238 SDL_FillRect(SDL_Surface * dst, const SDL_Rect * rect, Uint32 color) 239 { 240 if (!dst) { 241 return SDL_SetError("Passed NULL destination surface"); 242 } 243 244 /* If 'rect' == NULL, then fill the whole surface */ 245 if (!rect) { 246 rect = &dst->clip_rect; 247 /* Don't attempt to fill if the surface's clip_rect is empty */ 248 if (SDL_RectEmpty(rect)) { 249 return 0; 250 } 251 } 252 253 return SDL_FillRects(dst, rect, 1, color); 254 } 255 256 #if SDL_ARM_NEON_BLITTERS 257 void FillRect8ARMNEONAsm(int32_t w, int32_t h, uint8_t *dst, int32_t dst_stride, uint8_t src); 258 void FillRect16ARMNEONAsm(int32_t w, int32_t h, uint16_t *dst, int32_t dst_stride, uint16_t src); 259 void FillRect32ARMNEONAsm(int32_t w, int32_t h, uint32_t *dst, int32_t dst_stride, uint32_t src); 260 261 static void fill_8_neon(Uint8 * pixels, int pitch, Uint32 color, int w, int h) { 262 FillRect8ARMNEONAsm(w, h, (uint8_t *) pixels, pitch >> 0, color); 263 return; 264 } 265 266 static void fill_16_neon(Uint8 * pixels, int pitch, Uint32 color, int w, int h) { 267 FillRect16ARMNEONAsm(w, h, (uint16_t *) pixels, pitch >> 1, color); 268 return; 269 } 270 271 static void fill_32_neon(Uint8 * pixels, int pitch, Uint32 color, int w, int h) { 272 FillRect32ARMNEONAsm(w, h, (uint32_t *) pixels, pitch >> 2, color); 273 return; 274 } 275 #endif 276 277 #if SDL_ARM_SIMD_BLITTERS 278 void FillRect8ARMSIMDAsm(int32_t w, int32_t h, uint8_t *dst, int32_t dst_stride, uint8_t src); 279 void FillRect16ARMSIMDAsm(int32_t w, int32_t h, uint16_t *dst, int32_t dst_stride, uint16_t src); 280 void FillRect32ARMSIMDAsm(int32_t w, int32_t h, uint32_t *dst, int32_t dst_stride, uint32_t src); 281 282 static void fill_8_simd(Uint8 * pixels, int pitch, Uint32 color, int w, int h) { 283 FillRect8ARMSIMDAsm(w, h, (uint8_t *) pixels, pitch >> 0, color); 284 return; 285 } 286 287 static void fill_16_simd(Uint8 * pixels, int pitch, Uint32 color, int w, int h) { 288 FillRect16ARMSIMDAsm(w, h, (uint16_t *) pixels, pitch >> 1, color); 289 return; 290 } 291 292 static void fill_32_simd(Uint8 * pixels, int pitch, Uint32 color, int w, int h) { 293 FillRect32ARMSIMDAsm(w, h, (uint32_t *) pixels, pitch >> 2, color); 294 return; 295 } 296 #endif 297 298 int 299 SDL_FillRects(SDL_Surface * dst, const SDL_Rect * rects, int count, 300 Uint32 color) 301 { 302 SDL_Rect clipped; 303 Uint8 *pixels; 304 const SDL_Rect* rect; 305 void (*fill_function)(Uint8 * pixels, int pitch, Uint32 color, int w, int h) = NULL; 306 int i; 307 308 if (!dst) { 309 return SDL_SetError("Passed NULL destination surface"); 310 } 311 312 /* This function doesn't work on surfaces < 8 bpp */ 313 if (dst->format->BitsPerPixel < 8) { 314 return SDL_SetError("SDL_FillRect(): Unsupported surface format"); 315 } 316 317 /* Nothing to do */ 318 if (dst->w == 0 || dst->h == 0) { 319 return 0; 320 } 321 322 /* Perform software fill */ 323 if (!dst->pixels) { 324 return SDL_SetError("SDL_FillRect(): You must lock the surface"); 325 } 326 327 if (!rects) { 328 return SDL_SetError("SDL_FillRects() passed NULL rects"); 329 } 330 331 #if SDL_ARM_NEON_BLITTERS 332 if (SDL_HasNEON() && dst->format->BytesPerPixel != 3 && fill_function == NULL) { 333 switch (dst->format->BytesPerPixel) { 334 case 1: 335 fill_function = fill_8_neon; 336 break; 337 case 2: 338 fill_function = fill_16_neon; 339 break; 340 case 4: 341 fill_function = fill_32_neon; 342 break; 343 } 344 } 345 #endif 346 #if SDL_ARM_SIMD_BLITTERS 347 if (SDL_HasARMSIMD() && dst->format->BytesPerPixel != 3 && fill_function == NULL) { 348 switch (dst->format->BytesPerPixel) { 349 case 1: 350 fill_function = fill_8_simd; 351 break; 352 case 2: 353 fill_function = fill_16_simd; 354 break; 355 case 4: 356 fill_function = fill_32_simd; 357 break; 358 } 359 } 360 #endif 361 362 if (fill_function == NULL) { 363 switch (dst->format->BytesPerPixel) { 364 case 1: 365 { 366 color |= (color << 8); 367 color |= (color << 16); 368 #ifdef __SSE__ 369 if (SDL_HasSSE()) { 370 fill_function = SDL_FillRect1SSE; 371 break; 372 } 373 #endif 374 fill_function = SDL_FillRect1; 375 break; 376 } 377 378 case 2: 379 { 380 color |= (color << 16); 381 #ifdef __SSE__ 382 if (SDL_HasSSE()) { 383 fill_function = SDL_FillRect2SSE; 384 break; 385 } 386 #endif 387 fill_function = SDL_FillRect2; 388 break; 389 } 390 391 case 3: 392 /* 24-bit RGB is a slow path, at least for now. */ 393 { 394 fill_function = SDL_FillRect3; 395 break; 396 } 397 398 case 4: 399 { 400 #ifdef __SSE__ 401 if (SDL_HasSSE()) { 402 fill_function = SDL_FillRect4SSE; 403 break; 404 } 405 #endif 406 fill_function = SDL_FillRect4; 407 break; 408 } 409 410 default: 411 return SDL_SetError("Unsupported pixel format"); 412 } 413 } 414 415 for (i = 0; i < count; ++i) { 416 rect = &rects[i]; 417 /* Perform clipping */ 418 if (!SDL_IntersectRect(rect, &dst->clip_rect, &clipped)) { 419 continue; 420 } 421 rect = &clipped; 422 423 pixels = (Uint8 *) dst->pixels + rect->y * dst->pitch + 424 rect->x * dst->format->BytesPerPixel; 425 426 fill_function(pixels, dst->pitch, color, rect->w, rect->h); 427 } 428 429 /* We're done! */ 430 return 0; 431 } 432 433 /* vi: set ts=4 sw=4 expandtab: */