sdl

FORK: Simple Directmedia Layer
git clone https://git.neptards.moe/neptards/sdl.git
Log | Files | Refs

SDL_fillrect.c (11406B)


      1 /*
      2   Simple DirectMedia Layer
      3   Copyright (C) 1997-2020 Sam Lantinga <slouken@libsdl.org>
      4 
      5   This software is provided 'as-is', without any express or implied
      6   warranty.  In no event will the authors be held liable for any damages
      7   arising from the use of this software.
      8 
      9   Permission is granted to anyone to use this software for any purpose,
     10   including commercial applications, and to alter it and redistribute it
     11   freely, subject to the following restrictions:
     12 
     13   1. The origin of this software must not be misrepresented; you must not
     14      claim that you wrote the original software. If you use this software
     15      in a product, an acknowledgment in the product documentation would be
     16      appreciated but is not required.
     17   2. Altered source versions must be plainly marked as such, and must not be
     18      misrepresented as being the original software.
     19   3. This notice may not be removed or altered from any source distribution.
     20 */
     21 #include "../SDL_internal.h"
     22 
     23 #include "SDL_video.h"
     24 #include "SDL_blit.h"
     25 #include "SDL_cpuinfo.h"
     26 
     27 
     28 #ifdef __SSE__
     29 /* *INDENT-OFF* */
     30 
     31 #ifdef _MSC_VER
     32 #define SSE_BEGIN \
     33     __m128 c128; \
     34     c128.m128_u32[0] = color; \
     35     c128.m128_u32[1] = color; \
     36     c128.m128_u32[2] = color; \
     37     c128.m128_u32[3] = color;
     38 #else
     39 #define SSE_BEGIN \
     40     __m128 c128; \
     41     DECLARE_ALIGNED(Uint32, cccc[4], 16); \
     42     cccc[0] = color; \
     43     cccc[1] = color; \
     44     cccc[2] = color; \
     45     cccc[3] = color; \
     46     c128 = *(__m128 *)cccc;
     47 #endif
     48 
     49 #define SSE_WORK \
     50     for (i = n / 64; i--;) { \
     51         _mm_stream_ps((float *)(p+0), c128); \
     52         _mm_stream_ps((float *)(p+16), c128); \
     53         _mm_stream_ps((float *)(p+32), c128); \
     54         _mm_stream_ps((float *)(p+48), c128); \
     55         p += 64; \
     56     }
     57 
     58 #define SSE_END
     59 
     60 #define DEFINE_SSE_FILLRECT(bpp, type) \
     61 static void \
     62 SDL_FillRect##bpp##SSE(Uint8 *pixels, int pitch, Uint32 color, int w, int h) \
     63 { \
     64     int i, n; \
     65     Uint8 *p = NULL; \
     66  \
     67     SSE_BEGIN; \
     68  \
     69     while (h--) { \
     70         n = w * bpp; \
     71         p = pixels; \
     72  \
     73         if (n > 63) { \
     74             int adjust = 16 - ((uintptr_t)p & 15); \
     75             if (adjust < 16) { \
     76                 n -= adjust; \
     77                 adjust /= bpp; \
     78                 while (adjust--) { \
     79                     *((type *)p) = (type)color; \
     80                     p += bpp; \
     81                 } \
     82             } \
     83             SSE_WORK; \
     84         } \
     85         if (n & 63) { \
     86             int remainder = (n & 63); \
     87             remainder /= bpp; \
     88             while (remainder--) { \
     89                 *((type *)p) = (type)color; \
     90                 p += bpp; \
     91             } \
     92         } \
     93         pixels += pitch; \
     94     } \
     95  \
     96     SSE_END; \
     97 }
     98 
     99 static void
    100 SDL_FillRect1SSE(Uint8 *pixels, int pitch, Uint32 color, int w, int h)
    101 {
    102     int i, n;
    103 
    104     SSE_BEGIN;
    105     while (h--) {
    106         Uint8 *p = pixels;
    107         n = w;
    108 
    109         if (n > 63) {
    110             int adjust = 16 - ((uintptr_t)p & 15);
    111             if (adjust) {
    112                 n -= adjust;
    113                 SDL_memset(p, color, adjust);
    114                 p += adjust;
    115             }
    116             SSE_WORK;
    117         }
    118         if (n & 63) {
    119             int remainder = (n & 63);
    120             SDL_memset(p, color, remainder);
    121         }
    122         pixels += pitch;
    123     }
    124 
    125     SSE_END;
    126 }
/* The one-byte-per-pixel variant (SDL_FillRect1SSE) is written by hand above, */
/* so the macro is instantiated only for 2- and 4-byte pixels. */
DEFINE_SSE_FILLRECT(2, Uint16)
DEFINE_SSE_FILLRECT(4, Uint32)
    130 
    131 /* *INDENT-ON* */
    132 #endif /* __SSE__ */
    133 
    134 static void
    135 SDL_FillRect1(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
    136 {
    137     int n;
    138     Uint8 *p = NULL;
    139     
    140     while (h--) {
    141         n = w;
    142         p = pixels;
    143 
    144         if (n > 3) {
    145             switch ((uintptr_t) p & 3) {
    146             case 1:
    147                 *p++ = (Uint8) color;
    148                 --n;                    /* fallthrough */
    149             case 2:
    150                 *p++ = (Uint8) color;
    151                 --n;                    /* fallthrough */
    152             case 3:
    153                 *p++ = (Uint8) color;
    154                 --n;                    /* fallthrough */
    155             }
    156             SDL_memset4(p, color, (n >> 2));
    157         }
    158         if (n & 3) {
    159             p += (n & ~3);
    160             switch (n & 3) {
    161             case 3:
    162                 *p++ = (Uint8) color;   /* fallthrough */
    163             case 2:
    164                 *p++ = (Uint8) color;   /* fallthrough */
    165             case 1:
    166                 *p++ = (Uint8) color;   /* fallthrough */
    167             }
    168         }
    169         pixels += pitch;
    170     }
    171 }
    172 
    173 static void
    174 SDL_FillRect2(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
    175 {
    176     int n;
    177     Uint16 *p = NULL;
    178     
    179     while (h--) {
    180         n = w;
    181         p = (Uint16 *) pixels;
    182 
    183         if (n > 1) {
    184             if ((uintptr_t) p & 2) {
    185                 *p++ = (Uint16) color;
    186                 --n;
    187             }
    188             SDL_memset4(p, color, (n >> 1));
    189         }
    190         if (n & 1) {
    191             p[n - 1] = (Uint16) color;
    192         }
    193         pixels += pitch;
    194     }
    195 }
    196 
    197 static void
    198 SDL_FillRect3(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
    199 {
    200 #if SDL_BYTEORDER == SDL_LIL_ENDIAN
    201     Uint8 b1 = (Uint8) (color & 0xFF);
    202     Uint8 b2 = (Uint8) ((color >> 8) & 0xFF);
    203     Uint8 b3 = (Uint8) ((color >> 16) & 0xFF);
    204 #elif SDL_BYTEORDER == SDL_BIG_ENDIAN
    205     Uint8 b1 = (Uint8) ((color >> 16) & 0xFF);
    206     Uint8 b2 = (Uint8) ((color >> 8) & 0xFF);
    207     Uint8 b3 = (Uint8) (color & 0xFF);
    208 #endif
    209     int n;
    210     Uint8 *p = NULL;
    211 
    212     while (h--) {
    213         n = w;
    214         p = pixels;
    215 
    216         while (n--) {
    217             *p++ = b1;
    218             *p++ = b2;
    219             *p++ = b3;
    220         }
    221         pixels += pitch;
    222     }
    223 }
    224 
    225 static void
    226 SDL_FillRect4(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
    227 {
    228     while (h--) {
    229         SDL_memset4(pixels, color, w);
    230         pixels += pitch;
    231     }
    232 }
    233 
    234 /* 
    235  * This function performs a fast fill of the given rectangle with 'color'
    236  */
    237 int
    238 SDL_FillRect(SDL_Surface * dst, const SDL_Rect * rect, Uint32 color)
    239 {
    240     if (!dst) {
    241         return SDL_SetError("Passed NULL destination surface");
    242     }
    243 
    244     /* If 'rect' == NULL, then fill the whole surface */
    245     if (!rect) {
    246         rect = &dst->clip_rect;
    247         /* Don't attempt to fill if the surface's clip_rect is empty */
    248         if (SDL_RectEmpty(rect)) {
    249             return 0;
    250         }
    251     }
    252 
    253     return SDL_FillRects(dst, rect, 1, color);
    254 }
    255 
    256 #if SDL_ARM_NEON_BLITTERS
    257 void FillRect8ARMNEONAsm(int32_t w, int32_t h, uint8_t *dst, int32_t dst_stride, uint8_t src);
    258 void FillRect16ARMNEONAsm(int32_t w, int32_t h, uint16_t *dst, int32_t dst_stride, uint16_t src);
    259 void FillRect32ARMNEONAsm(int32_t w, int32_t h, uint32_t *dst, int32_t dst_stride, uint32_t src);
    260 
    261 static void fill_8_neon(Uint8 * pixels, int pitch, Uint32 color, int w, int h) {
    262     FillRect8ARMNEONAsm(w, h, (uint8_t *) pixels, pitch >> 0, color);
    263     return;
    264 }
    265 
    266 static void fill_16_neon(Uint8 * pixels, int pitch, Uint32 color, int w, int h) {
    267     FillRect16ARMNEONAsm(w, h, (uint16_t *) pixels, pitch >> 1, color);
    268     return;
    269 }
    270 
    271 static void fill_32_neon(Uint8 * pixels, int pitch, Uint32 color, int w, int h) {
    272     FillRect32ARMNEONAsm(w, h, (uint32_t *) pixels, pitch >> 2, color);
    273     return;
    274 }
    275 #endif
    276 
    277 #if SDL_ARM_SIMD_BLITTERS
    278 void FillRect8ARMSIMDAsm(int32_t w, int32_t h, uint8_t *dst, int32_t dst_stride, uint8_t src);
    279 void FillRect16ARMSIMDAsm(int32_t w, int32_t h, uint16_t *dst, int32_t dst_stride, uint16_t src);
    280 void FillRect32ARMSIMDAsm(int32_t w, int32_t h, uint32_t *dst, int32_t dst_stride, uint32_t src);
    281 
    282 static void fill_8_simd(Uint8 * pixels, int pitch, Uint32 color, int w, int h) {
    283     FillRect8ARMSIMDAsm(w, h, (uint8_t *) pixels, pitch >> 0, color);
    284     return;
    285 }
    286 
    287 static void fill_16_simd(Uint8 * pixels, int pitch, Uint32 color, int w, int h) {
    288     FillRect16ARMSIMDAsm(w, h, (uint16_t *) pixels, pitch >> 1, color);
    289     return;
    290 }
    291 
    292 static void fill_32_simd(Uint8 * pixels, int pitch, Uint32 color, int w, int h) {
    293     FillRect32ARMSIMDAsm(w, h, (uint32_t *) pixels, pitch >> 2, color);
    294     return;
    295 }
    296 #endif
    297 
    298 int
    299 SDL_FillRects(SDL_Surface * dst, const SDL_Rect * rects, int count,
    300               Uint32 color)
    301 {
    302     SDL_Rect clipped;
    303     Uint8 *pixels;
    304     const SDL_Rect* rect;
    305     void (*fill_function)(Uint8 * pixels, int pitch, Uint32 color, int w, int h) = NULL;
    306     int i;
    307 
    308     if (!dst) {
    309         return SDL_SetError("Passed NULL destination surface");
    310     }
    311 
    312     /* This function doesn't work on surfaces < 8 bpp */
    313     if (dst->format->BitsPerPixel < 8) {
    314         return SDL_SetError("SDL_FillRect(): Unsupported surface format");
    315     }
    316 
    317     /* Nothing to do */
    318     if (dst->w == 0 || dst->h == 0) {
    319         return 0;
    320     }
    321 
    322     /* Perform software fill */
    323     if (!dst->pixels) {
    324         return SDL_SetError("SDL_FillRect(): You must lock the surface");
    325     }
    326 
    327     if (!rects) {
    328         return SDL_SetError("SDL_FillRects() passed NULL rects");
    329     }
    330 
    331 #if SDL_ARM_NEON_BLITTERS
    332     if (SDL_HasNEON() && dst->format->BytesPerPixel != 3 && fill_function == NULL) {
    333         switch (dst->format->BytesPerPixel) {
    334         case 1:
    335             fill_function = fill_8_neon;
    336             break;
    337         case 2:
    338             fill_function = fill_16_neon;
    339             break;
    340         case 4:
    341             fill_function = fill_32_neon;
    342             break;
    343         }
    344     }
    345 #endif
    346 #if SDL_ARM_SIMD_BLITTERS
    347     if (SDL_HasARMSIMD() && dst->format->BytesPerPixel != 3 && fill_function == NULL) {
    348         switch (dst->format->BytesPerPixel) {
    349         case 1:
    350             fill_function = fill_8_simd;
    351             break;
    352         case 2:
    353             fill_function = fill_16_simd;
    354             break;
    355         case 4:
    356             fill_function = fill_32_simd;
    357             break;
    358         }
    359     }
    360 #endif
    361 
    362     if (fill_function == NULL) {
    363         switch (dst->format->BytesPerPixel) {
    364         case 1:
    365             {
    366                 color |= (color << 8);
    367                 color |= (color << 16);
    368 #ifdef __SSE__
    369                 if (SDL_HasSSE()) {
    370                     fill_function = SDL_FillRect1SSE;
    371                     break;
    372                 }
    373 #endif
    374                 fill_function = SDL_FillRect1;
    375                 break;
    376             }
    377 
    378         case 2:
    379             {
    380                 color |= (color << 16);
    381 #ifdef __SSE__
    382                 if (SDL_HasSSE()) {
    383                     fill_function = SDL_FillRect2SSE;
    384                     break;
    385                 }
    386 #endif
    387                 fill_function = SDL_FillRect2;
    388                 break;
    389             }
    390 
    391         case 3:
    392             /* 24-bit RGB is a slow path, at least for now. */
    393             {
    394                 fill_function = SDL_FillRect3;
    395                 break;
    396             }
    397 
    398         case 4:
    399             {
    400 #ifdef __SSE__
    401                 if (SDL_HasSSE()) {
    402                     fill_function = SDL_FillRect4SSE;
    403                     break;
    404                 }
    405 #endif
    406                 fill_function = SDL_FillRect4;
    407                 break;
    408             }
    409 
    410         default:
    411             return SDL_SetError("Unsupported pixel format");
    412         }
    413     }
    414 
    415     for (i = 0; i < count; ++i) {
    416         rect = &rects[i];
    417         /* Perform clipping */
    418         if (!SDL_IntersectRect(rect, &dst->clip_rect, &clipped)) {
    419             continue;
    420         }
    421         rect = &clipped;
    422 
    423         pixels = (Uint8 *) dst->pixels + rect->y * dst->pitch +
    424                                          rect->x * dst->format->BytesPerPixel;
    425 
    426         fill_function(pixels, dst->pitch, color, rect->w, rect->h);
    427     }
    428 
    429     /* We're done! */
    430     return 0;
    431 }
    432 
    433 /* vi: set ts=4 sw=4 expandtab: */