sdl

FORK: Simple Directmedia Layer
git clone https://git.neptards.moe/neptards/sdl.git
Log | Files | Refs

SDL_qsort.c (18852B)


      1 /*
      2   Simple DirectMedia Layer
      3   Copyright (C) 1997-2020 Sam Lantinga <slouken@libsdl.org>
      4 
      5   This software is provided 'as-is', without any express or implied
      6   warranty.  In no event will the authors be held liable for any damages
      7   arising from the use of this software.
      8 
      9   Permission is granted to anyone to use this software for any purpose,
     10   including commercial applications, and to alter it and redistribute it
     11   freely, subject to the following restrictions:
     12 
     13   1. The origin of this software must not be misrepresented; you must not
     14      claim that you wrote the original software. If you use this software
     15      in a product, an acknowledgment in the product documentation would be
     16      appreciated but is not required.
     17   2. Altered source versions must be plainly marked as such, and must not be
     18      misrepresented as being the original software.
     19   3. This notice may not be removed or altered from any source distribution.
     20 */
     21 
     22 #if defined(__clang_analyzer__) && !defined(SDL_DISABLE_ANALYZE_MACROS)
     23 #define SDL_DISABLE_ANALYZE_MACROS 1
     24 #endif
     25 
     26 #include "../SDL_internal.h"
     27 
     28 #include "SDL_stdinc.h"
     29 
     30 #if defined(HAVE_QSORT)
     31 void
     32 SDL_qsort(void *base, size_t nmemb, size_t size, int (*compare) (const void *, const void *))
     33 {
     34     qsort(base, nmemb, size, compare);
     35 }
     36 
     37 #else
     38 
     39 #ifdef assert
     40 #undef assert
     41 #endif
     42 #define assert SDL_assert
     43 #ifdef malloc
     44 #undef malloc
     45 #endif
     46 #define malloc SDL_malloc
     47 #ifdef free
     48 #undef free
     49 #endif
     50 #define free SDL_free
     51 #ifdef memcpy
     52 #undef memcpy
     53 #endif
     54 #define memcpy SDL_memcpy
     55 #ifdef memmove
     56 #undef memmove
     57 #endif
     58 #define memmove SDL_memmove
     59 #ifdef qsortG
     60 #undef qsortG
     61 #endif
     62 #define qsortG SDL_qsort
     63 
     64 /*
     65 This code came from Gareth McCaughan, under the zlib license.
     66 Specifically this: https://www.mccaughan.org.uk/software/qsort.c-1.15
     67 
     68 Everything below this comment until the HAVE_QSORT #endif was from Gareth
     69 (any minor changes will be noted inline).
     70 
     71 Thank you to Gareth for relicensing this code under the zlib license for our
     72 benefit!
     73 
     74 --ryan.
     75 */
     76 
     77 /* This is a drop-in replacement for the C library's |qsort()| routine.
     78  *
     79  * It is intended for use where you know or suspect that your
     80  * platform's qsort is bad. If that isn't the case, then you
     81  * should probably use the qsort your system gives you in preference
     82  * to mine -- it will likely have been tested and tuned better.
     83  *
     84  * Features:
     85  *   - Median-of-three pivoting (and more)
     86  *   - Truncation and final polishing by a single insertion sort
     87  *   - Early truncation when no swaps needed in pivoting step
     88  *   - Explicit recursion, guaranteed not to overflow
     89  *   - A few little wrinkles stolen from the GNU |qsort()|.
     90  *     (For the avoidance of doubt, no code was stolen, only
     91  *     broad ideas.)
     92  *   - separate code for non-aligned / aligned / word-size objects
     93  *
     94  * Earlier releases of this code used an idiosyncratic licence
     95  * I wrote myself, because I'm an idiot. The code is now released
     96  * under the "zlib/libpng licence"; you will find the actual
     97  * terms in the next comment. I request (but do not require)
     98  * that if you make any changes beyond the name of the exported
     99  * routine and reasonable tweaks to the TRUNC_* and
    100  * PIVOT_THRESHOLD values, you modify the _ID string so as
    101  * to make it clear that you have changed the code.
    102  *
    103  * If you find problems with this code, or find ways of
    104  * making it significantly faster, please let me know!
    105  * My e-mail address, valid as of early 2016 and for the
    106  * foreseeable future, is
    107  *    gareth.mccaughan@pobox.com
    108  * Thanks!
    109  *
    110  * Gareth McCaughan
    111  */
    112 
    113 /* Copyright (c) 1998-2016 Gareth McCaughan
    114  *
    115  * This software is provided 'as-is', without any express or implied
    116  * warranty. In no event will the authors be held liable for any
    117  * damages arising from the use of this software.
    118  *
    119  * Permission is granted to anyone to use this software for any purpose,
    120  * including commercial applications, and to alter it and redistribute it
    121  * freely, subject to the following restrictions:
    122  *
    123  * 1. The origin of this software must not be misrepresented;
    124  *    you must not claim that you wrote the original software.
    125  *    If you use this software in a product, an acknowledgment
    126  *    in the product documentation would be appreciated but
    127  *    is not required.
    128  *
    129  * 2. Altered source versions must be plainly marked as such,
    130  *    and must not be misrepresented as being the original software.
    131  *
    132  * 3. This notice may not be removed or altered from any source
    133  *    distribution.
    134  */
    135 
    136 /* Revision history since release:
    137  *   1998-03-19 v1.12 First release I have any records of.
    138  *   2007-09-02 v1.13 Fix bug kindly reported by Dan Bodoh
    139  *                    (premature termination of recursion).
    140  *                    Add a few clarifying comments.
    141  *                    Minor improvements to debug output.
    142  *   2016-02-21 v1.14 Replace licence with 2-clause BSD,
    143  *                    and clarify a couple of things in
    144  *                    comments. No code changes.
    145  *   2016-03-10 v1.15 Fix bug kindly reported by Ryan Gordon
    146  *                    (pre-insertion-sort messed up).
    147  *                    Disable DEBUG_QSORT by default.
    148  *                    Tweak comments very slightly.
    149  */
    150 
    151 /* BEGIN SDL CHANGE ... commented this out with an #if 0 block. --ryan. */
    152 #if 0
    153 #include <assert.h>
    154 #include <stdlib.h>
    155 #include <string.h>
    156 
    157 #undef DEBUG_QSORT
    158 
    159 static char _ID[]="<qsort.c gjm 1.15 2016-03-10>";
    160 #endif
    161 /* END SDL CHANGE ... commented this out with an #if 0 block. --ryan. */
    162 
    163 /* How many bytes are there per word? (Must be a power of 2,
    164  * and must in fact equal sizeof(int).)
    165  */
    166 #define WORD_BYTES sizeof(int)
    167 
    168 /* How big does our stack need to be? Answer: one entry per
    169  * bit in a |size_t|.
    170  */
    171 #define STACK_SIZE (8*sizeof(size_t))
    172 
    173 /* Different situations have slightly different requirements,
    174  * and we make life epsilon easier by using different truncation
    175  * points for the three different cases.
    176  * So far, I have tuned TRUNC_words and guessed that the same
    177  * value might work well for the other two cases. Of course
    178  * what works well on my machine might work badly on yours.
    179  */
    180 #define TRUNC_nonaligned	12
    181 #define TRUNC_aligned		12
    182 #define TRUNC_words		12*WORD_BYTES	/* nb different meaning */
    183 
    184 /* We use a simple pivoting algorithm for shortish sub-arrays
    185  * and a more complicated one for larger ones. The threshold
    186  * is PIVOT_THRESHOLD.
    187  */
    188 #define PIVOT_THRESHOLD 40
    189 
    190 typedef struct { char * first; char * last; } stack_entry;
    191 #define pushLeft {stack[stacktop].first=ffirst;stack[stacktop++].last=last;}
    192 #define pushRight {stack[stacktop].first=first;stack[stacktop++].last=llast;}
    193 #define doLeft {first=ffirst;llast=last;continue;}
    194 #define doRight {ffirst=first;last=llast;continue;}
    195 #define pop {if (--stacktop<0) break;\
    196   first=ffirst=stack[stacktop].first;\
    197   last=llast=stack[stacktop].last;\
    198   continue;}
    199 
    200 /* Some comments on the implementation.
    201  * 1. When we finish partitioning the array into "low"
    202  *    and "high", we forget entirely about short subarrays,
    203  *    because they'll be done later by insertion sort.
    204  *    Doing lots of little insertion sorts might be a win
    205  *    on large datasets for locality-of-reference reasons,
    206  *    but it makes the code much nastier and increases
    207  *    bookkeeping overhead.
    208  * 2. We always save the shorter and get to work on the
    209  *    longer. This guarantees that every time we push
    210  *    an item onto the stack its size is <= 1/2 of that
    211  *    of its parent; so the stack can't need more than
    212  *    log_2(max-array-size) entries.
    213  * 3. We choose a pivot by looking at the first, last
    214  *    and middle elements. We arrange them into order
    215  *    because it's easy to do that in conjunction with
    216  *    choosing the pivot, and it makes things a little
    217  *    easier in the partitioning step. Anyway, the pivot
    218  *    is the middle of these three. It's still possible
    219  *    to construct datasets where the algorithm takes
    220  *    time of order n^2, but it simply never happens in
    221  *    practice.
    222  * 3' Newsflash: On further investigation I find that
    223  *    it's easy to construct datasets where median-of-3
    224  *    simply isn't good enough. So on large-ish subarrays
    225  *    we do a more sophisticated pivoting: we take three
    226  *    sets of 3 elements, find their medians, and then
    227  *    take the median of those.
    228  * 4. We copy the pivot element to a separate place
    229  *    because that way we can always do our comparisons
    230  *    directly against a pointer to that separate place,
    231  *    and don't have to wonder "did we move the pivot
    232  *    element?". This makes the inner loop better.
    233  * 5. It's possible to make the pivoting even more
    234  *    reliable by looking at more candidates when n
    235  *    is larger. (Taking this to its logical conclusion
    236  *    results in a variant of quicksort that doesn't
    237  *    have that n^2 worst case.) However, the overhead
    238  *    from the extra bookkeeping means that it's just
    239  *    not worth while.
    240  * 6. This is pretty clean and portable code. Here are
    241  *    all the potential portability pitfalls and problems
    242  *    I know of:
    243  *      - In one place (the insertion sort) I construct
    244  *        a pointer that points just past the end of the
    245  *        supplied array, and assume that (a) it won't
    246  *        compare equal to any pointer within the array,
    247  *        and (b) it will compare equal to a pointer
    248  *        obtained by stepping off the end of the array.
    249  *        These might fail on some segmented architectures.
    250  *      - I assume that there are 8 bits in a |char| when
    251  *        computing the size of stack needed. This would
    252  *        fail on machines with 9-bit or 16-bit bytes.
    253  *      - I assume that if |((int)base&(sizeof(int)-1))==0|
    254  *        and |(size&(sizeof(int)-1))==0| then it's safe to
    255  *        get at array elements via |int*|s, and that if
    256  *        actually |size==sizeof(int)| as well then it's
    257  *        safe to treat the elements as |int|s. This might
    258  *        fail on systems that convert pointers to integers
    259  *        in non-standard ways.
    260  *      - I assume that |8*sizeof(size_t)<=INT_MAX|. This
    261  *        would be false on a machine with 8-bit |char|s,
    262  *        16-bit |int|s and 4096-bit |size_t|s. :-)
    263  */
    264 
    265 /* The recursion logic is the same in each case.
    266  * We keep chopping up until we reach subarrays of size
    267  * strictly less than Trunc; we leave these unsorted. */
    268 #define Recurse(Trunc)				\
    269       { size_t l=last-ffirst,r=llast-first;	\
    270         if (l<Trunc) {				\
    271           if (r>=Trunc) doRight			\
    272           else pop				\
    273         }					\
    274         else if (l<=r) { pushLeft; doRight }	\
    275         else if (r>=Trunc) { pushRight; doLeft }\
    276         else doLeft				\
    277       }
    278 
    279 /* and so is the pivoting logic (note: last is inclusive): */
    280 #define Pivot(swapper,sz)			\
    281   if ((size_t)(last-first)>PIVOT_THRESHOLD*sz) mid=pivot_big(first,mid,last,sz,compare);\
    282   else {	\
    283     if (compare(first,mid)<0) {			\
    284       if (compare(mid,last)>0) {		\
    285         swapper(mid,last);			\
    286         if (compare(first,mid)>0) swapper(first,mid);\
    287       }						\
    288     }						\
    289     else {					\
    290       if (compare(mid,last)>0) swapper(first,last)\
    291       else {					\
    292         swapper(first,mid);			\
    293         if (compare(mid,last)>0) swapper(mid,last);\
    294       }						\
    295     }						\
    296     first+=sz; last-=sz;			\
    297   }
    298 
    299 #ifdef DEBUG_QSORT
    300 #include <stdio.h>
    301 #endif
    302 
    303 /* and so is the partitioning logic: */
    304 #define Partition(swapper,sz) {			\
    305   do {						\
    306     while (compare(first,pivot)<0) first+=sz;	\
    307     while (compare(pivot,last)<0) last-=sz;	\
    308     if (first<last) {				\
    309       swapper(first,last);			\
    310       first+=sz; last-=sz; }			\
    311     else if (first==last) { first+=sz; last-=sz; break; }\
    312   } while (first<=last);			\
    313 }
    314 
    315 /* and so is the pre-insertion-sort operation of putting
    316  * the smallest element into place as a sentinel.
    317  * Doing this makes the inner loop nicer. I got this
    318  * idea from the GNU implementation of qsort().
    319  * We find the smallest element from the first |nmemb|,
    320  * or the first |limit|, whichever is smaller;
    321  * therefore we must have ensured that the globally smallest
    322  * element is in the first |limit| (because our
    323  * quicksort recursion bottoms out only once we
    324  * reach subarrays smaller than |limit|).
    325  */
    326 #define PreInsertion(swapper,limit,sz)		\
    327   first=base;					\
    328   last=first + ((nmemb>limit ? limit : nmemb)-1)*sz;\
    329   while (last!=base) {				\
    330     if (compare(first,last)>0) first=last;	\
    331     last-=sz; }					\
    332   if (first!=base) swapper(first,(char*)base);
    333 
    334 /* and so is the insertion sort, in the first two cases: */
    335 #define Insertion(swapper)			\
    336   last=((char*)base)+nmemb*size;		\
    337   for (first=((char*)base)+size;first!=last;first+=size) {	\
    338     char *test;					\
    339     /* Find the right place for |first|.	\
    340      * My apologies for var reuse. */		\
    341     for (test=first-size;compare(test,first)>0;test-=size) ;	\
    342     test+=size;					\
    343     if (test!=first) {				\
    344       /* Shift everything in [test,first)	\
    345        * up by one, and place |first|		\
    346        * where |test| is. */			\
    347       memcpy(pivot,first,size);			\
    348       memmove(test+size,test,first-test);	\
    349       memcpy(test,pivot,size);			\
    350     }						\
    351   }
    352 
    353 #define SWAP_nonaligned(a,b) { \
    354   register char *aa=(a),*bb=(b); \
    355   register size_t sz=size; \
    356   do { register char t=*aa; *aa++=*bb; *bb++=t; } while (--sz); }
    357 
    358 #define SWAP_aligned(a,b) { \
    359   register int *aa=(int*)(a),*bb=(int*)(b); \
    360   register size_t sz=size; \
    361   do { register int t=*aa;*aa++=*bb; *bb++=t; } while (sz-=WORD_BYTES); }
    362 
    363 #define SWAP_words(a,b) { \
    364   register int t=*((int*)a); *((int*)a)=*((int*)b); *((int*)b)=t; }
    365 
    366 /* ---------------------------------------------------------------------- */
    367 
    368 static char * pivot_big(char *first, char *mid, char *last, size_t size,
    369                         int compare(const void *, const void *)) {
    370   size_t d=(((last-first)/size)>>3)*size;
    371 #ifdef DEBUG_QSORT
    372 fprintf(stderr, "pivot_big: first=%p last=%p size=%lu n=%lu\n", first, (unsigned long)last, size, (unsigned long)((last-first+1)/size));
    373 #endif
    374   char *m1,*m2,*m3;
    375   { char *a=first, *b=first+d, *c=first+2*d;
    376 #ifdef DEBUG_QSORT
    377 fprintf(stderr,"< %d %d %d @ %p %p %p\n",*(int*)a,*(int*)b,*(int*)c, a,b,c);
    378 #endif
    379     m1 = compare(a,b)<0 ?
    380            (compare(b,c)<0 ? b : (compare(a,c)<0 ? c : a))
    381          : (compare(a,c)<0 ? a : (compare(b,c)<0 ? c : b));
    382   }
    383   { char *a=mid-d, *b=mid, *c=mid+d;
    384 #ifdef DEBUG_QSORT
    385 fprintf(stderr,". %d %d %d @ %p %p %p\n",*(int*)a,*(int*)b,*(int*)c, a,b,c);
    386 #endif
    387     m2 = compare(a,b)<0 ?
    388            (compare(b,c)<0 ? b : (compare(a,c)<0 ? c : a))
    389          : (compare(a,c)<0 ? a : (compare(b,c)<0 ? c : b));
    390   }
    391   { char *a=last-2*d, *b=last-d, *c=last;
    392 #ifdef DEBUG_QSORT
    393 fprintf(stderr,"> %d %d %d @ %p %p %p\n",*(int*)a,*(int*)b,*(int*)c, a,b,c);
    394 #endif
    395     m3 = compare(a,b)<0 ?
    396            (compare(b,c)<0 ? b : (compare(a,c)<0 ? c : a))
    397          : (compare(a,c)<0 ? a : (compare(b,c)<0 ? c : b));
    398   }
    399 #ifdef DEBUG_QSORT
    400 fprintf(stderr,"-> %d %d %d @ %p %p %p\n",*(int*)m1,*(int*)m2,*(int*)m3, m1,m2,m3);
    401 #endif
    402   return compare(m1,m2)<0 ?
    403            (compare(m2,m3)<0 ? m2 : (compare(m1,m3)<0 ? m3 : m1))
    404          : (compare(m1,m3)<0 ? m1 : (compare(m2,m3)<0 ? m3 : m2));
    405 }
    406 
    407 /* ---------------------------------------------------------------------- */
    408 
    409 static void qsort_nonaligned(void *base, size_t nmemb, size_t size,
    410            int (*compare)(const void *, const void *)) {
    411 
    412   stack_entry stack[STACK_SIZE];
    413   int stacktop=0;
    414   char *first,*last;
    415   char *pivot=malloc(size);
    416   size_t trunc=TRUNC_nonaligned*size;
    417   assert(pivot!=0);
    418 
    419   first=(char*)base; last=first+(nmemb-1)*size;
    420 
    421   if ((size_t)(last-first)>=trunc) {
    422     char *ffirst=first, *llast=last;
    423     while (1) {
    424       /* Select pivot */
    425       { char * mid=first+size*((last-first)/size >> 1);
    426         Pivot(SWAP_nonaligned,size);
    427         memcpy(pivot,mid,size);
    428       }
    429       /* Partition. */
    430       Partition(SWAP_nonaligned,size);
    431       /* Prepare to recurse/iterate. */
    432       Recurse(trunc)
    433     }
    434   }
    435   PreInsertion(SWAP_nonaligned,TRUNC_nonaligned,size);
    436   Insertion(SWAP_nonaligned);
    437   free(pivot);
    438 }
    439 
    440 static void qsort_aligned(void *base, size_t nmemb, size_t size,
    441            int (*compare)(const void *, const void *)) {
    442 
    443   stack_entry stack[STACK_SIZE];
    444   int stacktop=0;
    445   char *first,*last;
    446   char *pivot=malloc(size);
    447   size_t trunc=TRUNC_aligned*size;
    448   assert(pivot!=0);
    449 
    450   first=(char*)base; last=first+(nmemb-1)*size;
    451 
    452   if ((size_t)(last-first)>=trunc) {
    453     char *ffirst=first,*llast=last;
    454     while (1) {
    455       /* Select pivot */
    456       { char * mid=first+size*((last-first)/size >> 1);
    457         Pivot(SWAP_aligned,size);
    458         memcpy(pivot,mid,size);
    459       }
    460       /* Partition. */
    461       Partition(SWAP_aligned,size);
    462       /* Prepare to recurse/iterate. */
    463       Recurse(trunc)
    464     }
    465   }
    466   PreInsertion(SWAP_aligned,TRUNC_aligned,size);
    467   Insertion(SWAP_aligned);
    468   free(pivot);
    469 }
    470 
    471 static void qsort_words(void *base, size_t nmemb,
    472            int (*compare)(const void *, const void *)) {
    473 
    474   stack_entry stack[STACK_SIZE];
    475   int stacktop=0;
    476   char *first,*last;
    477   char *pivot=malloc(WORD_BYTES);
    478   assert(pivot!=0);
    479 
    480   first=(char*)base; last=first+(nmemb-1)*WORD_BYTES;
    481 
    482   if (last-first>=TRUNC_words) {
    483     char *ffirst=first, *llast=last;
    484     while (1) {
    485 #ifdef DEBUG_QSORT
    486 fprintf(stderr,"Doing %d:%d: ",
    487         (first-(char*)base)/WORD_BYTES,
    488         (last-(char*)base)/WORD_BYTES);
    489 #endif
    490       /* Select pivot */
    491       { char * mid=first+WORD_BYTES*((last-first) / (2*WORD_BYTES));
    492         Pivot(SWAP_words,WORD_BYTES);
    493         *(int*)pivot=*(int*)mid;
    494 #ifdef DEBUG_QSORT
    495 fprintf(stderr,"pivot = %p = #%lu = %d\n", mid, (unsigned long)(((int*)mid)-((int*)base)), *(int*)mid);
    496 #endif
    497       }
    498       /* Partition. */
    499       Partition(SWAP_words,WORD_BYTES);
    500 #ifdef DEBUG_QSORT
    501 fprintf(stderr, "after partitioning first=#%lu last=#%lu\n", (first-(char*)base)/4lu, (last-(char*)base)/4lu);
    502 #endif
    503       /* Prepare to recurse/iterate. */
    504       Recurse(TRUNC_words)
    505     }
    506   }
    507   PreInsertion(SWAP_words,TRUNC_words/WORD_BYTES,WORD_BYTES);
    508   /* Now do insertion sort. */
    509   last=((char*)base)+nmemb*WORD_BYTES;
    510   for (first=((char*)base)+WORD_BYTES;first!=last;first+=WORD_BYTES) {
    511     /* Find the right place for |first|. My apologies for var reuse */
    512     int *pl=(int*)(first-WORD_BYTES),*pr=(int*)first;
    513     *(int*)pivot=*(int*)first;
    514     for (;compare(pl,pivot)>0;pr=pl,--pl) {
    515       *pr=*pl; }
    516     if (pr!=(int*)first) *pr=*(int*)pivot;
    517   }
    518   free(pivot);
    519 }
    520 
    521 /* ---------------------------------------------------------------------- */
    522 
    523 extern void qsortG(void *base, size_t nmemb, size_t size,
    524            int (*compare)(const void *, const void *)) {
    525 
    526   if (nmemb<=1) return;
    527   if (((size_t)base|size)&(WORD_BYTES-1))
    528     qsort_nonaligned(base,nmemb,size,compare);
    529   else if (size!=WORD_BYTES)
    530     qsort_aligned(base,nmemb,size,compare);
    531   else
    532     qsort_words(base,nmemb,compare);
    533 }
    534 
    535 #endif /* HAVE_QSORT */
    536 
    537 /* vi: set ts=4 sw=4 expandtab: */
    538