SDL_qsort.c (18852B)
1 /* 2 Simple DirectMedia Layer 3 Copyright (C) 1997-2020 Sam Lantinga <slouken@libsdl.org> 4 5 This software is provided 'as-is', without any express or implied 6 warranty. In no event will the authors be held liable for any damages 7 arising from the use of this software. 8 9 Permission is granted to anyone to use this software for any purpose, 10 including commercial applications, and to alter it and redistribute it 11 freely, subject to the following restrictions: 12 13 1. The origin of this software must not be misrepresented; you must not 14 claim that you wrote the original software. If you use this software 15 in a product, an acknowledgment in the product documentation would be 16 appreciated but is not required. 17 2. Altered source versions must be plainly marked as such, and must not be 18 misrepresented as being the original software. 19 3. This notice may not be removed or altered from any source distribution. 20 */ 21 22 #if defined(__clang_analyzer__) && !defined(SDL_DISABLE_ANALYZE_MACROS) 23 #define SDL_DISABLE_ANALYZE_MACROS 1 24 #endif 25 26 #include "../SDL_internal.h" 27 28 #include "SDL_stdinc.h" 29 30 #if defined(HAVE_QSORT) 31 void 32 SDL_qsort(void *base, size_t nmemb, size_t size, int (*compare) (const void *, const void *)) 33 { 34 qsort(base, nmemb, size, compare); 35 } 36 37 #else 38 39 #ifdef assert 40 #undef assert 41 #endif 42 #define assert SDL_assert 43 #ifdef malloc 44 #undef malloc 45 #endif 46 #define malloc SDL_malloc 47 #ifdef free 48 #undef free 49 #endif 50 #define free SDL_free 51 #ifdef memcpy 52 #undef memcpy 53 #endif 54 #define memcpy SDL_memcpy 55 #ifdef memmove 56 #undef memmove 57 #endif 58 #define memmove SDL_memmove 59 #ifdef qsortG 60 #undef qsortG 61 #endif 62 #define qsortG SDL_qsort 63 64 /* 65 This code came from Gareth McCaughan, under the zlib license. 66 Specifically this: https://www.mccaughan.org.uk/software/qsort.c-1.15 67 68 Everything below this comment until the HAVE_QSORT #endif was from Gareth 69 (any minor changes will be noted inline). 70 71 Thank you to Gareth for relicensing this code under the zlib license for our 72 benefit! 73 74 --ryan. 75 */ 76 77 /* This is a drop-in replacement for the C library's |qsort()| routine. 78 * 79 * It is intended for use where you know or suspect that your 80 * platform's qsort is bad. If that isn't the case, then you 81 * should probably use the qsort your system gives you in preference 82 * to mine -- it will likely have been tested and tuned better. 83 * 84 * Features: 85 * - Median-of-three pivoting (and more) 86 * - Truncation and final polishing by a single insertion sort 87 * - Early truncation when no swaps needed in pivoting step 88 * - Explicit recursion, guaranteed not to overflow 89 * - A few little wrinkles stolen from the GNU |qsort()|. 90 * (For the avoidance of doubt, no code was stolen, only 91 * broad ideas.) 92 * - separate code for non-aligned / aligned / word-size objects 93 * 94 * Earlier releases of this code used an idiosyncratic licence 95 * I wrote myself, because I'm an idiot. The code is now released 96 * under the "zlib/libpng licence"; you will find the actual 97 * terms in the next comment. I request (but do not require) 98 * that if you make any changes beyond the name of the exported 99 * routine and reasonable tweaks to the TRUNC_* and 100 * PIVOT_THRESHOLD values, you modify the _ID string so as 101 * to make it clear that you have changed the code. 102 * 103 * If you find problems with this code, or find ways of 104 * making it significantly faster, please let me know! 105 * My e-mail address, valid as of early 2016 and for the 106 * foreseeable future, is 107 * gareth.mccaughan@pobox.com 108 * Thanks! 109 * 110 * Gareth McCaughan 111 */ 112 113 /* Copyright (c) 1998-2016 Gareth McCaughan 114 * 115 * This software is provided 'as-is', without any express or implied 116 * warranty. In no event will the authors be held liable for any 117 * damages arising from the use of this software. 118 * 119 * Permission is granted to anyone to use this software for any purpose, 120 * including commercial applications, and to alter it and redistribute it 121 * freely, subject to the following restrictions: 122 * 123 * 1. The origin of this software must not be misrepresented; 124 * you must not claim that you wrote the original software. 125 * If you use this software in a product, an acknowledgment 126 * in the product documentation would be appreciated but 127 * is not required. 128 * 129 * 2. Altered source versions must be plainly marked as such, 130 * and must not be misrepresented as being the original software. 131 * 132 * 3. This notice may not be removed or altered from any source 133 * distribution. 134 */ 135 136 /* Revision history since release: 137 * 1998-03-19 v1.12 First release I have any records of. 138 * 2007-09-02 v1.13 Fix bug kindly reported by Dan Bodoh 139 * (premature termination of recursion). 140 * Add a few clarifying comments. 141 * Minor improvements to debug output. 142 * 2016-02-21 v1.14 Replace licence with 2-clause BSD, 143 * and clarify a couple of things in 144 * comments. No code changes. 145 * 2016-03-10 v1.15 Fix bug kindly reported by Ryan Gordon 146 * (pre-insertion-sort messed up). 147 * Disable DEBUG_QSORT by default. 148 * Tweak comments very slightly. 149 */ 150 151 /* BEGIN SDL CHANGE ... commented this out with an #if 0 block. --ryan. */ 152 #if 0 153 #include <assert.h> 154 #include <stdlib.h> 155 #include <string.h> 156 157 #undef DEBUG_QSORT 158 159 static char _ID[]="<qsort.c gjm 1.15 2016-03-10>"; 160 #endif 161 /* END SDL CHANGE ... commented this out with an #if 0 block. --ryan. */ 162 163 /* How many bytes are there per word? (Must be a power of 2, 164 * and must in fact equal sizeof(int).) 165 */ 166 #define WORD_BYTES sizeof(int) 167 168 /* How big does our stack need to be? Answer: one entry per 169 * bit in a |size_t|. 170 */ 171 #define STACK_SIZE (8*sizeof(size_t)) 172 173 /* Different situations have slightly different requirements, 174 * and we make life epsilon easier by using different truncation 175 * points for the three different cases. 176 * So far, I have tuned TRUNC_words and guessed that the same 177 * value might work well for the other two cases. Of course 178 * what works well on my machine might work badly on yours. 179 */ 180 #define TRUNC_nonaligned 12 181 #define TRUNC_aligned 12 182 #define TRUNC_words 12*WORD_BYTES /* nb different meaning */ 183 184 /* We use a simple pivoting algorithm for shortish sub-arrays 185 * and a more complicated one for larger ones. The threshold 186 * is PIVOT_THRESHOLD. 187 */ 188 #define PIVOT_THRESHOLD 40 189 190 typedef struct { char * first; char * last; } stack_entry; 191 #define pushLeft {stack[stacktop].first=ffirst;stack[stacktop++].last=last;} 192 #define pushRight {stack[stacktop].first=first;stack[stacktop++].last=llast;} 193 #define doLeft {first=ffirst;llast=last;continue;} 194 #define doRight {ffirst=first;last=llast;continue;} 195 #define pop {if (--stacktop<0) break;\ 196 first=ffirst=stack[stacktop].first;\ 197 last=llast=stack[stacktop].last;\ 198 continue;} 199 200 /* Some comments on the implementation. 201 * 1. When we finish partitioning the array into "low" 202 * and "high", we forget entirely about short subarrays, 203 * because they'll be done later by insertion sort. 204 * Doing lots of little insertion sorts might be a win 205 * on large datasets for locality-of-reference reasons, 206 * but it makes the code much nastier and increases 207 * bookkeeping overhead. 208 * 2. We always save the shorter and get to work on the 209 * longer. This guarantees that every time we push 210 * an item onto the stack its size is <= 1/2 of that 211 * of its parent; so the stack can't need more than 212 * log_2(max-array-size) entries. 213 * 3. We choose a pivot by looking at the first, last 214 * and middle elements. We arrange them into order 215 * because it's easy to do that in conjunction with 216 * choosing the pivot, and it makes things a little 217 * easier in the partitioning step. Anyway, the pivot 218 * is the middle of these three. It's still possible 219 * to construct datasets where the algorithm takes 220 * time of order n^2, but it simply never happens in 221 * practice. 222 * 3' Newsflash: On further investigation I find that 223 * it's easy to construct datasets where median-of-3 224 * simply isn't good enough. So on large-ish subarrays 225 * we do a more sophisticated pivoting: we take three 226 * sets of 3 elements, find their medians, and then 227 * take the median of those. 228 * 4. We copy the pivot element to a separate place 229 * because that way we can always do our comparisons 230 * directly against a pointer to that separate place, 231 * and don't have to wonder "did we move the pivot 232 * element?". This makes the inner loop better. 233 * 5. It's possible to make the pivoting even more 234 * reliable by looking at more candidates when n 235 * is larger. (Taking this to its logical conclusion 236 * results in a variant of quicksort that doesn't 237 * have that n^2 worst case.) However, the overhead 238 * from the extra bookkeeping means that it's just 239 * not worth while. 240 * 6. This is pretty clean and portable code. Here are 241 * all the potential portability pitfalls and problems 242 * I know of: 243 * - In one place (the insertion sort) I construct 244 * a pointer that points just past the end of the 245 * supplied array, and assume that (a) it won't 246 * compare equal to any pointer within the array, 247 * and (b) it will compare equal to a pointer 248 * obtained by stepping off the end of the array. 249 * These might fail on some segmented architectures. 250 * - I assume that there are 8 bits in a |char| when 251 * computing the size of stack needed. This would 252 * fail on machines with 9-bit or 16-bit bytes. 253 * - I assume that if |((int)base&(sizeof(int)-1))==0| 254 * and |(size&(sizeof(int)-1))==0| then it's safe to 255 * get at array elements via |int*|s, and that if 256 * actually |size==sizeof(int)| as well then it's 257 * safe to treat the elements as |int|s. This might 258 * fail on systems that convert pointers to integers 259 * in non-standard ways. 260 * - I assume that |8*sizeof(size_t)<=INT_MAX|. This 261 * would be false on a machine with 8-bit |char|s, 262 * 16-bit |int|s and 4096-bit |size_t|s. :-) 263 */ 264 265 /* The recursion logic is the same in each case. 266 * We keep chopping up until we reach subarrays of size 267 * strictly less than Trunc; we leave these unsorted. */ 268 #define Recurse(Trunc) \ 269 { size_t l=last-ffirst,r=llast-first; \ 270 if (l<Trunc) { \ 271 if (r>=Trunc) doRight \ 272 else pop \ 273 } \ 274 else if (l<=r) { pushLeft; doRight } \ 275 else if (r>=Trunc) { pushRight; doLeft }\ 276 else doLeft \ 277 } 278 279 /* and so is the pivoting logic (note: last is inclusive): */ 280 #define Pivot(swapper,sz) \ 281 if ((size_t)(last-first)>PIVOT_THRESHOLD*sz) mid=pivot_big(first,mid,last,sz,compare);\ 282 else { \ 283 if (compare(first,mid)<0) { \ 284 if (compare(mid,last)>0) { \ 285 swapper(mid,last); \ 286 if (compare(first,mid)>0) swapper(first,mid);\ 287 } \ 288 } \ 289 else { \ 290 if (compare(mid,last)>0) swapper(first,last)\ 291 else { \ 292 swapper(first,mid); \ 293 if (compare(mid,last)>0) swapper(mid,last);\ 294 } \ 295 } \ 296 first+=sz; last-=sz; \ 297 } 298 299 #ifdef DEBUG_QSORT 300 #include <stdio.h> 301 #endif 302 303 /* and so is the partitioning logic: */ 304 #define Partition(swapper,sz) { \ 305 do { \ 306 while (compare(first,pivot)<0) first+=sz; \ 307 while (compare(pivot,last)<0) last-=sz; \ 308 if (first<last) { \ 309 swapper(first,last); \ 310 first+=sz; last-=sz; } \ 311 else if (first==last) { first+=sz; last-=sz; break; }\ 312 } while (first<=last); \ 313 } 314 315 /* and so is the pre-insertion-sort operation of putting 316 * the smallest element into place as a sentinel. 317 * Doing this makes the inner loop nicer. I got this 318 * idea from the GNU implementation of qsort(). 319 * We find the smallest element from the first |nmemb|, 320 * or the first |limit|, whichever is smaller; 321 * therefore we must have ensured that the globally smallest 322 * element is in the first |limit| (because our 323 * quicksort recursion bottoms out only once we 324 * reach subarrays smaller than |limit|). 325 */ 326 #define PreInsertion(swapper,limit,sz) \ 327 first=base; \ 328 last=first + ((nmemb>limit ? limit : nmemb)-1)*sz;\ 329 while (last!=base) { \ 330 if (compare(first,last)>0) first=last; \ 331 last-=sz; } \ 332 if (first!=base) swapper(first,(char*)base); 333 334 /* and so is the insertion sort, in the first two cases: */ 335 #define Insertion(swapper) \ 336 last=((char*)base)+nmemb*size; \ 337 for (first=((char*)base)+size;first!=last;first+=size) { \ 338 char *test; \ 339 /* Find the right place for |first|. \ 340 * My apologies for var reuse. */ \ 341 for (test=first-size;compare(test,first)>0;test-=size) ; \ 342 test+=size; \ 343 if (test!=first) { \ 344 /* Shift everything in [test,first) \ 345 * up by one, and place |first| \ 346 * where |test| is. */ \ 347 memcpy(pivot,first,size); \ 348 memmove(test+size,test,first-test); \ 349 memcpy(test,pivot,size); \ 350 } \ 351 } 352 353 #define SWAP_nonaligned(a,b) { \ 354 register char *aa=(a),*bb=(b); \ 355 register size_t sz=size; \ 356 do { register char t=*aa; *aa++=*bb; *bb++=t; } while (--sz); } 357 358 #define SWAP_aligned(a,b) { \ 359 register int *aa=(int*)(a),*bb=(int*)(b); \ 360 register size_t sz=size; \ 361 do { register int t=*aa;*aa++=*bb; *bb++=t; } while (sz-=WORD_BYTES); } 362 363 #define SWAP_words(a,b) { \ 364 register int t=*((int*)a); *((int*)a)=*((int*)b); *((int*)b)=t; } 365 366 /* ---------------------------------------------------------------------- */ 367 368 static char * pivot_big(char *first, char *mid, char *last, size_t size, 369 int compare(const void *, const void *)) { 370 size_t d=(((last-first)/size)>>3)*size; 371 #ifdef DEBUG_QSORT 372 fprintf(stderr, "pivot_big: first=%p last=%p size=%lu n=%lu\n", first, (unsigned long)last, size, (unsigned long)((last-first+1)/size)); 373 #endif 374 char *m1,*m2,*m3; 375 { char *a=first, *b=first+d, *c=first+2*d; 376 #ifdef DEBUG_QSORT 377 fprintf(stderr,"< %d %d %d @ %p %p %p\n",*(int*)a,*(int*)b,*(int*)c, a,b,c); 378 #endif 379 m1 = compare(a,b)<0 ? 380 (compare(b,c)<0 ? b : (compare(a,c)<0 ? c : a)) 381 : (compare(a,c)<0 ? a : (compare(b,c)<0 ? c : b)); 382 } 383 { char *a=mid-d, *b=mid, *c=mid+d; 384 #ifdef DEBUG_QSORT 385 fprintf(stderr,". %d %d %d @ %p %p %p\n",*(int*)a,*(int*)b,*(int*)c, a,b,c); 386 #endif 387 m2 = compare(a,b)<0 ? 388 (compare(b,c)<0 ? b : (compare(a,c)<0 ? c : a)) 389 : (compare(a,c)<0 ? a : (compare(b,c)<0 ? c : b)); 390 } 391 { char *a=last-2*d, *b=last-d, *c=last; 392 #ifdef DEBUG_QSORT 393 fprintf(stderr,"> %d %d %d @ %p %p %p\n",*(int*)a,*(int*)b,*(int*)c, a,b,c); 394 #endif 395 m3 = compare(a,b)<0 ? 396 (compare(b,c)<0 ? b : (compare(a,c)<0 ? c : a)) 397 : (compare(a,c)<0 ? a : (compare(b,c)<0 ? c : b)); 398 } 399 #ifdef DEBUG_QSORT 400 fprintf(stderr,"-> %d %d %d @ %p %p %p\n",*(int*)m1,*(int*)m2,*(int*)m3, m1,m2,m3); 401 #endif 402 return compare(m1,m2)<0 ? 403 (compare(m2,m3)<0 ? m2 : (compare(m1,m3)<0 ? m3 : m1)) 404 : (compare(m1,m3)<0 ? m1 : (compare(m2,m3)<0 ? m3 : m2)); 405 } 406 407 /* ---------------------------------------------------------------------- */ 408 409 static void qsort_nonaligned(void *base, size_t nmemb, size_t size, 410 int (*compare)(const void *, const void *)) { 411 412 stack_entry stack[STACK_SIZE]; 413 int stacktop=0; 414 char *first,*last; 415 char *pivot=malloc(size); 416 size_t trunc=TRUNC_nonaligned*size; 417 assert(pivot!=0); 418 419 first=(char*)base; last=first+(nmemb-1)*size; 420 421 if ((size_t)(last-first)>=trunc) { 422 char *ffirst=first, *llast=last; 423 while (1) { 424 /* Select pivot */ 425 { char * mid=first+size*((last-first)/size >> 1); 426 Pivot(SWAP_nonaligned,size); 427 memcpy(pivot,mid,size); 428 } 429 /* Partition. */ 430 Partition(SWAP_nonaligned,size); 431 /* Prepare to recurse/iterate. */ 432 Recurse(trunc) 433 } 434 } 435 PreInsertion(SWAP_nonaligned,TRUNC_nonaligned,size); 436 Insertion(SWAP_nonaligned); 437 free(pivot); 438 } 439 440 static void qsort_aligned(void *base, size_t nmemb, size_t size, 441 int (*compare)(const void *, const void *)) { 442 443 stack_entry stack[STACK_SIZE]; 444 int stacktop=0; 445 char *first,*last; 446 char *pivot=malloc(size); 447 size_t trunc=TRUNC_aligned*size; 448 assert(pivot!=0); 449 450 first=(char*)base; last=first+(nmemb-1)*size; 451 452 if ((size_t)(last-first)>=trunc) { 453 char *ffirst=first,*llast=last; 454 while (1) { 455 /* Select pivot */ 456 { char * mid=first+size*((last-first)/size >> 1); 457 Pivot(SWAP_aligned,size); 458 memcpy(pivot,mid,size); 459 } 460 /* Partition. */ 461 Partition(SWAP_aligned,size); 462 /* Prepare to recurse/iterate. */ 463 Recurse(trunc) 464 } 465 } 466 PreInsertion(SWAP_aligned,TRUNC_aligned,size); 467 Insertion(SWAP_aligned); 468 free(pivot); 469 } 470 471 static void qsort_words(void *base, size_t nmemb, 472 int (*compare)(const void *, const void *)) { 473 474 stack_entry stack[STACK_SIZE]; 475 int stacktop=0; 476 char *first,*last; 477 char *pivot=malloc(WORD_BYTES); 478 assert(pivot!=0); 479 480 first=(char*)base; last=first+(nmemb-1)*WORD_BYTES; 481 482 if (last-first>=TRUNC_words) { 483 char *ffirst=first, *llast=last; 484 while (1) { 485 #ifdef DEBUG_QSORT 486 fprintf(stderr,"Doing %d:%d: ", 487 (first-(char*)base)/WORD_BYTES, 488 (last-(char*)base)/WORD_BYTES); 489 #endif 490 /* Select pivot */ 491 { char * mid=first+WORD_BYTES*((last-first) / (2*WORD_BYTES)); 492 Pivot(SWAP_words,WORD_BYTES); 493 *(int*)pivot=*(int*)mid; 494 #ifdef DEBUG_QSORT 495 fprintf(stderr,"pivot = %p = #%lu = %d\n", mid, (unsigned long)(((int*)mid)-((int*)base)), *(int*)mid); 496 #endif 497 } 498 /* Partition. */ 499 Partition(SWAP_words,WORD_BYTES); 500 #ifdef DEBUG_QSORT 501 fprintf(stderr, "after partitioning first=#%lu last=#%lu\n", (first-(char*)base)/4lu, (last-(char*)base)/4lu); 502 #endif 503 /* Prepare to recurse/iterate. */ 504 Recurse(TRUNC_words) 505 } 506 } 507 PreInsertion(SWAP_words,TRUNC_words/WORD_BYTES,WORD_BYTES); 508 /* Now do insertion sort. */ 509 last=((char*)base)+nmemb*WORD_BYTES; 510 for (first=((char*)base)+WORD_BYTES;first!=last;first+=WORD_BYTES) { 511 /* Find the right place for |first|. My apologies for var reuse */ 512 int *pl=(int*)(first-WORD_BYTES),*pr=(int*)first; 513 *(int*)pivot=*(int*)first; 514 for (;compare(pl,pivot)>0;pr=pl,--pl) { 515 *pr=*pl; } 516 if (pr!=(int*)first) *pr=*(int*)pivot; 517 } 518 free(pivot); 519 } 520 521 /* ---------------------------------------------------------------------- */ 522 523 extern void qsortG(void *base, size_t nmemb, size_t size, 524 int (*compare)(const void *, const void *)) { 525 526 if (nmemb<=1) return; 527 if (((size_t)base|size)&(WORD_BYTES-1)) 528 qsort_nonaligned(base,nmemb,size,compare); 529 else if (size!=WORD_BYTES) 530 qsort_aligned(base,nmemb,size,compare); 531 else 532 qsort_words(base,nmemb,compare); 533 } 534 535 #endif /* HAVE_QSORT */ 536 537 /* vi: set ts=4 sw=4 expandtab: */ 538