/* SDL_cpuinfo.c */
1 /* 2 Simple DirectMedia Layer 3 Copyright (C) 1997-2020 Sam Lantinga <slouken@libsdl.org> 4 5 This software is provided 'as-is', without any express or implied 6 warranty. In no event will the authors be held liable for any damages 7 arising from the use of this software. 8 9 Permission is granted to anyone to use this software for any purpose, 10 including commercial applications, and to alter it and redistribute it 11 freely, subject to the following restrictions: 12 13 1. The origin of this software must not be misrepresented; you must not 14 claim that you wrote the original software. If you use this software 15 in a product, an acknowledgment in the product documentation would be 16 appreciated but is not required. 17 2. Altered source versions must be plainly marked as such, and must not be 18 misrepresented as being the original software. 19 3. This notice may not be removed or altered from any source distribution. 20 */ 21 #ifdef TEST_MAIN 22 #include "SDL_config.h" 23 #else 24 #include "../SDL_internal.h" 25 #endif 26 27 #if defined(__WIN32__) || defined(__WINRT__) 28 #include "../core/windows/SDL_windows.h" 29 #endif 30 #if defined(__OS2__) 31 #undef HAVE_SYSCTLBYNAME 32 #define INCL_DOS 33 #include <os2.h> 34 #ifndef QSV_NUMPROCESSORS 35 #define QSV_NUMPROCESSORS 26 36 #endif 37 #endif 38 39 /* CPU feature detection for SDL */ 40 41 #include "SDL_cpuinfo.h" 42 #include "SDL_assert.h" 43 44 #ifdef HAVE_SYSCONF 45 #include <unistd.h> 46 #endif 47 #ifdef HAVE_SYSCTLBYNAME 48 #include <sys/types.h> 49 #include <sys/sysctl.h> 50 #endif 51 #if defined(__MACOSX__) && (defined(__ppc__) || defined(__ppc64__)) 52 #include <sys/sysctl.h> /* For AltiVec check */ 53 #elif (defined(__OpenBSD__) || defined(__FreeBSD__)) && defined(__powerpc__) 54 #include <sys/param.h> 55 #include <sys/sysctl.h> /* For AltiVec check */ 56 #include <machine/cpu.h> 57 #elif SDL_ALTIVEC_BLITTERS && HAVE_SETJMP 58 #include <signal.h> 59 #include <setjmp.h> 60 #endif 61 62 #if 
defined(__QNXNTO__) 63 #include <sys/syspage.h> 64 #endif 65 66 #if (defined(__LINUX__) || defined(__ANDROID__)) && defined(__ARM_ARCH) 67 /*#include <asm/hwcap.h>*/ 68 #ifndef AT_HWCAP 69 #define AT_HWCAP 16 70 #endif 71 #ifndef AT_PLATFORM 72 #define AT_PLATFORM 15 73 #endif 74 /* Prevent compilation error when including elf.h would also try to define AT_* as an enum */ 75 #ifndef AT_NULL 76 #define AT_NULL 0 77 #endif 78 #ifndef HWCAP_NEON 79 #define HWCAP_NEON (1 << 12) 80 #endif 81 #if defined HAVE_GETAUXVAL 82 #include <sys/auxv.h> 83 #else 84 #include <fcntl.h> 85 #endif 86 #endif 87 88 #if defined(__ANDROID__) && defined(__ARM_ARCH) && !defined(HAVE_GETAUXVAL) 89 #if __ARM_ARCH < 8 90 #include <cpu-features.h> 91 #endif 92 #endif 93 94 #if defined(HAVE_ELF_AUX_INFO) 95 #include <sys/auxv.h> 96 #endif 97 98 #ifdef __RISCOS__ 99 #include <kernel.h> 100 #include <swis.h> 101 #endif 102 103 #define CPU_HAS_RDTSC (1 << 0) 104 #define CPU_HAS_ALTIVEC (1 << 1) 105 #define CPU_HAS_MMX (1 << 2) 106 #define CPU_HAS_3DNOW (1 << 3) 107 #define CPU_HAS_SSE (1 << 4) 108 #define CPU_HAS_SSE2 (1 << 5) 109 #define CPU_HAS_SSE3 (1 << 6) 110 #define CPU_HAS_SSE41 (1 << 7) 111 #define CPU_HAS_SSE42 (1 << 8) 112 #define CPU_HAS_AVX (1 << 9) 113 #define CPU_HAS_AVX2 (1 << 10) 114 #define CPU_HAS_NEON (1 << 11) 115 #define CPU_HAS_AVX512F (1 << 12) 116 #define CPU_HAS_ARM_SIMD (1 << 13) 117 118 #if SDL_ALTIVEC_BLITTERS && HAVE_SETJMP && !__MACOSX__ && !__OpenBSD__ 119 /* This is the brute force way of detecting instruction sets... 120 the idea is borrowed from the libmpeg2 library - thanks! 
121 */ 122 static jmp_buf jmpbuf; 123 static void 124 illegal_instruction(int sig) 125 { 126 longjmp(jmpbuf, 1); 127 } 128 #endif /* HAVE_SETJMP */ 129 130 static int 131 CPU_haveCPUID(void) 132 { 133 int has_CPUID = 0; 134 135 /* *INDENT-OFF* */ 136 #ifndef SDL_CPUINFO_DISABLED 137 #if (defined(__GNUC__) || defined(__clang__)) && defined(i386) 138 __asm__ ( 139 " pushfl # Get original EFLAGS \n" 140 " popl %%eax \n" 141 " movl %%eax,%%ecx \n" 142 " xorl $0x200000,%%eax # Flip ID bit in EFLAGS \n" 143 " pushl %%eax # Save new EFLAGS value on stack \n" 144 " popfl # Replace current EFLAGS value \n" 145 " pushfl # Get new EFLAGS \n" 146 " popl %%eax # Store new EFLAGS in EAX \n" 147 " xorl %%ecx,%%eax # Can not toggle ID bit, \n" 148 " jz 1f # Processor=80486 \n" 149 " movl $1,%0 # We have CPUID support \n" 150 "1: \n" 151 : "=m" (has_CPUID) 152 : 153 : "%eax", "%ecx" 154 ); 155 #elif (defined(__GNUC__) || defined(__clang__)) && defined(__x86_64__) 156 /* Technically, if this is being compiled under __x86_64__ then it has 157 CPUid by definition. But it's nice to be able to prove it. 
:) */ 158 __asm__ ( 159 " pushfq # Get original EFLAGS \n" 160 " popq %%rax \n" 161 " movq %%rax,%%rcx \n" 162 " xorl $0x200000,%%eax # Flip ID bit in EFLAGS \n" 163 " pushq %%rax # Save new EFLAGS value on stack \n" 164 " popfq # Replace current EFLAGS value \n" 165 " pushfq # Get new EFLAGS \n" 166 " popq %%rax # Store new EFLAGS in EAX \n" 167 " xorl %%ecx,%%eax # Can not toggle ID bit, \n" 168 " jz 1f # Processor=80486 \n" 169 " movl $1,%0 # We have CPUID support \n" 170 "1: \n" 171 : "=m" (has_CPUID) 172 : 173 : "%rax", "%rcx" 174 ); 175 #elif (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__) 176 __asm { 177 pushfd ; Get original EFLAGS 178 pop eax 179 mov ecx, eax 180 xor eax, 200000h ; Flip ID bit in EFLAGS 181 push eax ; Save new EFLAGS value on stack 182 popfd ; Replace current EFLAGS value 183 pushfd ; Get new EFLAGS 184 pop eax ; Store new EFLAGS in EAX 185 xor eax, ecx ; Can not toggle ID bit, 186 jz done ; Processor=80486 187 mov has_CPUID,1 ; We have CPUID support 188 done: 189 } 190 #elif defined(_MSC_VER) && defined(_M_X64) 191 has_CPUID = 1; 192 #elif defined(__sun) && defined(__i386) 193 __asm ( 194 " pushfl \n" 195 " popl %eax \n" 196 " movl %eax,%ecx \n" 197 " xorl $0x200000,%eax \n" 198 " pushl %eax \n" 199 " popfl \n" 200 " pushfl \n" 201 " popl %eax \n" 202 " xorl %ecx,%eax \n" 203 " jz 1f \n" 204 " movl $1,-8(%ebp) \n" 205 "1: \n" 206 ); 207 #elif defined(__sun) && defined(__amd64) 208 __asm ( 209 " pushfq \n" 210 " popq %rax \n" 211 " movq %rax,%rcx \n" 212 " xorl $0x200000,%eax \n" 213 " pushq %rax \n" 214 " popfq \n" 215 " pushfq \n" 216 " popq %rax \n" 217 " xorl %ecx,%eax \n" 218 " jz 1f \n" 219 " movl $1,-8(%rbp) \n" 220 "1: \n" 221 ); 222 #endif 223 #endif 224 /* *INDENT-ON* */ 225 return has_CPUID; 226 } 227 228 #if (defined(__GNUC__) || defined(__clang__)) && defined(i386) 229 #define cpuid(func, a, b, c, d) \ 230 __asm__ __volatile__ ( \ 231 " pushl %%ebx \n" \ 232 " xorl %%ecx,%%ecx \n" \ 233 " cpuid \n" \ 234 " movl 
%%ebx, %%esi \n" \ 235 " popl %%ebx \n" : \ 236 "=a" (a), "=S" (b), "=c" (c), "=d" (d) : "a" (func)) 237 #elif (defined(__GNUC__) || defined(__clang__)) && defined(__x86_64__) 238 #define cpuid(func, a, b, c, d) \ 239 __asm__ __volatile__ ( \ 240 " pushq %%rbx \n" \ 241 " xorq %%rcx,%%rcx \n" \ 242 " cpuid \n" \ 243 " movq %%rbx, %%rsi \n" \ 244 " popq %%rbx \n" : \ 245 "=a" (a), "=S" (b), "=c" (c), "=d" (d) : "a" (func)) 246 #elif (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__) 247 #define cpuid(func, a, b, c, d) \ 248 __asm { \ 249 __asm mov eax, func \ 250 __asm xor ecx, ecx \ 251 __asm cpuid \ 252 __asm mov a, eax \ 253 __asm mov b, ebx \ 254 __asm mov c, ecx \ 255 __asm mov d, edx \ 256 } 257 #elif defined(_MSC_VER) && defined(_M_X64) 258 #define cpuid(func, a, b, c, d) \ 259 { \ 260 int CPUInfo[4]; \ 261 __cpuid(CPUInfo, func); \ 262 a = CPUInfo[0]; \ 263 b = CPUInfo[1]; \ 264 c = CPUInfo[2]; \ 265 d = CPUInfo[3]; \ 266 } 267 #else 268 #define cpuid(func, a, b, c, d) \ 269 do { a = b = c = d = 0; (void) a; (void) b; (void) c; (void) d; } while (0) 270 #endif 271 272 static int CPU_CPUIDFeatures[4]; 273 static int CPU_CPUIDMaxFunction = 0; 274 static SDL_bool CPU_OSSavesYMM = SDL_FALSE; 275 static SDL_bool CPU_OSSavesZMM = SDL_FALSE; 276 277 static void 278 CPU_calcCPUIDFeatures(void) 279 { 280 static SDL_bool checked = SDL_FALSE; 281 if (!checked) { 282 checked = SDL_TRUE; 283 if (CPU_haveCPUID()) { 284 int a, b, c, d; 285 cpuid(0, a, b, c, d); 286 CPU_CPUIDMaxFunction = a; 287 if (CPU_CPUIDMaxFunction >= 1) { 288 cpuid(1, a, b, c, d); 289 CPU_CPUIDFeatures[0] = a; 290 CPU_CPUIDFeatures[1] = b; 291 CPU_CPUIDFeatures[2] = c; 292 CPU_CPUIDFeatures[3] = d; 293 294 /* Check to make sure we can call xgetbv */ 295 if (c & 0x08000000) { 296 /* Call xgetbv to see if YMM (etc) register state is saved */ 297 #if (defined(__GNUC__) || defined(__clang__)) && (defined(i386) || defined(__x86_64__)) 298 __asm__(".byte 0x0f, 0x01, 0xd0" : "=a" (a) : "c" (0) : 
"%edx"); 299 #elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) && (_MSC_FULL_VER >= 160040219) /* VS2010 SP1 */ 300 a = (int)_xgetbv(0); 301 #elif (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__) 302 __asm 303 { 304 xor ecx, ecx 305 _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0 306 mov a, eax 307 } 308 #endif 309 CPU_OSSavesYMM = ((a & 6) == 6) ? SDL_TRUE : SDL_FALSE; 310 CPU_OSSavesZMM = (CPU_OSSavesYMM && ((a & 0xe0) == 0xe0)) ? SDL_TRUE : SDL_FALSE; 311 } 312 } 313 } 314 } 315 } 316 317 static int 318 CPU_haveAltiVec(void) 319 { 320 volatile int altivec = 0; 321 #ifndef SDL_CPUINFO_DISABLED 322 #if (defined(__MACOSX__) && (defined(__ppc__) || defined(__ppc64__))) || (defined(__OpenBSD__) && defined(__powerpc__)) || (defined(__FreeBSD__) && defined(__powerpc__)) 323 #ifdef __OpenBSD__ 324 int selectors[2] = { CTL_MACHDEP, CPU_ALTIVEC }; 325 #elif defined(__FreeBSD__) 326 int selectors[2] = { CTL_HW, PPC_FEATURE_HAS_ALTIVEC }; 327 #else 328 int selectors[2] = { CTL_HW, HW_VECTORUNIT }; 329 #endif 330 int hasVectorUnit = 0; 331 size_t length = sizeof(hasVectorUnit); 332 int error = sysctl(selectors, 2, &hasVectorUnit, &length, NULL, 0); 333 if (0 == error) 334 altivec = (hasVectorUnit != 0); 335 #elif SDL_ALTIVEC_BLITTERS && HAVE_SETJMP 336 void (*handler) (int sig); 337 handler = signal(SIGILL, illegal_instruction); 338 if (setjmp(jmpbuf) == 0) { 339 asm volatile ("mtspr 256, %0\n\t" "vand %%v0, %%v0, %%v0"::"r" (-1)); 340 altivec = 1; 341 } 342 signal(SIGILL, handler); 343 #endif 344 #endif 345 return altivec; 346 } 347 348 #if defined(__ARM_ARCH) && (__ARM_ARCH >= 6) 349 static int 350 CPU_haveARMSIMD(void) 351 { 352 return 1; 353 } 354 355 #elif !defined(__arm__) 356 static int 357 CPU_haveARMSIMD(void) 358 { 359 return 0; 360 } 361 362 #elif defined(__LINUX__) 363 #include <unistd.h> 364 #include <sys/types.h> 365 #include <sys/stat.h> 366 #include <fcntl.h> 367 #include <elf.h> 368 369 static int 370 CPU_haveARMSIMD(void) 371 { 
372 int arm_simd = 0; 373 int fd; 374 375 fd = open("/proc/self/auxv", O_RDONLY); 376 if (fd >= 0) 377 { 378 Elf32_auxv_t aux; 379 while (read(fd, &aux, sizeof aux) == sizeof aux) 380 { 381 if (aux.a_type == AT_PLATFORM) 382 { 383 const char *plat = (const char *) aux.a_un.a_val; 384 if (plat) { 385 arm_simd = strncmp(plat, "v6l", 3) == 0 || 386 strncmp(plat, "v7l", 3) == 0; 387 } 388 } 389 } 390 close(fd); 391 } 392 return arm_simd; 393 } 394 395 #elif defined(__RISCOS__) 396 397 static int 398 CPU_haveARMSIMD(void) 399 { 400 _kernel_swi_regs regs; 401 regs.r[0] = 0; 402 if (_kernel_swi(OS_PlatformFeatures, ®s, ®s) != NULL) 403 return 0; 404 405 if (!(regs.r[0] & (1<<31))) 406 return 0; 407 408 regs.r[0] = 34; 409 regs.r[1] = 29; 410 if (_kernel_swi(OS_PlatformFeatures, ®s, ®s) != NULL) 411 return 0; 412 413 return regs.r[0]; 414 } 415 416 #else 417 static int 418 CPU_haveARMSIMD(void) 419 { 420 #warning SDL_HasARMSIMD is not implemented for this ARM platform. Write me. 421 return 0; 422 } 423 #endif 424 425 #if defined(__LINUX__) && defined(__ARM_ARCH) && !defined(HAVE_GETAUXVAL) 426 static int 427 readProcAuxvForNeon(void) 428 { 429 int neon = 0; 430 int kv[2]; 431 const int fd = open("/proc/self/auxv", O_RDONLY); 432 if (fd != -1) { 433 while (read(fd, kv, sizeof (kv)) == sizeof (kv)) { 434 if (kv[0] == AT_HWCAP) { 435 neon = ((kv[1] & HWCAP_NEON) == HWCAP_NEON); 436 break; 437 } 438 } 439 close(fd); 440 } 441 return neon; 442 } 443 #endif 444 445 static int 446 CPU_haveNEON(void) 447 { 448 /* The way you detect NEON is a privileged instruction on ARM, so you have 449 query the OS kernel in a platform-specific way. :/ */ 450 #if defined(SDL_CPUINFO_DISABLED) 451 return 0; /* disabled */ 452 #elif (defined(__WINDOWS__) || defined(__WINRT__)) && (defined(_M_ARM) || defined(_M_ARM64)) 453 /* Visual Studio, for ARM, doesn't define __ARM_ARCH. Handle this first. 
*/ 454 /* Seems to have been removed */ 455 # if !defined(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) 456 # define PF_ARM_NEON_INSTRUCTIONS_AVAILABLE 19 457 # endif 458 /* All WinRT ARM devices are required to support NEON, but just in case. */ 459 return IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) != 0; 460 #elif defined(__ARM_ARCH) && (__ARM_ARCH >= 8) 461 return 1; /* ARMv8 always has non-optional NEON support. */ 462 #elif defined(__APPLE__) && defined(__ARM_ARCH) && (__ARM_ARCH >= 7) 463 /* (note that sysctlbyname("hw.optional.neon") doesn't work!) */ 464 return 1; /* all Apple ARMv7 chips and later have NEON. */ 465 #elif defined(__APPLE__) 466 return 0; /* assume anything else from Apple doesn't have NEON. */ 467 #elif defined(__OpenBSD__) 468 return 1; /* OpenBSD only supports ARMv7 CPUs that have NEON. */ 469 #elif defined(HAVE_ELF_AUX_INFO) && defined(HWCAP_NEON) 470 unsigned long hasneon = 0; 471 if (elf_aux_info(AT_HWCAP, (void *)&hasneon, (int)sizeof(hasneon)) != 0) 472 return 0; 473 return ((hasneon & HWCAP_NEON) == HWCAP_NEON); 474 #elif !defined(__arm__) 475 return 0; /* not an ARM CPU at all. 
*/ 476 #elif defined(__QNXNTO__) 477 return SYSPAGE_ENTRY(cpuinfo)->flags & ARM_CPU_FLAG_NEON; 478 #elif (defined(__LINUX__) || defined(__ANDROID__)) && defined(HAVE_GETAUXVAL) 479 return ((getauxval(AT_HWCAP) & HWCAP_NEON) == HWCAP_NEON); 480 #elif defined(__LINUX__) 481 return readProcAuxvForNeon(); 482 #elif defined(__ANDROID__) 483 /* Use NDK cpufeatures to read either /proc/self/auxv or /proc/cpuinfo */ 484 { 485 AndroidCpuFamily cpu_family = android_getCpuFamily(); 486 if (cpu_family == ANDROID_CPU_FAMILY_ARM) { 487 uint64_t cpu_features = android_getCpuFeatures(); 488 if ((cpu_features & ANDROID_CPU_ARM_FEATURE_NEON) != 0) { 489 return 1; 490 } 491 } 492 return 0; 493 } 494 #elif defined(__RISCOS__) 495 /* Use the VFPSupport_Features SWI to access the MVFR registers */ 496 { 497 _kernel_swi_regs regs; 498 regs.r[0] = 0; 499 if (_kernel_swi(VFPSupport_Features, ®s, ®s) == NULL) { 500 if ((regs.r[2] & 0xFFF000) == 0x111000) { 501 return 1; 502 } 503 } 504 return 0; 505 } 506 #else 507 #warning SDL_HasNEON is not implemented for this ARM platform. Write me. 508 return 0; 509 #endif 510 } 511 512 static int 513 CPU_have3DNow(void) 514 { 515 if (CPU_CPUIDMaxFunction > 0) { /* that is, do we have CPUID at all? 
*/ 516 int a, b, c, d; 517 cpuid(0x80000000, a, b, c, d); 518 if (a >= 0x80000001) { 519 cpuid(0x80000001, a, b, c, d); 520 return (d & 0x80000000); 521 } 522 } 523 return 0; 524 } 525 526 #define CPU_haveRDTSC() (CPU_CPUIDFeatures[3] & 0x00000010) 527 #define CPU_haveMMX() (CPU_CPUIDFeatures[3] & 0x00800000) 528 #define CPU_haveSSE() (CPU_CPUIDFeatures[3] & 0x02000000) 529 #define CPU_haveSSE2() (CPU_CPUIDFeatures[3] & 0x04000000) 530 #define CPU_haveSSE3() (CPU_CPUIDFeatures[2] & 0x00000001) 531 #define CPU_haveSSE41() (CPU_CPUIDFeatures[2] & 0x00080000) 532 #define CPU_haveSSE42() (CPU_CPUIDFeatures[2] & 0x00100000) 533 #define CPU_haveAVX() (CPU_OSSavesYMM && (CPU_CPUIDFeatures[2] & 0x10000000)) 534 535 static int 536 CPU_haveAVX2(void) 537 { 538 if (CPU_OSSavesYMM && (CPU_CPUIDMaxFunction >= 7)) { 539 int a, b, c, d; 540 (void) a; (void) b; (void) c; (void) d; /* compiler warnings... */ 541 cpuid(7, a, b, c, d); 542 return (b & 0x00000020); 543 } 544 return 0; 545 } 546 547 static int 548 CPU_haveAVX512F(void) 549 { 550 if (CPU_OSSavesZMM && (CPU_CPUIDMaxFunction >= 7)) { 551 int a, b, c, d; 552 (void) a; (void) b; (void) c; (void) d; /* compiler warnings... 
*/ 553 cpuid(7, a, b, c, d); 554 return (b & 0x00010000); 555 } 556 return 0; 557 } 558 559 static int SDL_CPUCount = 0; 560 561 int 562 SDL_GetCPUCount(void) 563 { 564 if (!SDL_CPUCount) { 565 #ifndef SDL_CPUINFO_DISABLED 566 #if defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_ONLN) 567 if (SDL_CPUCount <= 0) { 568 SDL_CPUCount = (int)sysconf(_SC_NPROCESSORS_ONLN); 569 } 570 #endif 571 #ifdef HAVE_SYSCTLBYNAME 572 if (SDL_CPUCount <= 0) { 573 size_t size = sizeof(SDL_CPUCount); 574 sysctlbyname("hw.ncpu", &SDL_CPUCount, &size, NULL, 0); 575 } 576 #endif 577 #ifdef __WIN32__ 578 if (SDL_CPUCount <= 0) { 579 SYSTEM_INFO info; 580 GetSystemInfo(&info); 581 SDL_CPUCount = info.dwNumberOfProcessors; 582 } 583 #endif 584 #ifdef __OS2__ 585 if (SDL_CPUCount <= 0) { 586 DosQuerySysInfo(QSV_NUMPROCESSORS, QSV_NUMPROCESSORS, 587 &SDL_CPUCount, sizeof(SDL_CPUCount) ); 588 } 589 #endif 590 #endif 591 /* There has to be at least 1, right? :) */ 592 if (SDL_CPUCount <= 0) { 593 SDL_CPUCount = 1; 594 } 595 } 596 return SDL_CPUCount; 597 } 598 599 /* Oh, such a sweet sweet trick, just not very useful. :) */ 600 static const char * 601 SDL_GetCPUType(void) 602 { 603 static char SDL_CPUType[13]; 604 605 if (!SDL_CPUType[0]) { 606 int i = 0; 607 608 CPU_calcCPUIDFeatures(); 609 if (CPU_CPUIDMaxFunction > 0) { /* do we have CPUID at all? 
*/ 610 int a, b, c, d; 611 cpuid(0x00000000, a, b, c, d); 612 (void) a; 613 SDL_CPUType[i++] = (char)(b & 0xff); b >>= 8; 614 SDL_CPUType[i++] = (char)(b & 0xff); b >>= 8; 615 SDL_CPUType[i++] = (char)(b & 0xff); b >>= 8; 616 SDL_CPUType[i++] = (char)(b & 0xff); 617 618 SDL_CPUType[i++] = (char)(d & 0xff); d >>= 8; 619 SDL_CPUType[i++] = (char)(d & 0xff); d >>= 8; 620 SDL_CPUType[i++] = (char)(d & 0xff); d >>= 8; 621 SDL_CPUType[i++] = (char)(d & 0xff); 622 623 SDL_CPUType[i++] = (char)(c & 0xff); c >>= 8; 624 SDL_CPUType[i++] = (char)(c & 0xff); c >>= 8; 625 SDL_CPUType[i++] = (char)(c & 0xff); c >>= 8; 626 SDL_CPUType[i++] = (char)(c & 0xff); 627 } 628 if (!SDL_CPUType[0]) { 629 SDL_strlcpy(SDL_CPUType, "Unknown", sizeof(SDL_CPUType)); 630 } 631 } 632 return SDL_CPUType; 633 } 634 635 636 #ifdef TEST_MAIN /* !!! FIXME: only used for test at the moment. */ 637 static const char * 638 SDL_GetCPUName(void) 639 { 640 static char SDL_CPUName[48]; 641 642 if (!SDL_CPUName[0]) { 643 int i = 0; 644 int a, b, c, d; 645 646 CPU_calcCPUIDFeatures(); 647 if (CPU_CPUIDMaxFunction > 0) { /* do we have CPUID at all? 
*/ 648 cpuid(0x80000000, a, b, c, d); 649 if (a >= 0x80000004) { 650 cpuid(0x80000002, a, b, c, d); 651 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8; 652 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8; 653 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8; 654 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8; 655 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8; 656 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8; 657 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8; 658 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8; 659 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8; 660 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8; 661 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8; 662 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8; 663 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8; 664 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8; 665 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8; 666 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8; 667 cpuid(0x80000003, a, b, c, d); 668 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8; 669 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8; 670 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8; 671 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8; 672 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8; 673 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8; 674 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8; 675 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8; 676 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8; 677 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8; 678 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8; 679 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8; 680 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8; 681 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8; 682 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8; 683 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8; 684 cpuid(0x80000004, a, b, c, d); 685 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8; 686 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8; 687 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8; 688 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8; 689 SDL_CPUName[i++] = 
(char)(b & 0xff); b >>= 8; 690 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8; 691 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8; 692 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8; 693 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8; 694 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8; 695 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8; 696 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8; 697 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8; 698 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8; 699 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8; 700 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8; 701 } 702 } 703 if (!SDL_CPUName[0]) { 704 SDL_strlcpy(SDL_CPUName, "Unknown", sizeof(SDL_CPUName)); 705 } 706 } 707 return SDL_CPUName; 708 } 709 #endif 710 711 int 712 SDL_GetCPUCacheLineSize(void) 713 { 714 const char *cpuType = SDL_GetCPUType(); 715 int a, b, c, d; 716 (void) a; (void) b; (void) c; (void) d; 717 if (SDL_strcmp(cpuType, "GenuineIntel") == 0 || SDL_strcmp(cpuType, "CentaurHauls") == 0 || SDL_strcmp(cpuType, " Shanghai ") == 0) { 718 cpuid(0x00000001, a, b, c, d); 719 return (((b >> 8) & 0xff) * 8); 720 } else if (SDL_strcmp(cpuType, "AuthenticAMD") == 0 || SDL_strcmp(cpuType, "HygonGenuine") == 0) { 721 cpuid(0x80000005, a, b, c, d); 722 return (c & 0xff); 723 } else { 724 /* Just make a guess here... 
*/ 725 return SDL_CACHELINE_SIZE; 726 } 727 } 728 729 static Uint32 SDL_CPUFeatures = 0xFFFFFFFF; 730 static Uint32 SDL_SIMDAlignment = 0xFFFFFFFF; 731 732 static Uint32 733 SDL_GetCPUFeatures(void) 734 { 735 if (SDL_CPUFeatures == 0xFFFFFFFF) { 736 CPU_calcCPUIDFeatures(); 737 SDL_CPUFeatures = 0; 738 SDL_SIMDAlignment = sizeof(void *); /* a good safe base value */ 739 if (CPU_haveRDTSC()) { 740 SDL_CPUFeatures |= CPU_HAS_RDTSC; 741 } 742 if (CPU_haveAltiVec()) { 743 SDL_CPUFeatures |= CPU_HAS_ALTIVEC; 744 SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16); 745 } 746 if (CPU_haveMMX()) { 747 SDL_CPUFeatures |= CPU_HAS_MMX; 748 SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 8); 749 } 750 if (CPU_have3DNow()) { 751 SDL_CPUFeatures |= CPU_HAS_3DNOW; 752 SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 8); 753 } 754 if (CPU_haveSSE()) { 755 SDL_CPUFeatures |= CPU_HAS_SSE; 756 SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16); 757 } 758 if (CPU_haveSSE2()) { 759 SDL_CPUFeatures |= CPU_HAS_SSE2; 760 SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16); 761 } 762 if (CPU_haveSSE3()) { 763 SDL_CPUFeatures |= CPU_HAS_SSE3; 764 SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16); 765 } 766 if (CPU_haveSSE41()) { 767 SDL_CPUFeatures |= CPU_HAS_SSE41; 768 SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16); 769 } 770 if (CPU_haveSSE42()) { 771 SDL_CPUFeatures |= CPU_HAS_SSE42; 772 SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16); 773 } 774 if (CPU_haveAVX()) { 775 SDL_CPUFeatures |= CPU_HAS_AVX; 776 SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 32); 777 } 778 if (CPU_haveAVX2()) { 779 SDL_CPUFeatures |= CPU_HAS_AVX2; 780 SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 32); 781 } 782 if (CPU_haveAVX512F()) { 783 SDL_CPUFeatures |= CPU_HAS_AVX512F; 784 SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 64); 785 } 786 if (CPU_haveARMSIMD()) { 787 SDL_CPUFeatures |= CPU_HAS_ARM_SIMD; 788 SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16); 789 } 790 if (CPU_haveNEON()) { 791 
SDL_CPUFeatures |= CPU_HAS_NEON; 792 SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16); 793 } 794 } 795 return SDL_CPUFeatures; 796 } 797 798 #define CPU_FEATURE_AVAILABLE(f) ((SDL_GetCPUFeatures() & f) ? SDL_TRUE : SDL_FALSE) 799 800 SDL_bool SDL_HasRDTSC(void) 801 { 802 return CPU_FEATURE_AVAILABLE(CPU_HAS_RDTSC); 803 } 804 805 SDL_bool 806 SDL_HasAltiVec(void) 807 { 808 return CPU_FEATURE_AVAILABLE(CPU_HAS_ALTIVEC); 809 } 810 811 SDL_bool 812 SDL_HasMMX(void) 813 { 814 return CPU_FEATURE_AVAILABLE(CPU_HAS_MMX); 815 } 816 817 SDL_bool 818 SDL_Has3DNow(void) 819 { 820 return CPU_FEATURE_AVAILABLE(CPU_HAS_3DNOW); 821 } 822 823 SDL_bool 824 SDL_HasSSE(void) 825 { 826 return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE); 827 } 828 829 SDL_bool 830 SDL_HasSSE2(void) 831 { 832 return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE2); 833 } 834 835 SDL_bool 836 SDL_HasSSE3(void) 837 { 838 return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE3); 839 } 840 841 SDL_bool 842 SDL_HasSSE41(void) 843 { 844 return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE41); 845 } 846 847 SDL_bool 848 SDL_HasSSE42(void) 849 { 850 return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE42); 851 } 852 853 SDL_bool 854 SDL_HasAVX(void) 855 { 856 return CPU_FEATURE_AVAILABLE(CPU_HAS_AVX); 857 } 858 859 SDL_bool 860 SDL_HasAVX2(void) 861 { 862 return CPU_FEATURE_AVAILABLE(CPU_HAS_AVX2); 863 } 864 865 SDL_bool 866 SDL_HasAVX512F(void) 867 { 868 return CPU_FEATURE_AVAILABLE(CPU_HAS_AVX512F); 869 } 870 871 SDL_bool 872 SDL_HasARMSIMD(void) 873 { 874 return CPU_FEATURE_AVAILABLE(CPU_HAS_ARM_SIMD); 875 } 876 877 SDL_bool 878 SDL_HasNEON(void) 879 { 880 return CPU_FEATURE_AVAILABLE(CPU_HAS_NEON); 881 } 882 883 static int SDL_SystemRAM = 0; 884 885 int 886 SDL_GetSystemRAM(void) 887 { 888 if (!SDL_SystemRAM) { 889 #ifndef SDL_CPUINFO_DISABLED 890 #if defined(HAVE_SYSCONF) && defined(_SC_PHYS_PAGES) && defined(_SC_PAGESIZE) 891 if (SDL_SystemRAM <= 0) { 892 SDL_SystemRAM = (int)((Sint64)sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE) / (1024*1024)); 893 } 894 #endif 
895 #ifdef HAVE_SYSCTLBYNAME 896 if (SDL_SystemRAM <= 0) { 897 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__NetBSD__) || defined(__DragonFly__) 898 #ifdef HW_REALMEM 899 int mib[2] = {CTL_HW, HW_REALMEM}; 900 #else 901 /* might only report up to 2 GiB */ 902 int mib[2] = {CTL_HW, HW_PHYSMEM}; 903 #endif /* HW_REALMEM */ 904 #else 905 int mib[2] = {CTL_HW, HW_MEMSIZE}; 906 #endif /* __FreeBSD__ || __FreeBSD_kernel__ */ 907 Uint64 memsize = 0; 908 size_t len = sizeof(memsize); 909 910 if (sysctl(mib, 2, &memsize, &len, NULL, 0) == 0) { 911 SDL_SystemRAM = (int)(memsize / (1024*1024)); 912 } 913 } 914 #endif 915 #ifdef __WIN32__ 916 if (SDL_SystemRAM <= 0) { 917 MEMORYSTATUSEX stat; 918 stat.dwLength = sizeof(stat); 919 if (GlobalMemoryStatusEx(&stat)) { 920 SDL_SystemRAM = (int)(stat.ullTotalPhys / (1024 * 1024)); 921 } 922 } 923 #endif 924 #ifdef __OS2__ 925 if (SDL_SystemRAM <= 0) { 926 Uint32 sysram = 0; 927 DosQuerySysInfo(QSV_TOTPHYSMEM, QSV_TOTPHYSMEM, &sysram, 4); 928 SDL_SystemRAM = (int) (sysram / 0x100000U); 929 } 930 #endif 931 #ifdef __RISCOS__ 932 if (SDL_SystemRAM <= 0) { 933 _kernel_swi_regs regs; 934 regs.r[0] = 0x108; 935 if (_kernel_swi(OS_Memory, ®s, ®s) == NULL) { 936 SDL_SystemRAM = (int)(regs.r[1] * regs.r[2] / (1024 * 1024)); 937 } 938 } 939 #endif 940 #endif 941 } 942 return SDL_SystemRAM; 943 } 944 945 946 size_t 947 SDL_SIMDGetAlignment(void) 948 { 949 if (SDL_SIMDAlignment == 0xFFFFFFFF) { 950 SDL_GetCPUFeatures(); /* make sure this has been calculated */ 951 } 952 SDL_assert(SDL_SIMDAlignment != 0); 953 return SDL_SIMDAlignment; 954 } 955 956 void * 957 SDL_SIMDAlloc(const size_t len) 958 { 959 const size_t alignment = SDL_SIMDGetAlignment(); 960 const size_t padding = alignment - (len % alignment); 961 const size_t padded = (padding != alignment) ? 
(len + padding) : len; 962 Uint8 *retval = NULL; 963 Uint8 *ptr = (Uint8 *) SDL_malloc(padded + alignment + sizeof (void *)); 964 if (ptr) { 965 /* store the actual malloc pointer right before our aligned pointer. */ 966 retval = ptr + sizeof (void *); 967 retval += alignment - (((size_t) retval) % alignment); 968 *(((void **) retval) - 1) = ptr; 969 } 970 return retval; 971 } 972 973 void * 974 SDL_SIMDRealloc(void *mem, const size_t len) 975 { 976 const size_t alignment = SDL_SIMDGetAlignment(); 977 const size_t padding = alignment - (len % alignment); 978 const size_t padded = (padding != alignment) ? (len + padding) : len; 979 Uint8 *retval = (Uint8*) mem; 980 void *oldmem = mem; 981 size_t memdiff = 0, ptrdiff; 982 Uint8 *ptr; 983 984 if (mem) { 985 void **realptr = (void **) mem; 986 realptr--; 987 mem = *(((void **) mem) - 1); 988 989 /* Check the delta between the real pointer and user pointer */ 990 memdiff = ((size_t) oldmem) - ((size_t) mem); 991 } 992 993 ptr = (Uint8 *) SDL_realloc(mem, padded + alignment + sizeof (void *)); 994 995 if (ptr == mem) { 996 return retval; /* Pointer didn't change, nothing to do */ 997 } 998 if (ptr == NULL) { 999 return NULL; /* Out of memory, bail! */ 1000 } 1001 1002 /* Store the actual malloc pointer right before our aligned pointer. */ 1003 retval = ptr + sizeof (void *); 1004 retval += alignment - (((size_t) retval) % alignment); 1005 1006 /* Make sure the delta is the same! */ 1007 if (mem) { 1008 ptrdiff = ((size_t) retval) - ((size_t) ptr); 1009 if (memdiff != ptrdiff) { /* Delta has changed, copy to new offset! */ 1010 oldmem = (void*) (((size_t) ptr) + memdiff); 1011 1012 /* Even though the data past the old `len` is undefined, this is the 1013 * only length value we have, and it guarantees that we copy all the 1014 * previous memory anyhow. 1015 */ 1016 SDL_memmove(retval, oldmem, len); 1017 } 1018 } 1019 1020 /* Actually store the malloc pointer, finally. 
*/ 1021 *(((void **) retval) - 1) = ptr; 1022 return retval; 1023 } 1024 1025 void 1026 SDL_SIMDFree(void *ptr) 1027 { 1028 if (ptr) { 1029 void **realptr = (void **) ptr; 1030 realptr--; 1031 SDL_free(*(((void **) ptr) - 1)); 1032 } 1033 } 1034 1035 1036 #ifdef TEST_MAIN 1037 1038 #include <stdio.h> 1039 1040 int 1041 main() 1042 { 1043 printf("CPU count: %d\n", SDL_GetCPUCount()); 1044 printf("CPU type: %s\n", SDL_GetCPUType()); 1045 printf("CPU name: %s\n", SDL_GetCPUName()); 1046 printf("CacheLine size: %d\n", SDL_GetCPUCacheLineSize()); 1047 printf("RDTSC: %d\n", SDL_HasRDTSC()); 1048 printf("Altivec: %d\n", SDL_HasAltiVec()); 1049 printf("MMX: %d\n", SDL_HasMMX()); 1050 printf("3DNow: %d\n", SDL_Has3DNow()); 1051 printf("SSE: %d\n", SDL_HasSSE()); 1052 printf("SSE2: %d\n", SDL_HasSSE2()); 1053 printf("SSE3: %d\n", SDL_HasSSE3()); 1054 printf("SSE4.1: %d\n", SDL_HasSSE41()); 1055 printf("SSE4.2: %d\n", SDL_HasSSE42()); 1056 printf("AVX: %d\n", SDL_HasAVX()); 1057 printf("AVX2: %d\n", SDL_HasAVX2()); 1058 printf("AVX-512F: %d\n", SDL_HasAVX512F()); 1059 printf("ARM SIMD: %d\n", SDL_HasARMSIMD()); 1060 printf("NEON: %d\n", SDL_HasNEON()); 1061 printf("RAM: %d MB\n", SDL_GetSystemRAM()); 1062 return 0; 1063 } 1064 1065 #endif /* TEST_MAIN */ 1066 1067 /* vi: set ts=4 sw=4 expandtab: */