sdl

FORK: Simple Directmedia Layer
git clone https://git.neptards.moe/neptards/sdl.git
Log | Files | Refs

SDL_cpuinfo.c (33209B)


      1 /*
      2   Simple DirectMedia Layer
      3   Copyright (C) 1997-2020 Sam Lantinga <slouken@libsdl.org>
      4 
      5   This software is provided 'as-is', without any express or implied
      6   warranty.  In no event will the authors be held liable for any damages
      7   arising from the use of this software.
      8 
      9   Permission is granted to anyone to use this software for any purpose,
     10   including commercial applications, and to alter it and redistribute it
     11   freely, subject to the following restrictions:
     12 
     13   1. The origin of this software must not be misrepresented; you must not
     14      claim that you wrote the original software. If you use this software
     15      in a product, an acknowledgment in the product documentation would be
     16      appreciated but is not required.
     17   2. Altered source versions must be plainly marked as such, and must not be
     18      misrepresented as being the original software.
     19   3. This notice may not be removed or altered from any source distribution.
     20 */
     21 #ifdef TEST_MAIN
     22 #include "SDL_config.h"
     23 #else
     24 #include "../SDL_internal.h"
     25 #endif
     26 
     27 #if defined(__WIN32__) || defined(__WINRT__)
     28 #include "../core/windows/SDL_windows.h"
     29 #endif
     30 #if defined(__OS2__)
     31 #undef HAVE_SYSCTLBYNAME
     32 #define INCL_DOS
     33 #include <os2.h>
     34 #ifndef QSV_NUMPROCESSORS
     35 #define QSV_NUMPROCESSORS 26
     36 #endif
     37 #endif
     38 
     39 /* CPU feature detection for SDL */
     40 
     41 #include "SDL_cpuinfo.h"
     42 #include "SDL_assert.h"
     43 
     44 #ifdef HAVE_SYSCONF
     45 #include <unistd.h>
     46 #endif
     47 #ifdef HAVE_SYSCTLBYNAME
     48 #include <sys/types.h>
     49 #include <sys/sysctl.h>
     50 #endif
     51 #if defined(__MACOSX__) && (defined(__ppc__) || defined(__ppc64__))
     52 #include <sys/sysctl.h>         /* For AltiVec check */
     53 #elif (defined(__OpenBSD__) || defined(__FreeBSD__)) && defined(__powerpc__)
     54 #include <sys/param.h>
     55 #include <sys/sysctl.h> /* For AltiVec check */
     56 #include <machine/cpu.h>
     57 #elif SDL_ALTIVEC_BLITTERS && HAVE_SETJMP
     58 #include <signal.h>
     59 #include <setjmp.h>
     60 #endif
     61 
     62 #if defined(__QNXNTO__)
     63 #include <sys/syspage.h>
     64 #endif
     65 
     66 #if (defined(__LINUX__) || defined(__ANDROID__)) && defined(__ARM_ARCH)
     67 /*#include <asm/hwcap.h>*/
     68 #ifndef AT_HWCAP
     69 #define AT_HWCAP 16
     70 #endif
     71 #ifndef AT_PLATFORM
     72 #define AT_PLATFORM 15
     73 #endif
     74 /* Prevent compilation error when including elf.h would also try to define AT_* as an enum */
     75 #ifndef AT_NULL
     76 #define AT_NULL 0
     77 #endif
     78 #ifndef HWCAP_NEON
     79 #define HWCAP_NEON (1 << 12)
     80 #endif
     81 #if defined HAVE_GETAUXVAL
     82 #include <sys/auxv.h>
     83 #else
     84 #include <fcntl.h>
     85 #endif
     86 #endif
     87 
     88 #if defined(__ANDROID__) && defined(__ARM_ARCH) && !defined(HAVE_GETAUXVAL)
     89 #if __ARM_ARCH < 8
     90 #include <cpu-features.h>
     91 #endif
     92 #endif
     93 
     94 #if defined(HAVE_ELF_AUX_INFO)
     95 #include <sys/auxv.h>
     96 #endif
     97 
     98 #ifdef __RISCOS__
     99 #include <kernel.h>
    100 #include <swis.h>
    101 #endif
    102 
    103 #define CPU_HAS_RDTSC   (1 << 0)
    104 #define CPU_HAS_ALTIVEC (1 << 1)
    105 #define CPU_HAS_MMX     (1 << 2)
    106 #define CPU_HAS_3DNOW   (1 << 3)
    107 #define CPU_HAS_SSE     (1 << 4)
    108 #define CPU_HAS_SSE2    (1 << 5)
    109 #define CPU_HAS_SSE3    (1 << 6)
    110 #define CPU_HAS_SSE41   (1 << 7)
    111 #define CPU_HAS_SSE42   (1 << 8)
    112 #define CPU_HAS_AVX     (1 << 9)
    113 #define CPU_HAS_AVX2    (1 << 10)
    114 #define CPU_HAS_NEON    (1 << 11)
    115 #define CPU_HAS_AVX512F (1 << 12)
    116 #define CPU_HAS_ARM_SIMD (1 << 13)
    117 
    118 #if SDL_ALTIVEC_BLITTERS && HAVE_SETJMP && !__MACOSX__ && !__OpenBSD__
    119 /* This is the brute force way of detecting instruction sets...
    120    the idea is borrowed from the libmpeg2 library - thanks!
    121  */
    122 static jmp_buf jmpbuf;
    123 static void
    124 illegal_instruction(int sig)
    125 {
    126     longjmp(jmpbuf, 1);
    127 }
    128 #endif /* HAVE_SETJMP */
    129 
/*
 * Report whether the CPUID instruction can be used.
 *
 * The x86 branches below all perform the same probe: read EFLAGS, flip bit
 * 0x200000 (the ID flag), write it back, read EFLAGS again and compare.  If
 * the bit could be toggled, has_CPUID is set to 1.  MSVC x64 builds skip the
 * probe and report 1 unconditionally.  When SDL_CPUINFO_DISABLED is defined,
 * or no branch matches the compiler/architecture, the function returns 0.
 *
 * Returns 1 if CPUID is available, 0 otherwise.
 */
static int
CPU_haveCPUID(void)
{
    int has_CPUID = 0;

/* *INDENT-OFF* */
#ifndef SDL_CPUINFO_DISABLED
#if (defined(__GNUC__) || defined(__clang__)) && defined(i386)
    /* GCC/clang, 32-bit x86: result is written through the "=m" operand. */
    __asm__ (
"        pushfl                      # Get original EFLAGS             \n"
"        popl    %%eax                                                 \n"
"        movl    %%eax,%%ecx                                           \n"
"        xorl    $0x200000,%%eax     # Flip ID bit in EFLAGS           \n"
"        pushl   %%eax               # Save new EFLAGS value on stack  \n"
"        popfl                       # Replace current EFLAGS value    \n"
"        pushfl                      # Get new EFLAGS                  \n"
"        popl    %%eax               # Store new EFLAGS in EAX         \n"
"        xorl    %%ecx,%%eax         # Can not toggle ID bit,          \n"
"        jz      1f                  # Processor=80486                 \n"
"        movl    $1,%0               # We have CPUID support           \n"
"1:                                                                    \n"
    : "=m" (has_CPUID)
    :
    : "%eax", "%ecx"
    );
#elif (defined(__GNUC__) || defined(__clang__)) && defined(__x86_64__)
/* Technically, if this is being compiled under __x86_64__ then it has
   CPUid by definition.  But it's nice to be able to prove it.  :)      */
    __asm__ (
"        pushfq                      # Get original EFLAGS             \n"
"        popq    %%rax                                                 \n"
"        movq    %%rax,%%rcx                                           \n"
"        xorl    $0x200000,%%eax     # Flip ID bit in EFLAGS           \n"
"        pushq   %%rax               # Save new EFLAGS value on stack  \n"
"        popfq                       # Replace current EFLAGS value    \n"
"        pushfq                      # Get new EFLAGS                  \n"
"        popq    %%rax               # Store new EFLAGS in EAX         \n"
"        xorl    %%ecx,%%eax         # Can not toggle ID bit,          \n"
"        jz      1f                  # Processor=80486                 \n"
"        movl    $1,%0               # We have CPUID support           \n"
"1:                                                                    \n"
    : "=m" (has_CPUID)
    :
    : "%rax", "%rcx"
    );
#elif (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__)
    /* MSVC (32-bit) / Watcom inline assembly: same probe, Intel syntax. */
    __asm {
        pushfd                      ; Get original EFLAGS
        pop     eax
        mov     ecx, eax
        xor     eax, 200000h        ; Flip ID bit in EFLAGS
        push    eax                 ; Save new EFLAGS value on stack
        popfd                       ; Replace current EFLAGS value
        pushfd                      ; Get new EFLAGS
        pop     eax                 ; Store new EFLAGS in EAX
        xor     eax, ecx            ; Can not toggle ID bit,
        jz      done                ; Processor=80486
        mov     has_CPUID,1         ; We have CPUID support
done:
    }
#elif defined(_MSC_VER) && defined(_M_X64)
    /* No probe on MSVC x64: CPUID is simply reported as present. */
    has_CPUID = 1;
#elif defined(__sun) && defined(__i386)
    /* NOTE(review): the result is stored directly to -8(%ebp), presumably the
       stack slot of has_CPUID -- this depends on the compiler's frame layout;
       confirm. */
    __asm (
"       pushfl                 \n"
"       popl    %eax           \n"
"       movl    %eax,%ecx      \n"
"       xorl    $0x200000,%eax \n"
"       pushl   %eax           \n"
"       popfl                  \n"
"       pushfl                 \n"
"       popl    %eax           \n"
"       xorl    %ecx,%eax      \n"
"       jz      1f             \n"
"       movl    $1,-8(%ebp)    \n"
"1:                            \n"
    );
#elif defined(__sun) && defined(__amd64)
    /* NOTE(review): as above, the store to -8(%rbp) presumably targets
       has_CPUID; confirm against the compiler's frame layout. */
    __asm (
"       pushfq                 \n"
"       popq    %rax           \n"
"       movq    %rax,%rcx      \n"
"       xorl    $0x200000,%eax \n"
"       pushq   %rax           \n"
"       popfq                  \n"
"       pushfq                 \n"
"       popq    %rax           \n"
"       xorl    %ecx,%eax      \n"
"       jz      1f             \n"
"       movl    $1,-8(%rbp)    \n"
"1:                            \n"
    );
#endif
#endif
/* *INDENT-ON* */
    return has_CPUID;
}
    227 
/*
 * cpuid(func, a, b, c, d): execute CPUID leaf `func` and store the resulting
 * EAX/EBX/ECX/EDX values in the lvalues a, b, c and d.
 *
 * One definition is selected per compiler/architecture; if none matches, the
 * fallback at the bottom simply zeroes all four outputs.
 */
#if (defined(__GNUC__) || defined(__clang__)) && defined(i386)
/* GCC/clang, 32-bit: EBX is saved and restored around the instruction and its
   value is handed back through ESI ("=S"); ECX is cleared before CPUID. */
#define cpuid(func, a, b, c, d) \
    __asm__ __volatile__ ( \
"        pushl %%ebx        \n" \
"        xorl %%ecx,%%ecx   \n" \
"        cpuid              \n" \
"        movl %%ebx, %%esi  \n" \
"        popl %%ebx         \n" : \
            "=a" (a), "=S" (b), "=c" (c), "=d" (d) : "a" (func))
#elif (defined(__GNUC__) || defined(__clang__)) && defined(__x86_64__)
/* GCC/clang, 64-bit: same scheme using the 64-bit registers. */
#define cpuid(func, a, b, c, d) \
    __asm__ __volatile__ ( \
"        pushq %%rbx        \n" \
"        xorq %%rcx,%%rcx   \n" \
"        cpuid              \n" \
"        movq %%rbx, %%rsi  \n" \
"        popq %%rbx         \n" : \
            "=a" (a), "=S" (b), "=c" (c), "=d" (d) : "a" (func))
#elif (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__)
/* MSVC (32-bit) / Watcom inline assembly; ECX is cleared before CPUID. */
#define cpuid(func, a, b, c, d) \
    __asm { \
        __asm mov eax, func \
        __asm xor ecx, ecx \
        __asm cpuid \
        __asm mov a, eax \
        __asm mov b, ebx \
        __asm mov c, ecx \
        __asm mov d, edx \
}
#elif defined(_MSC_VER) && defined(_M_X64)
/* MSVC x64 has no inline assembly, so the __cpuid intrinsic is used.
   NOTE(review): unlike the other variants this one does not explicitly pass a
   zero sub-leaf (ECX); verify the behaviour for leaf 7 on the compilers in
   use. */
#define cpuid(func, a, b, c, d) \
{ \
    int CPUInfo[4]; \
    __cpuid(CPUInfo, func); \
    a = CPUInfo[0]; \
    b = CPUInfo[1]; \
    c = CPUInfo[2]; \
    d = CPUInfo[3]; \
}
#else
/* No CPUID support for this compiler/architecture: report all zeroes.  The
   (void) casts keep "set but unused" warnings quiet at the call sites. */
#define cpuid(func, a, b, c, d) \
    do { a = b = c = d = 0; (void) a; (void) b; (void) c; (void) d; } while (0)
#endif
    271 
    272 static int CPU_CPUIDFeatures[4];
    273 static int CPU_CPUIDMaxFunction = 0;
    274 static SDL_bool CPU_OSSavesYMM = SDL_FALSE;
    275 static SDL_bool CPU_OSSavesZMM = SDL_FALSE;
    276 
    277 static void
    278 CPU_calcCPUIDFeatures(void)
    279 {
    280     static SDL_bool checked = SDL_FALSE;
    281     if (!checked) {
    282         checked = SDL_TRUE;
    283         if (CPU_haveCPUID()) {
    284             int a, b, c, d;
    285             cpuid(0, a, b, c, d);
    286             CPU_CPUIDMaxFunction = a;
    287             if (CPU_CPUIDMaxFunction >= 1) {
    288                 cpuid(1, a, b, c, d);
    289                 CPU_CPUIDFeatures[0] = a;
    290                 CPU_CPUIDFeatures[1] = b;
    291                 CPU_CPUIDFeatures[2] = c;
    292                 CPU_CPUIDFeatures[3] = d;
    293 
    294                 /* Check to make sure we can call xgetbv */
    295                 if (c & 0x08000000) {
    296                     /* Call xgetbv to see if YMM (etc) register state is saved */
    297 #if (defined(__GNUC__) || defined(__clang__)) && (defined(i386) || defined(__x86_64__))
    298                     __asm__(".byte 0x0f, 0x01, 0xd0" : "=a" (a) : "c" (0) : "%edx");
    299 #elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) && (_MSC_FULL_VER >= 160040219) /* VS2010 SP1 */
    300                     a = (int)_xgetbv(0);
    301 #elif (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__)
    302                     __asm
    303                     {
    304                         xor ecx, ecx
    305                         _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0
    306                         mov a, eax
    307                     }
    308 #endif
    309                     CPU_OSSavesYMM = ((a & 6) == 6) ? SDL_TRUE : SDL_FALSE;
    310                     CPU_OSSavesZMM = (CPU_OSSavesYMM && ((a & 0xe0) == 0xe0)) ? SDL_TRUE : SDL_FALSE;
    311                 }
    312             }
    313         }
    314     }
    315 }
    316 
/*
 * Detect AltiVec (PowerPC vector unit) support.
 *
 * - Mac OS X / OpenBSD / FreeBSD on PowerPC: query the kernel through
 *   sysctl() with a platform-specific two-element selector; a successful call
 *   with a non-zero answer means AltiVec is present.
 * - Otherwise, when SDL_ALTIVEC_BLITTERS and HAVE_SETJMP are set: temporarily
 *   install illegal_instruction() as the SIGILL handler, execute an AltiVec
 *   instruction and see whether execution gets past it.
 * - In every other configuration (or with SDL_CPUINFO_DISABLED) no probe is
 *   compiled and the result is 0.
 *
 * Returns non-zero if AltiVec was detected, 0 otherwise.
 */
static int
CPU_haveAltiVec(void)
{
    /* NOTE(review): presumably volatile because the brute-force path modifies
       it between setjmp() and a possible longjmp() -- confirm. */
    volatile int altivec = 0;
#ifndef SDL_CPUINFO_DISABLED
#if (defined(__MACOSX__) && (defined(__ppc__) || defined(__ppc64__))) || (defined(__OpenBSD__) && defined(__powerpc__)) || (defined(__FreeBSD__) && defined(__powerpc__))
#ifdef __OpenBSD__
    int selectors[2] = { CTL_MACHDEP, CPU_ALTIVEC };
#elif defined(__FreeBSD__)
    int selectors[2] = { CTL_HW, PPC_FEATURE_HAS_ALTIVEC };
#else
    int selectors[2] = { CTL_HW, HW_VECTORUNIT };
#endif
    int hasVectorUnit = 0;
    size_t length = sizeof(hasVectorUnit);
    int error = sysctl(selectors, 2, &hasVectorUnit, &length, NULL, 0);
    /* A failed sysctl() leaves altivec at 0. */
    if (0 == error)
        altivec = (hasVectorUnit != 0);
#elif SDL_ALTIVEC_BLITTERS && HAVE_SETJMP
    void (*handler) (int sig);
    /* Remember the previous SIGILL handler so it can be restored below. */
    handler = signal(SIGILL, illegal_instruction);
    if (setjmp(jmpbuf) == 0) {
        /* If this traps, illegal_instruction() longjmps back to the setjmp
           above with a non-zero value and the assignment below is skipped. */
        asm volatile ("mtspr 256, %0\n\t" "vand %%v0, %%v0, %%v0"::"r" (-1));
        altivec = 1;
    }
    signal(SIGILL, handler);
#endif
#endif
    return altivec;
}
    347 
    348 #if defined(__ARM_ARCH) && (__ARM_ARCH >= 6)
    349 static int
    350 CPU_haveARMSIMD(void)
    351 {
    352 	return 1;
    353 }
    354 
    355 #elif !defined(__arm__)
    356 static int
    357 CPU_haveARMSIMD(void)
    358 {
    359 	return 0;
    360 }
    361 
    362 #elif defined(__LINUX__)
    363 #include <unistd.h>
    364 #include <sys/types.h>
    365 #include <sys/stat.h>
    366 #include <fcntl.h>
    367 #include <elf.h>
    368 
    369 static int
    370 CPU_haveARMSIMD(void)
    371 {
    372     int arm_simd = 0;
    373     int fd;
    374 
    375     fd = open("/proc/self/auxv", O_RDONLY);
    376     if (fd >= 0)
    377     {
    378         Elf32_auxv_t aux;
    379         while (read(fd, &aux, sizeof aux) == sizeof aux)
    380         {
    381             if (aux.a_type == AT_PLATFORM)
    382             {
    383                 const char *plat = (const char *) aux.a_un.a_val;
    384                 if (plat) {
    385                     arm_simd = strncmp(plat, "v6l", 3) == 0 ||
    386                                strncmp(plat, "v7l", 3) == 0;
    387                 }
    388             }
    389         }
    390         close(fd);
    391     }
    392     return arm_simd;
    393 }
    394 
    395 #elif defined(__RISCOS__)
    396 
    397 static int
    398 CPU_haveARMSIMD(void)
    399 {
    400 	_kernel_swi_regs regs;
    401 	regs.r[0] = 0;
    402 	if (_kernel_swi(OS_PlatformFeatures, &regs, &regs) != NULL)
    403 		return 0;
    404 
    405 	if (!(regs.r[0] & (1<<31)))
    406 		return 0;
    407 
    408 	regs.r[0] = 34;
    409 	regs.r[1] = 29;
    410 	if (_kernel_swi(OS_PlatformFeatures, &regs, &regs) != NULL)
    411 		return 0;
    412 
    413 	return regs.r[0];
    414 }
    415 
    416 #else
    417 static int
    418 CPU_haveARMSIMD(void)
    419 {
    420 #warning SDL_HasARMSIMD is not implemented for this ARM platform. Write me.
    421     return 0;
    422 }
    423 #endif
    424 
    425 #if defined(__LINUX__) && defined(__ARM_ARCH) && !defined(HAVE_GETAUXVAL)
    426 static int
    427 readProcAuxvForNeon(void)
    428 {
    429     int neon = 0;
    430     int kv[2];
    431     const int fd = open("/proc/self/auxv", O_RDONLY);
    432     if (fd != -1) {
    433         while (read(fd, kv, sizeof (kv)) == sizeof (kv)) {
    434             if (kv[0] == AT_HWCAP) {
    435                 neon = ((kv[1] & HWCAP_NEON) == HWCAP_NEON);
    436                 break;
    437             }
    438         }
    439         close(fd);
    440     }
    441     return neon;
    442 }
    443 #endif
    444 
/*
 * Detect ARM NEON support.
 *
 * Exactly one branch of the preprocessor chain below is compiled; the order
 * of the branches matters because several conditions overlap.  Each branch
 * either returns a fixed answer for the platform or asks the operating system.
 *
 * Returns non-zero if NEON is available, 0 otherwise.
 */
static int
CPU_haveNEON(void)
{
/* The way you detect NEON is a privileged instruction on ARM, so you have
   to query the OS kernel in a platform-specific way. :/ */
#if defined(SDL_CPUINFO_DISABLED)
   return 0; /* disabled */
#elif (defined(__WINDOWS__) || defined(__WINRT__)) && (defined(_M_ARM) || defined(_M_ARM64))
/* Visual Studio, for ARM, doesn't define __ARM_ARCH. Handle this first. */
/* Seems to have been removed */
#  if !defined(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE)
#    define PF_ARM_NEON_INSTRUCTIONS_AVAILABLE 19
#  endif
/* All WinRT ARM devices are required to support NEON, but just in case. */
    return IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) != 0;
#elif defined(__ARM_ARCH) && (__ARM_ARCH >= 8)
    return 1;  /* ARMv8 always has non-optional NEON support. */
#elif defined(__APPLE__) && defined(__ARM_ARCH) && (__ARM_ARCH >= 7)
    /* (note that sysctlbyname("hw.optional.neon") doesn't work!) */
    return 1;  /* all Apple ARMv7 chips and later have NEON. */
#elif defined(__APPLE__)
    return 0;  /* assume anything else from Apple doesn't have NEON. */
#elif defined(__OpenBSD__)
    return 1;  /* OpenBSD only supports ARMv7 CPUs that have NEON. */
#elif defined(HAVE_ELF_AUX_INFO) && defined(HWCAP_NEON)
    /* Ask elf_aux_info() for the AT_HWCAP word; a failed call means "no". */
    unsigned long hasneon = 0;
    if (elf_aux_info(AT_HWCAP, (void *)&hasneon, (int)sizeof(hasneon)) != 0)
        return 0;
    return ((hasneon & HWCAP_NEON) == HWCAP_NEON);
#elif !defined(__arm__)
    return 0;  /* not an ARM CPU at all. */
#elif defined(__QNXNTO__)
    return SYSPAGE_ENTRY(cpuinfo)->flags & ARM_CPU_FLAG_NEON;
#elif (defined(__LINUX__) || defined(__ANDROID__)) && defined(HAVE_GETAUXVAL)
    return ((getauxval(AT_HWCAP) & HWCAP_NEON) == HWCAP_NEON);
#elif defined(__LINUX__)
    /* No getauxval(): parse /proc/self/auxv by hand. */
    return readProcAuxvForNeon();
#elif defined(__ANDROID__)
    /* Use NDK cpufeatures to read either /proc/self/auxv or /proc/cpuinfo */
    {
        AndroidCpuFamily cpu_family = android_getCpuFamily();
        if (cpu_family == ANDROID_CPU_FAMILY_ARM) {
            uint64_t cpu_features = android_getCpuFeatures();
            if ((cpu_features & ANDROID_CPU_ARM_FEATURE_NEON) != 0) {
                return 1;
            }
        }
        return 0;
    }
#elif defined(__RISCOS__)
    /* Use the VFPSupport_Features SWI to access the MVFR registers */
    {
        _kernel_swi_regs regs;
        regs.r[0] = 0;
        /* A NULL result from _kernel_swi() means the call succeeded. */
        if (_kernel_swi(VFPSupport_Features, &regs, &regs) == NULL) {
            if ((regs.r[2] & 0xFFF000) == 0x111000) {
                return 1;
            }
        }
        return 0;
    }
#else
#warning SDL_HasNEON is not implemented for this ARM platform. Write me.
    return 0;
#endif
}
    511 
    512 static int
    513 CPU_have3DNow(void)
    514 {
    515     if (CPU_CPUIDMaxFunction > 0) {  /* that is, do we have CPUID at all? */
    516         int a, b, c, d;
    517         cpuid(0x80000000, a, b, c, d);
    518         if (a >= 0x80000001) {
    519             cpuid(0x80000001, a, b, c, d);
    520             return (d & 0x80000000);
    521         }
    522     }
    523     return 0;
    524 }
    525 
/*
 * Feature tests on the cached CPUID leaf 1 registers.  CPU_CPUIDFeatures[3]
 * holds EDX and CPU_CPUIDFeatures[2] holds ECX, so these are only meaningful
 * after CPU_calcCPUIDFeatures() has run.  Each macro evaluates to non-zero
 * when its feature bit is set.
 */
#define CPU_haveRDTSC() (CPU_CPUIDFeatures[3] & 0x00000010)  /* EDX bit 4 */
#define CPU_haveMMX() (CPU_CPUIDFeatures[3] & 0x00800000)  /* EDX bit 23 */
#define CPU_haveSSE() (CPU_CPUIDFeatures[3] & 0x02000000)  /* EDX bit 25 */
#define CPU_haveSSE2() (CPU_CPUIDFeatures[3] & 0x04000000)  /* EDX bit 26 */
#define CPU_haveSSE3() (CPU_CPUIDFeatures[2] & 0x00000001)  /* ECX bit 0 */
#define CPU_haveSSE41() (CPU_CPUIDFeatures[2] & 0x00080000)  /* ECX bit 19 */
#define CPU_haveSSE42() (CPU_CPUIDFeatures[2] & 0x00100000)  /* ECX bit 20 */
/* AVX additionally requires that the OS saves the YMM register state. */
#define CPU_haveAVX() (CPU_OSSavesYMM && (CPU_CPUIDFeatures[2] & 0x10000000))  /* ECX bit 28 */
    534 
    535 static int
    536 CPU_haveAVX2(void)
    537 {
    538     if (CPU_OSSavesYMM && (CPU_CPUIDMaxFunction >= 7)) {
    539         int a, b, c, d;
    540         (void) a; (void) b; (void) c; (void) d;  /* compiler warnings... */
    541         cpuid(7, a, b, c, d);
    542         return (b & 0x00000020);
    543     }
    544     return 0;
    545 }
    546 
    547 static int
    548 CPU_haveAVX512F(void)
    549 {
    550     if (CPU_OSSavesZMM && (CPU_CPUIDMaxFunction >= 7)) {
    551         int a, b, c, d;
    552         (void) a; (void) b; (void) c; (void) d;  /* compiler warnings... */
    553         cpuid(7, a, b, c, d);
    554         return (b & 0x00010000);
    555     }
    556     return 0;
    557 }
    558 
    559 static int SDL_CPUCount = 0;
    560 
    561 int
    562 SDL_GetCPUCount(void)
    563 {
    564     if (!SDL_CPUCount) {
    565 #ifndef SDL_CPUINFO_DISABLED
    566 #if defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_ONLN)
    567         if (SDL_CPUCount <= 0) {
    568             SDL_CPUCount = (int)sysconf(_SC_NPROCESSORS_ONLN);
    569         }
    570 #endif
    571 #ifdef HAVE_SYSCTLBYNAME
    572         if (SDL_CPUCount <= 0) {
    573             size_t size = sizeof(SDL_CPUCount);
    574             sysctlbyname("hw.ncpu", &SDL_CPUCount, &size, NULL, 0);
    575         }
    576 #endif
    577 #ifdef __WIN32__
    578         if (SDL_CPUCount <= 0) {
    579             SYSTEM_INFO info;
    580             GetSystemInfo(&info);
    581             SDL_CPUCount = info.dwNumberOfProcessors;
    582         }
    583 #endif
    584 #ifdef __OS2__
    585         if (SDL_CPUCount <= 0) {
    586             DosQuerySysInfo(QSV_NUMPROCESSORS, QSV_NUMPROCESSORS,
    587                             &SDL_CPUCount, sizeof(SDL_CPUCount) );
    588         }
    589 #endif
    590 #endif
    591         /* There has to be at least 1, right? :) */
    592         if (SDL_CPUCount <= 0) {
    593             SDL_CPUCount = 1;
    594         }
    595     }
    596     return SDL_CPUCount;
    597 }
    598 
    599 /* Oh, such a sweet sweet trick, just not very useful. :) */
    600 static const char *
    601 SDL_GetCPUType(void)
    602 {
    603     static char SDL_CPUType[13];
    604 
    605     if (!SDL_CPUType[0]) {
    606         int i = 0;
    607 
    608         CPU_calcCPUIDFeatures();
    609         if (CPU_CPUIDMaxFunction > 0) {  /* do we have CPUID at all? */
    610             int a, b, c, d;
    611             cpuid(0x00000000, a, b, c, d);
    612             (void) a;
    613             SDL_CPUType[i++] = (char)(b & 0xff); b >>= 8;
    614             SDL_CPUType[i++] = (char)(b & 0xff); b >>= 8;
    615             SDL_CPUType[i++] = (char)(b & 0xff); b >>= 8;
    616             SDL_CPUType[i++] = (char)(b & 0xff);
    617 
    618             SDL_CPUType[i++] = (char)(d & 0xff); d >>= 8;
    619             SDL_CPUType[i++] = (char)(d & 0xff); d >>= 8;
    620             SDL_CPUType[i++] = (char)(d & 0xff); d >>= 8;
    621             SDL_CPUType[i++] = (char)(d & 0xff);
    622 
    623             SDL_CPUType[i++] = (char)(c & 0xff); c >>= 8;
    624             SDL_CPUType[i++] = (char)(c & 0xff); c >>= 8;
    625             SDL_CPUType[i++] = (char)(c & 0xff); c >>= 8;
    626             SDL_CPUType[i++] = (char)(c & 0xff);
    627         }
    628         if (!SDL_CPUType[0]) {
    629             SDL_strlcpy(SDL_CPUType, "Unknown", sizeof(SDL_CPUType));
    630         }
    631     }
    632     return SDL_CPUType;
    633 }
    634 
    635 
    636 #ifdef TEST_MAIN  /* !!! FIXME: only used for test at the moment. */
    637 static const char *
    638 SDL_GetCPUName(void)
    639 {
    640     static char SDL_CPUName[48];
    641 
    642     if (!SDL_CPUName[0]) {
    643         int i = 0;
    644         int a, b, c, d;
    645 
    646         CPU_calcCPUIDFeatures();
    647         if (CPU_CPUIDMaxFunction > 0) {  /* do we have CPUID at all? */
    648             cpuid(0x80000000, a, b, c, d);
    649             if (a >= 0x80000004) {
    650                 cpuid(0x80000002, a, b, c, d);
    651                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
    652                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
    653                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
    654                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
    655                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
    656                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
    657                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
    658                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
    659                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
    660                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
    661                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
    662                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
    663                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
    664                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
    665                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
    666                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
    667                 cpuid(0x80000003, a, b, c, d);
    668                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
    669                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
    670                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
    671                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
    672                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
    673                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
    674                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
    675                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
    676                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
    677                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
    678                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
    679                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
    680                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
    681                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
    682                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
    683                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
    684                 cpuid(0x80000004, a, b, c, d);
    685                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
    686                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
    687                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
    688                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
    689                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
    690                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
    691                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
    692                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
    693                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
    694                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
    695                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
    696                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
    697                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
    698                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
    699                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
    700                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
    701             }
    702         }
    703         if (!SDL_CPUName[0]) {
    704             SDL_strlcpy(SDL_CPUName, "Unknown", sizeof(SDL_CPUName));
    705         }
    706     }
    707     return SDL_CPUName;
    708 }
    709 #endif
    710 
    711 int
    712 SDL_GetCPUCacheLineSize(void)
    713 {
    714     const char *cpuType = SDL_GetCPUType();
    715     int a, b, c, d;
    716     (void) a; (void) b; (void) c; (void) d;
    717    if (SDL_strcmp(cpuType, "GenuineIntel") == 0 || SDL_strcmp(cpuType, "CentaurHauls") == 0 || SDL_strcmp(cpuType, "  Shanghai  ") == 0) {
    718         cpuid(0x00000001, a, b, c, d);
    719         return (((b >> 8) & 0xff) * 8);
    720     } else if (SDL_strcmp(cpuType, "AuthenticAMD") == 0 || SDL_strcmp(cpuType, "HygonGenuine") == 0) {
    721         cpuid(0x80000005, a, b, c, d);
    722         return (c & 0xff);
    723     } else {
    724         /* Just make a guess here... */
    725         return SDL_CACHELINE_SIZE;
    726     }
    727 }
    728 
    729 static Uint32 SDL_CPUFeatures = 0xFFFFFFFF;
    730 static Uint32 SDL_SIMDAlignment = 0xFFFFFFFF;
    731 
    732 static Uint32
    733 SDL_GetCPUFeatures(void)
    734 {
    735     if (SDL_CPUFeatures == 0xFFFFFFFF) {
    736         CPU_calcCPUIDFeatures();
    737         SDL_CPUFeatures = 0;
    738         SDL_SIMDAlignment = sizeof(void *);  /* a good safe base value */
    739         if (CPU_haveRDTSC()) {
    740             SDL_CPUFeatures |= CPU_HAS_RDTSC;
    741         }
    742         if (CPU_haveAltiVec()) {
    743             SDL_CPUFeatures |= CPU_HAS_ALTIVEC;
    744             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
    745         }
    746         if (CPU_haveMMX()) {
    747             SDL_CPUFeatures |= CPU_HAS_MMX;
    748             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 8);
    749         }
    750         if (CPU_have3DNow()) {
    751             SDL_CPUFeatures |= CPU_HAS_3DNOW;
    752             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 8);
    753         }
    754         if (CPU_haveSSE()) {
    755             SDL_CPUFeatures |= CPU_HAS_SSE;
    756             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
    757         }
    758         if (CPU_haveSSE2()) {
    759             SDL_CPUFeatures |= CPU_HAS_SSE2;
    760             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
    761         }
    762         if (CPU_haveSSE3()) {
    763             SDL_CPUFeatures |= CPU_HAS_SSE3;
    764             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
    765         }
    766         if (CPU_haveSSE41()) {
    767             SDL_CPUFeatures |= CPU_HAS_SSE41;
    768             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
    769         }
    770         if (CPU_haveSSE42()) {
    771             SDL_CPUFeatures |= CPU_HAS_SSE42;
    772             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
    773         }
    774         if (CPU_haveAVX()) {
    775             SDL_CPUFeatures |= CPU_HAS_AVX;
    776             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 32);
    777         }
    778         if (CPU_haveAVX2()) {
    779             SDL_CPUFeatures |= CPU_HAS_AVX2;
    780             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 32);
    781         }
    782         if (CPU_haveAVX512F()) {
    783             SDL_CPUFeatures |= CPU_HAS_AVX512F;
    784             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 64);
    785         }
    786         if (CPU_haveARMSIMD()) {
    787             SDL_CPUFeatures |= CPU_HAS_ARM_SIMD;
    788             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
    789         }
    790         if (CPU_haveNEON()) {
    791             SDL_CPUFeatures |= CPU_HAS_NEON;
    792             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
    793         }
    794     }
    795     return SDL_CPUFeatures;
    796 }
    797 
    798 #define CPU_FEATURE_AVAILABLE(f) ((SDL_GetCPUFeatures() & f) ? SDL_TRUE : SDL_FALSE)
    799 
    800 SDL_bool SDL_HasRDTSC(void)
    801 {
    802     return CPU_FEATURE_AVAILABLE(CPU_HAS_RDTSC);
    803 }
    804 
    805 SDL_bool
    806 SDL_HasAltiVec(void)
    807 {
    808     return CPU_FEATURE_AVAILABLE(CPU_HAS_ALTIVEC);
    809 }
    810 
    811 SDL_bool
    812 SDL_HasMMX(void)
    813 {
    814     return CPU_FEATURE_AVAILABLE(CPU_HAS_MMX);
    815 }
    816 
    817 SDL_bool
    818 SDL_Has3DNow(void)
    819 {
    820     return CPU_FEATURE_AVAILABLE(CPU_HAS_3DNOW);
    821 }
    822 
    823 SDL_bool
    824 SDL_HasSSE(void)
    825 {
    826     return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE);
    827 }
    828 
    829 SDL_bool
    830 SDL_HasSSE2(void)
    831 {
    832     return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE2);
    833 }
    834 
    835 SDL_bool
    836 SDL_HasSSE3(void)
    837 {
    838     return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE3);
    839 }
    840 
    841 SDL_bool
    842 SDL_HasSSE41(void)
    843 {
    844     return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE41);
    845 }
    846 
    847 SDL_bool
    848 SDL_HasSSE42(void)
    849 {
    850     return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE42);
    851 }
    852 
    853 SDL_bool
    854 SDL_HasAVX(void)
    855 {
    856     return CPU_FEATURE_AVAILABLE(CPU_HAS_AVX);
    857 }
    858 
    859 SDL_bool
    860 SDL_HasAVX2(void)
    861 {
    862     return CPU_FEATURE_AVAILABLE(CPU_HAS_AVX2);
    863 }
    864 
    865 SDL_bool
    866 SDL_HasAVX512F(void)
    867 {
    868     return CPU_FEATURE_AVAILABLE(CPU_HAS_AVX512F);
    869 }
    870 
    871 SDL_bool
    872 SDL_HasARMSIMD(void)
    873 {
    874     return CPU_FEATURE_AVAILABLE(CPU_HAS_ARM_SIMD);
    875 }
    876 
    877 SDL_bool
    878 SDL_HasNEON(void)
    879 {
    880     return CPU_FEATURE_AVAILABLE(CPU_HAS_NEON);
    881 }
    882 
    883 static int SDL_SystemRAM = 0;
    884 
    885 int
    886 SDL_GetSystemRAM(void)
    887 {
    888     if (!SDL_SystemRAM) {
    889 #ifndef SDL_CPUINFO_DISABLED
    890 #if defined(HAVE_SYSCONF) && defined(_SC_PHYS_PAGES) && defined(_SC_PAGESIZE)
    891         if (SDL_SystemRAM <= 0) {
    892             SDL_SystemRAM = (int)((Sint64)sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE) / (1024*1024));
    893         }
    894 #endif
    895 #ifdef HAVE_SYSCTLBYNAME
    896         if (SDL_SystemRAM <= 0) {
    897 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__NetBSD__) || defined(__DragonFly__)
    898 #ifdef HW_REALMEM
    899             int mib[2] = {CTL_HW, HW_REALMEM};
    900 #else
    901             /* might only report up to 2 GiB */
    902             int mib[2] = {CTL_HW, HW_PHYSMEM};
    903 #endif /* HW_REALMEM */
    904 #else
    905             int mib[2] = {CTL_HW, HW_MEMSIZE};
    906 #endif /* __FreeBSD__ || __FreeBSD_kernel__ */
    907             Uint64 memsize = 0;
    908             size_t len = sizeof(memsize);
    909             
    910             if (sysctl(mib, 2, &memsize, &len, NULL, 0) == 0) {
    911                 SDL_SystemRAM = (int)(memsize / (1024*1024));
    912             }
    913         }
    914 #endif
    915 #ifdef __WIN32__
    916         if (SDL_SystemRAM <= 0) {
    917             MEMORYSTATUSEX stat;
    918             stat.dwLength = sizeof(stat);
    919             if (GlobalMemoryStatusEx(&stat)) {
    920                 SDL_SystemRAM = (int)(stat.ullTotalPhys / (1024 * 1024));
    921             }
    922         }
    923 #endif
    924 #ifdef __OS2__
    925         if (SDL_SystemRAM <= 0) {
    926             Uint32 sysram = 0;
    927             DosQuerySysInfo(QSV_TOTPHYSMEM, QSV_TOTPHYSMEM, &sysram, 4);
    928             SDL_SystemRAM = (int) (sysram / 0x100000U);
    929         }
    930 #endif
    931 #ifdef __RISCOS__
    932         if (SDL_SystemRAM <= 0) {
    933             _kernel_swi_regs regs;
    934             regs.r[0] = 0x108;
    935             if (_kernel_swi(OS_Memory, &regs, &regs) == NULL) {
    936                 SDL_SystemRAM = (int)(regs.r[1] * regs.r[2] / (1024 * 1024));
    937             }
    938         }
    939 #endif
    940 #endif
    941     }
    942     return SDL_SystemRAM;
    943 }
    944 
    945 
    946 size_t
    947 SDL_SIMDGetAlignment(void)
    948 {
    949     if (SDL_SIMDAlignment == 0xFFFFFFFF) {
    950         SDL_GetCPUFeatures();  /* make sure this has been calculated */
    951     }
    952     SDL_assert(SDL_SIMDAlignment != 0);
    953     return SDL_SIMDAlignment;
    954 }
    955 
    956 void *
    957 SDL_SIMDAlloc(const size_t len)
    958 {
    959     const size_t alignment = SDL_SIMDGetAlignment();
    960     const size_t padding = alignment - (len % alignment);
    961     const size_t padded = (padding != alignment) ? (len + padding) : len;
    962     Uint8 *retval = NULL;
    963     Uint8 *ptr = (Uint8 *) SDL_malloc(padded + alignment + sizeof (void *));
    964     if (ptr) {
    965         /* store the actual malloc pointer right before our aligned pointer. */
    966         retval = ptr + sizeof (void *);
    967         retval += alignment - (((size_t) retval) % alignment);
    968         *(((void **) retval) - 1) = ptr;
    969     }
    970     return retval;
    971 }
    972 
    973 void *
    974 SDL_SIMDRealloc(void *mem, const size_t len)
    975 {
    976     const size_t alignment = SDL_SIMDGetAlignment();
    977     const size_t padding = alignment - (len % alignment);
    978     const size_t padded = (padding != alignment) ? (len + padding) : len;
    979     Uint8 *retval = (Uint8*) mem;
    980     void *oldmem = mem;
    981     size_t memdiff = 0, ptrdiff;
    982     Uint8 *ptr;
    983 
    984     if (mem) {
    985         void **realptr = (void **) mem;
    986         realptr--;
    987         mem = *(((void **) mem) - 1);
    988 
    989         /* Check the delta between the real pointer and user pointer */
    990         memdiff = ((size_t) oldmem) - ((size_t) mem);
    991     }
    992 
    993     ptr = (Uint8 *) SDL_realloc(mem, padded + alignment + sizeof (void *));
    994 
    995     if (ptr == mem) {
    996         return retval; /* Pointer didn't change, nothing to do */
    997     }
    998     if (ptr == NULL) {
    999         return NULL; /* Out of memory, bail! */
   1000     }
   1001 
   1002     /* Store the actual malloc pointer right before our aligned pointer. */
   1003     retval = ptr + sizeof (void *);
   1004     retval += alignment - (((size_t) retval) % alignment);
   1005 
   1006     /* Make sure the delta is the same! */
   1007     if (mem) {
   1008         ptrdiff = ((size_t) retval) - ((size_t) ptr);
   1009         if (memdiff != ptrdiff) { /* Delta has changed, copy to new offset! */
   1010             oldmem = (void*) (((size_t) ptr) + memdiff);
   1011 
   1012             /* Even though the data past the old `len` is undefined, this is the
   1013              * only length value we have, and it guarantees that we copy all the
   1014              * previous memory anyhow.
   1015              */
   1016             SDL_memmove(retval, oldmem, len);
   1017         }
   1018     }
   1019 
   1020     /* Actually store the malloc pointer, finally. */
   1021     *(((void **) retval) - 1) = ptr;
   1022     return retval;
   1023 }
   1024 
   1025 void
   1026 SDL_SIMDFree(void *ptr)
   1027 {
   1028     if (ptr) {
   1029         void **realptr = (void **) ptr;
   1030         realptr--;
   1031         SDL_free(*(((void **) ptr) - 1));
   1032     }
   1033 }
   1034 
   1035 
   1036 #ifdef TEST_MAIN
   1037 
   1038 #include <stdio.h>
   1039 
   1040 int
   1041 main()
   1042 {
   1043     printf("CPU count: %d\n", SDL_GetCPUCount());
   1044     printf("CPU type: %s\n", SDL_GetCPUType());
   1045     printf("CPU name: %s\n", SDL_GetCPUName());
   1046     printf("CacheLine size: %d\n", SDL_GetCPUCacheLineSize());
   1047     printf("RDTSC: %d\n", SDL_HasRDTSC());
   1048     printf("Altivec: %d\n", SDL_HasAltiVec());
   1049     printf("MMX: %d\n", SDL_HasMMX());
   1050     printf("3DNow: %d\n", SDL_Has3DNow());
   1051     printf("SSE: %d\n", SDL_HasSSE());
   1052     printf("SSE2: %d\n", SDL_HasSSE2());
   1053     printf("SSE3: %d\n", SDL_HasSSE3());
   1054     printf("SSE4.1: %d\n", SDL_HasSSE41());
   1055     printf("SSE4.2: %d\n", SDL_HasSSE42());
   1056     printf("AVX: %d\n", SDL_HasAVX());
   1057     printf("AVX2: %d\n", SDL_HasAVX2());
   1058     printf("AVX-512F: %d\n", SDL_HasAVX512F());
   1059     printf("ARM SIMD: %d\n", SDL_HasARMSIMD());
   1060     printf("NEON: %d\n", SDL_HasNEON());
   1061     printf("RAM: %d MB\n", SDL_GetSystemRAM());
   1062     return 0;
   1063 }
   1064 
   1065 #endif /* TEST_MAIN */
   1066 
   1067 /* vi: set ts=4 sw=4 expandtab: */