duckstation

duckstation, but archived from the revision just before upstream changed it to a proprietary software project, this version is the libre one
git clone https://git.neptards.moe/u3shit/duckstation.git
Log | Files | Refs | README | LICENSE

xxh_x86dispatch.c (30585B)


      1 /*
      2  * xxHash - Extremely Fast Hash algorithm
      3  * Copyright (C) 2020 Yann Collet
      4  *
      5  * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions are
      9  * met:
     10  *
     11  *    * Redistributions of source code must retain the above copyright
     12  *      notice, this list of conditions and the following disclaimer.
     13  *    * Redistributions in binary form must reproduce the above
     14  *      copyright notice, this list of conditions and the following disclaimer
     15  *      in the documentation and/or other materials provided with the
     16  *      distribution.
     17  *
     18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29  *
     30  * You can contact the author at:
     31  *   - xxHash homepage: https://www.xxhash.com
     32  *   - xxHash source repository: https://github.com/Cyan4973/xxHash
     33  */
     34 
     35 
     36 /*!
     37  * @file xxh_x86dispatch.c
     38  *
     39  * Automatic dispatcher code for the @ref xxh3_family on x86-based targets.
     40  *
     41  * Optional add-on.
     42  *
     43  * **Compile this file with the default flags for your target.** Do not compile
     44  * with flags like `-mavx*`, `-march=native`, or `/arch:AVX*`, there will be
     45  * an error. See @ref XXH_X86DISPATCH_ALLOW_AVX for details.
     46  *
     47  * @defgroup dispatch x86 Dispatcher
     48  * @{
     49  */
     50 
     51 #if defined (__cplusplus)
     52 extern "C" {
     53 #endif
     54 
     55 #if defined(__x86_64__) || defined(__i386__) || defined(_M_IX86) || defined(_M_X64)
     56 
     57 /*!
     58  * @def XXH_X86DISPATCH_ALLOW_AVX
     59  * @brief Disables the AVX sanity check.
     60  *
     61  * Don't compile xxh_x86dispatch.c with options like `-mavx*`, `-march=native`,
     62  * or `/arch:AVX*`. It is intended to be compiled for the minimum target, and
     63  * it selectively enables SSE2, AVX2, and AVX512 when it is needed.
     64  *
     65  * Using this option _globally_ allows this feature, and therefore makes it
     66  * undefined behavior to execute on any CPU without said feature.
     67  *
     68  * Even if the source code isn't directly using AVX intrinsics in a function,
     69  * the compiler can still generate AVX code from autovectorization and by
     70  * "upgrading" SSE2 intrinsics to use the VEX prefixes (a.k.a. AVX128).
     71  *
     72  * Use the same flags that you use to compile the rest of the program; this
     73  * file will safely generate SSE2, AVX2, and AVX512 without these flags.
     74  *
     75  * Define XXH_X86DISPATCH_ALLOW_AVX to ignore this check, and feel free to open
     76  * an issue if there is a target in the future where AVX is a default feature.
     77  */
     78 #ifdef XXH_DOXYGEN
     79 #  define XXH_X86DISPATCH_ALLOW_AVX
     80 #endif
     81 
     82 #if defined(__AVX__) && !defined(XXH_X86DISPATCH_ALLOW_AVX)
     83 #  error "Do not compile xxh_x86dispatch.c with AVX enabled! See the comment above."
     84 #endif
     85 
     86 #ifdef __has_include
     87 #  define XXH_HAS_INCLUDE(header) __has_include(header)
     88 #else
     89 #  define XXH_HAS_INCLUDE(header) 0
     90 #endif
     91 
     92 /*!
     93  * @def XXH_DISPATCH_SCALAR
     94  * @brief Enables/dispatching the scalar code path.
     95  *
     96  * If this is defined to 0, SSE2 support is assumed. This reduces code size
     97  * when the scalar path is not needed.
     98  *
     99  * This is automatically defined to 0 when...
    100  *   - SSE2 support is enabled in the compiler
    101  *   - Targeting x86_64
    102  *   - Targeting Android x86
    103  *   - Targeting macOS
    104  */
    105 #ifndef XXH_DISPATCH_SCALAR
    106 #  if defined(__SSE2__) || (defined(_M_IX86_FP) && _M_IX86_FP >= 2) /* SSE2 on by default */ \
    107      || defined(__x86_64__) || defined(_M_X64) /* x86_64 */ \
    108      || defined(__ANDROID__) || defined(__APPLEv__) /* Android or macOS */
    109 #     define XXH_DISPATCH_SCALAR 0 /* disable */
    110 #  else
    111 #     define XXH_DISPATCH_SCALAR 1
    112 #  endif
    113 #endif
    114 /*!
    115  * @def XXH_DISPATCH_AVX2
    116  * @brief Enables/disables dispatching for AVX2.
    117  *
    118  * This is automatically detected if it is not defined.
    119  *  - GCC 4.7 and later are known to support AVX2, but >4.9 is required for
    120  *    to get the AVX2 intrinsics and typedefs without -mavx -mavx2.
    121  *  - Visual Studio 2013 Update 2 and later are known to support AVX2.
    122  *  - The GCC/Clang internal header `<avx2intrin.h>` is detected. While this is
    123  *    not allowed to be included directly, it still appears in the builtin
    124  *    include path and is detectable with `__has_include`.
    125  *
    126  * @see XXH_AVX2
    127  */
    128 #ifndef XXH_DISPATCH_AVX2
    129 #  if (defined(__GNUC__) && (__GNUC__ > 4)) /* GCC 5.0+ */ \
    130    || (defined(_MSC_VER) && _MSC_VER >= 1900 && !defined(__clang__)) /* VS 2015+ */ \
    131    || (defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 180030501 && !defined(__clang__)) /* VS 2013 Update 2 */ \
    132    || (XXH_HAS_INCLUDE(<avx2intrin.h>) && !defined(_MSC_VER)) /* GCC/Clang internal header */
    133 #    define XXH_DISPATCH_AVX2 1   /* enable dispatch towards AVX2 */
    134 #  else
    135 #    define XXH_DISPATCH_AVX2 0
    136 #  endif
    137 #endif /* XXH_DISPATCH_AVX2 */
    138 
    139 /*!
    140  * @def XXH_DISPATCH_AVX512
    141  * @brief Enables/disables dispatching for AVX512.
    142  *
    143  * Automatically detected if one of the following conditions is met:
    144  *  - GCC 4.9 and later are known to support AVX512.
    145  *  - Visual Studio 2017  and later are known to support AVX2.
    146  *  - The GCC/Clang internal header `<avx512fintrin.h>` is detected. While this
    147  *    is not allowed to be included directly, it still appears in the builtin
    148  *    include path and is detectable with `__has_include`.
    149  *
    150  * @see XXH_AVX512
    151  */
    152 #ifndef XXH_DISPATCH_AVX512
    153 #  if (defined(__GNUC__) \
    154        && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 9))) /* GCC 4.9+ */ \
    155    || (defined(_MSC_VER) && _MSC_VER >= 1910 && !defined(__clang__)) /* VS 2017+ */ \
    156    || (XXH_HAS_INCLUDE(<avx512fintrin.h>) && !defined(_MSC_VER)) /* GCC/Clang internal header */
    157 #    define XXH_DISPATCH_AVX512 1   /* enable dispatch towards AVX512 */
    158 #  else
    159 #    define XXH_DISPATCH_AVX512 0
    160 #  endif
    161 #endif /* XXH_DISPATCH_AVX512 */
    162 
    163 /*!
    164  * @def XXH_TARGET_SSE2
    165  * @brief Allows a function to be compiled with SSE2 intrinsics.
    166  *
    167  * Uses `__attribute__((__target__("sse2")))` on GCC to allow SSE2 to be used
    168  * even with `-mno-sse2`.
    169  *
    170  * @def XXH_TARGET_AVX2
    171  * @brief Like @ref XXH_TARGET_SSE2, but for AVX2.
    172  *
    173  * @def XXH_TARGET_AVX512
    174  * @brief Like @ref XXH_TARGET_SSE2, but for AVX512.
    175  */
    176 #if defined(__GNUC__)
    177 #  include <emmintrin.h> /* SSE2 */
    178 #  if XXH_DISPATCH_AVX2 || XXH_DISPATCH_AVX512
    179 #    include <immintrin.h> /* AVX2, AVX512F */
    180 #  endif
    181 #  define XXH_TARGET_SSE2 __attribute__((__target__("sse2")))
    182 #  define XXH_TARGET_AVX2 __attribute__((__target__("avx2")))
    183 #  define XXH_TARGET_AVX512 __attribute__((__target__("avx512f")))
    184 #elif defined(_MSC_VER)
    185 #  include <intrin.h>
    186 #  define XXH_TARGET_SSE2
    187 #  define XXH_TARGET_AVX2
    188 #  define XXH_TARGET_AVX512
    189 #else
    190 #  error "Dispatching is currently not supported for your compiler."
    191 #endif
    192 
    193 #ifdef XXH_DISPATCH_DEBUG
    194 /* debug logging */
    195 #  include <stdio.h>
    196 #  define XXH_debugPrint(str) { fprintf(stderr, "DEBUG: xxHash dispatch: %s \n", str); fflush(NULL); }
    197 #else
    198 #  define XXH_debugPrint(str) ((void)0)
    199 #  undef NDEBUG /* avoid redefinition */
    200 #  define NDEBUG
    201 #endif
    202 #include <assert.h>
    203 
    204 #define XXH_INLINE_ALL
    205 #define XXH_X86DISPATCH
    206 #include "xxhash.h"
    207 
    208 /*
    209  * Support both AT&T and Intel dialects
    210  *
    211  * GCC doesn't convert AT&T syntax to Intel syntax, and will error out if
    212  * compiled with -masm=intel. Instead, it supports dialect switching with
    213  * curly braces: { AT&T syntax | Intel syntax }
    214  *
    215  * Clang's integrated assembler automatically converts AT&T syntax to Intel if
    216  * needed, making the dialect switching useless (it isn't even supported).
    217  *
    218  * Note: Comments are written in the inline assembly itself.
    219  */
    220 #ifdef __clang__
    221 #  define XXH_I_ATT(intel, att) att "\n\t"
    222 #else
    223 #  define XXH_I_ATT(intel, att) "{" att "|" intel "}\n\t"
    224 #endif
    225 
    226 /*!
    227  * @internal
    228  * @brief Runs CPUID.
    229  *
    230  * @param eax, ecx The parameters to pass to CPUID, %eax and %ecx respectively.
    231  * @param abcd The array to store the result in, `{ eax, ebx, ecx, edx }`
    232  */
    233 static void XXH_cpuid(xxh_u32 eax, xxh_u32 ecx, xxh_u32* abcd)
    234 {
    235 #if defined(_MSC_VER)
    236     __cpuidex(abcd, eax, ecx);
    237 #else
    238     xxh_u32 ebx, edx;
    239 # if defined(__i386__) && defined(__PIC__)
    240     __asm__(
    241         "# Call CPUID\n\t"
    242         "#\n\t"
    243         "# On 32-bit x86 with PIC enabled, we are not allowed to overwrite\n\t"
    244         "# EBX, so we use EDI instead.\n\t"
    245         XXH_I_ATT("mov     edi, ebx",   "movl    %%ebx, %%edi")
    246         XXH_I_ATT("cpuid",              "cpuid"               )
    247         XXH_I_ATT("xchg    edi, ebx",   "xchgl   %%ebx, %%edi")
    248         : "=D" (ebx),
    249 # else
    250     __asm__(
    251         "# Call CPUID\n\t"
    252         XXH_I_ATT("cpuid",              "cpuid")
    253         : "=b" (ebx),
    254 # endif
    255               "+a" (eax), "+c" (ecx), "=d" (edx));
    256     abcd[0] = eax;
    257     abcd[1] = ebx;
    258     abcd[2] = ecx;
    259     abcd[3] = edx;
    260 #endif
    261 }
    262 
    263 /*
    264  * Modified version of Intel's guide
    265  * https://software.intel.com/en-us/articles/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family
    266  */
    267 
    268 #if XXH_DISPATCH_AVX2 || XXH_DISPATCH_AVX512
    269 /*!
    270  * @internal
    271  * @brief Runs `XGETBV`.
    272  *
    273  * While the CPU may support AVX2, the operating system might not properly save
    274  * the full YMM/ZMM registers.
    275  *
    276  * xgetbv is used for detecting this: Any compliant operating system will define
    277  * a set of flags in the xcr0 register indicating how it saves the AVX registers.
    278  *
    279  * You can manually disable this flag on Windows by running, as admin:
    280  *
    281  *   bcdedit.exe /set xsavedisable 1
    282  *
    283  * and rebooting. Run the same command with 0 to re-enable it.
    284  */
    285 static xxh_u64 XXH_xgetbv(void)
    286 {
    287 #if defined(_MSC_VER)
    288     return _xgetbv(0);  /* min VS2010 SP1 compiler is required */
    289 #else
    290     xxh_u32 xcr0_lo, xcr0_hi;
    291     __asm__(
    292         "# Call XGETBV\n\t"
    293         "#\n\t"
    294         "# Older assemblers (e.g. macOS's ancient GAS version) don't support\n\t"
    295         "# the XGETBV opcode, so we encode it by hand instead.\n\t"
    296         "# See <https://github.com/asmjit/asmjit/issues/78> for details.\n\t"
    297         ".byte   0x0f, 0x01, 0xd0\n\t"
    298        : "=a" (xcr0_lo), "=d" (xcr0_hi) : "c" (0));
    299     return xcr0_lo | ((xxh_u64)xcr0_hi << 32);
    300 #endif
    301 }
    302 #endif
    303 
    304 #define XXH_SSE2_CPUID_MASK (1 << 26)
    305 #define XXH_OSXSAVE_CPUID_MASK ((1 << 26) | (1 << 27))
    306 #define XXH_AVX2_CPUID_MASK (1 << 5)
    307 #define XXH_AVX2_XGETBV_MASK ((1 << 2) | (1 << 1))
    308 #define XXH_AVX512F_CPUID_MASK (1 << 16)
    309 #define XXH_AVX512F_XGETBV_MASK ((7 << 5) | (1 << 2) | (1 << 1))
    310 
    311 /*!
    312  * @internal
    313  * @brief Returns the best XXH3 implementation.
    314  *
    315  * Runs various CPUID/XGETBV tests to try and determine the best implementation.
    316  *
    317  * @ret The best @ref XXH_VECTOR implementation.
    318  * @see XXH_VECTOR_TYPES
    319  */
    320 static int XXH_featureTest(void)
    321 {
    322     xxh_u32 abcd[4];
    323     xxh_u32 max_leaves;
    324     int best = XXH_SCALAR;
    325 #if XXH_DISPATCH_AVX2 || XXH_DISPATCH_AVX512
    326     xxh_u64 xgetbv_val;
    327 #endif
    328 #if defined(__GNUC__) && defined(__i386__)
    329     xxh_u32 cpuid_supported;
    330     __asm__(
    331         "# For the sake of ruthless backwards compatibility, check if CPUID\n\t"
    332         "# is supported in the EFLAGS on i386.\n\t"
    333         "# This is not necessary on x86_64 - CPUID is mandatory.\n\t"
    334         "#   The ID flag (bit 21) in the EFLAGS register indicates support\n\t"
    335         "#   for the CPUID instruction. If a software procedure can set and\n\t"
    336         "#   clear this flag, the processor executing the procedure supports\n\t"
    337         "#   the CPUID instruction.\n\t"
    338         "#   <https://c9x.me/x86/html/file_module_x86_id_45.html>\n\t"
    339         "#\n\t"
    340         "# Routine is from <https://wiki.osdev.org/CPUID>.\n\t"
    341 
    342         "# Save EFLAGS\n\t"
    343         XXH_I_ATT("pushfd",                           "pushfl"                    )
    344         "# Store EFLAGS\n\t"
    345         XXH_I_ATT("pushfd",                           "pushfl"                    )
    346         "# Invert the ID bit in stored EFLAGS\n\t"
    347         XXH_I_ATT("xor     dword ptr[esp], 0x200000", "xorl    $0x200000, (%%esp)")
    348         "# Load stored EFLAGS (with ID bit inverted)\n\t"
    349         XXH_I_ATT("popfd",                            "popfl"                     )
    350         "# Store EFLAGS again (ID bit may or not be inverted)\n\t"
    351         XXH_I_ATT("pushfd",                           "pushfl"                    )
    352         "# eax = modified EFLAGS (ID bit may or may not be inverted)\n\t"
    353         XXH_I_ATT("pop     eax",                      "popl    %%eax"             )
    354         "# eax = whichever bits were changed\n\t"
    355         XXH_I_ATT("xor     eax, dword ptr[esp]",      "xorl    (%%esp), %%eax"    )
    356         "# Restore original EFLAGS\n\t"
    357         XXH_I_ATT("popfd",                            "popfl"                     )
    358         "# eax = zero if ID bit can't be changed, else non-zero\n\t"
    359         XXH_I_ATT("and     eax, 0x200000",            "andl    $0x200000, %%eax"  )
    360         : "=a" (cpuid_supported) :: "cc");
    361 
    362     if (XXH_unlikely(!cpuid_supported)) {
    363         XXH_debugPrint("CPUID support is not detected!");
    364         return best;
    365     }
    366 
    367 #endif
    368     /* Check how many CPUID pages we have */
    369     XXH_cpuid(0, 0, abcd);
    370     max_leaves = abcd[0];
    371 
    372     /* Shouldn't happen on hardware, but happens on some QEMU configs. */
    373     if (XXH_unlikely(max_leaves == 0)) {
    374         XXH_debugPrint("Max CPUID leaves == 0!");
    375         return best;
    376     }
    377 
    378     /* Check for SSE2, OSXSAVE and xgetbv */
    379     XXH_cpuid(1, 0, abcd);
    380 
    381     /*
    382      * Test for SSE2. The check is redundant on x86_64, but it doesn't hurt.
    383      */
    384     if (XXH_unlikely((abcd[3] & XXH_SSE2_CPUID_MASK) != XXH_SSE2_CPUID_MASK))
    385         return best;
    386 
    387     XXH_debugPrint("SSE2 support detected.");
    388 
    389     best = XXH_SSE2;
    390 #if XXH_DISPATCH_AVX2 || XXH_DISPATCH_AVX512
    391     /* Make sure we have enough leaves */
    392     if (XXH_unlikely(max_leaves < 7))
    393         return best;
    394 
    395     /* Test for OSXSAVE and XGETBV */
    396     if ((abcd[2] & XXH_OSXSAVE_CPUID_MASK) != XXH_OSXSAVE_CPUID_MASK)
    397         return best;
    398 
    399     /* CPUID check for AVX features */
    400     XXH_cpuid(7, 0, abcd);
    401 
    402     xgetbv_val = XXH_xgetbv();
    403 #if XXH_DISPATCH_AVX2
    404     /* Validate that AVX2 is supported by the CPU */
    405     if ((abcd[1] & XXH_AVX2_CPUID_MASK) != XXH_AVX2_CPUID_MASK)
    406         return best;
    407 
    408     /* Validate that the OS supports YMM registers */
    409     if ((xgetbv_val & XXH_AVX2_XGETBV_MASK) != XXH_AVX2_XGETBV_MASK) {
    410         XXH_debugPrint("AVX2 supported by the CPU, but not the OS.");
    411         return best;
    412     }
    413 
    414     /* AVX2 supported */
    415     XXH_debugPrint("AVX2 support detected.");
    416     best = XXH_AVX2;
    417 #endif
    418 #if XXH_DISPATCH_AVX512
    419     /* Check if AVX512F is supported by the CPU */
    420     if ((abcd[1] & XXH_AVX512F_CPUID_MASK) != XXH_AVX512F_CPUID_MASK) {
    421         XXH_debugPrint("AVX512F not supported by CPU");
    422         return best;
    423     }
    424 
    425     /* Validate that the OS supports ZMM registers */
    426     if ((xgetbv_val & XXH_AVX512F_XGETBV_MASK) != XXH_AVX512F_XGETBV_MASK) {
    427         XXH_debugPrint("AVX512F supported by the CPU, but not the OS.");
    428         return best;
    429     }
    430 
    431     /* AVX512F supported */
    432     XXH_debugPrint("AVX512F support detected.");
    433     best = XXH_AVX512;
    434 #endif
    435 #endif
    436     return best;
    437 }
    438 
    439 
    440 /* ===   Vector implementations   === */
    441 
    442 /*!
    443  * @internal
    444  * @brief Defines the various dispatch functions.
    445  *
    446  * TODO: Consolidate?
    447  *
    448  * @param suffix The suffix for the functions, e.g. sse2 or scalar
    449  * @param target XXH_TARGET_* or empty.
    450  */
    451 #define XXH_DEFINE_DISPATCH_FUNCS(suffix, target)                             \
    452                                                                               \
    453 /* ===   XXH3, default variants   === */                                      \
    454                                                                               \
    455 XXH_NO_INLINE target XXH64_hash_t                                             \
    456 XXHL64_default_##suffix(const void* XXH_RESTRICT input, size_t len)           \
    457 {                                                                             \
    458     return XXH3_hashLong_64b_internal(                                        \
    459                input, len, XXH3_kSecret, sizeof(XXH3_kSecret),                \
    460                XXH3_accumulate_512_##suffix, XXH3_scrambleAcc_##suffix        \
    461     );                                                                        \
    462 }                                                                             \
    463                                                                               \
    464 /* ===   XXH3, Seeded variants   === */                                       \
    465                                                                               \
    466 XXH_NO_INLINE target XXH64_hash_t                                             \
    467 XXHL64_seed_##suffix(const void* XXH_RESTRICT input, size_t len,              \
    468                      XXH64_hash_t seed)                                       \
    469 {                                                                             \
    470     return XXH3_hashLong_64b_withSeed_internal(                               \
    471                     input, len, seed, XXH3_accumulate_512_##suffix,           \
    472                     XXH3_scrambleAcc_##suffix, XXH3_initCustomSecret_##suffix \
    473     );                                                                        \
    474 }                                                                             \
    475                                                                               \
    476 /* ===   XXH3, Secret variants   === */                                       \
    477                                                                               \
    478 XXH_NO_INLINE target XXH64_hash_t                                             \
    479 XXHL64_secret_##suffix(const void* XXH_RESTRICT input, size_t len,            \
    480                        const void* secret, size_t secretLen)                  \
    481 {                                                                             \
    482     return XXH3_hashLong_64b_internal(                                        \
    483                     input, len, secret, secretLen,                            \
    484                     XXH3_accumulate_512_##suffix, XXH3_scrambleAcc_##suffix   \
    485     );                                                                        \
    486 }                                                                             \
    487                                                                               \
    488 /* ===   XXH3 update variants   === */                                        \
    489                                                                               \
    490 XXH_NO_INLINE target XXH_errorcode                                            \
    491 XXH3_update_##suffix(XXH3_state_t* state, const void* input, size_t len)      \
    492 {                                                                             \
    493     return XXH3_update(state, (const xxh_u8*)input, len,                      \
    494                     XXH3_accumulate_512_##suffix, XXH3_scrambleAcc_##suffix); \
    495 }                                                                             \
    496                                                                               \
    497 /* ===   XXH128 default variants   === */                                     \
    498                                                                               \
    499 XXH_NO_INLINE target XXH128_hash_t                                            \
    500 XXHL128_default_##suffix(const void* XXH_RESTRICT input, size_t len)          \
    501 {                                                                             \
    502     return XXH3_hashLong_128b_internal(                                       \
    503                     input, len, XXH3_kSecret, sizeof(XXH3_kSecret),           \
    504                     XXH3_accumulate_512_##suffix, XXH3_scrambleAcc_##suffix   \
    505     );                                                                        \
    506 }                                                                             \
    507                                                                               \
    508 /* ===   XXH128 Secret variants   === */                                      \
    509                                                                               \
    510 XXH_NO_INLINE target XXH128_hash_t                                            \
    511 XXHL128_secret_##suffix(const void* XXH_RESTRICT input, size_t len,           \
    512                         const void* XXH_RESTRICT secret, size_t secretLen)    \
    513 {                                                                             \
    514     return XXH3_hashLong_128b_internal(                                       \
    515                     input, len, (const xxh_u8*)secret, secretLen,             \
    516                     XXH3_accumulate_512_##suffix, XXH3_scrambleAcc_##suffix); \
    517 }                                                                             \
    518                                                                               \
    519 /* ===   XXH128 Seeded variants   === */                                      \
    520                                                                               \
    521 XXH_NO_INLINE target XXH128_hash_t                                            \
    522 XXHL128_seed_##suffix(const void* XXH_RESTRICT input, size_t len,             \
    523                       XXH64_hash_t seed)                                      \
    524 {                                                                             \
    525     return XXH3_hashLong_128b_withSeed_internal(input, len, seed,             \
    526                     XXH3_accumulate_512_##suffix, XXH3_scrambleAcc_##suffix,  \
    527                     XXH3_initCustomSecret_##suffix);                          \
    528 }
    529 
    530 /* End XXH_DEFINE_DISPATCH_FUNCS */
    531 
    532 #if XXH_DISPATCH_SCALAR
    533 XXH_DEFINE_DISPATCH_FUNCS(scalar, /* nothing */)
    534 #endif
    535 XXH_DEFINE_DISPATCH_FUNCS(sse2, XXH_TARGET_SSE2)
    536 #if XXH_DISPATCH_AVX2
    537 XXH_DEFINE_DISPATCH_FUNCS(avx2, XXH_TARGET_AVX2)
    538 #endif
    539 #if XXH_DISPATCH_AVX512
    540 XXH_DEFINE_DISPATCH_FUNCS(avx512, XXH_TARGET_AVX512)
    541 #endif
    542 #undef XXH_DEFINE_DISPATCH_FUNCS
    543 
    544 /* ====    Dispatchers    ==== */
    545 
    546 typedef XXH64_hash_t (*XXH3_dispatchx86_hashLong64_default)(const void* XXH_RESTRICT, size_t);
    547 
    548 typedef XXH64_hash_t (*XXH3_dispatchx86_hashLong64_withSeed)(const void* XXH_RESTRICT, size_t, XXH64_hash_t);
    549 
    550 typedef XXH64_hash_t (*XXH3_dispatchx86_hashLong64_withSecret)(const void* XXH_RESTRICT, size_t, const void* XXH_RESTRICT, size_t);
    551 
    552 typedef XXH_errorcode (*XXH3_dispatchx86_update)(XXH3_state_t*, const void*, size_t);
    553 
    554 typedef struct {
    555     XXH3_dispatchx86_hashLong64_default    hashLong64_default;
    556     XXH3_dispatchx86_hashLong64_withSeed   hashLong64_seed;
    557     XXH3_dispatchx86_hashLong64_withSecret hashLong64_secret;
    558     XXH3_dispatchx86_update                update;
    559 } XXH_dispatchFunctions_s;
    560 
    561 #define XXH_NB_DISPATCHES 4
    562 
    563 /*!
    564  * @internal
    565  * @brief Table of dispatchers for @ref XXH3_64bits().
    566  *
    567  * @pre The indices must match @ref XXH_VECTOR_TYPE.
    568  */
    569 static const XXH_dispatchFunctions_s XXH_kDispatch[XXH_NB_DISPATCHES] = {
    570 #if XXH_DISPATCH_SCALAR
    571     /* Scalar */ { XXHL64_default_scalar, XXHL64_seed_scalar, XXHL64_secret_scalar, XXH3_update_scalar },
    572 #else
    573     /* Scalar */ { NULL, NULL, NULL, NULL },
    574 #endif
    575     /* SSE2   */ { XXHL64_default_sse2,   XXHL64_seed_sse2,   XXHL64_secret_sse2,   XXH3_update_sse2 },
    576 #if XXH_DISPATCH_AVX2
    577     /* AVX2   */ { XXHL64_default_avx2,   XXHL64_seed_avx2,   XXHL64_secret_avx2,   XXH3_update_avx2 },
    578 #else
    579     /* AVX2   */ { NULL, NULL, NULL, NULL },
    580 #endif
    581 #if XXH_DISPATCH_AVX512
    582     /* AVX512 */ { XXHL64_default_avx512, XXHL64_seed_avx512, XXHL64_secret_avx512, XXH3_update_avx512 }
    583 #else
    584     /* AVX512 */ { NULL, NULL, NULL, NULL }
    585 #endif
    586 };
    587 /*!
    588  * @internal
    589  * @brief The selected dispatch table for @ref XXH3_64bits().
    590  */
    591 static XXH_dispatchFunctions_s XXH_g_dispatch = { NULL, NULL, NULL, NULL };
    592 
    593 
    594 typedef XXH128_hash_t (*XXH3_dispatchx86_hashLong128_default)(const void* XXH_RESTRICT, size_t);
    595 
    596 typedef XXH128_hash_t (*XXH3_dispatchx86_hashLong128_withSeed)(const void* XXH_RESTRICT, size_t, XXH64_hash_t);
    597 
    598 typedef XXH128_hash_t (*XXH3_dispatchx86_hashLong128_withSecret)(const void* XXH_RESTRICT, size_t, const void* XXH_RESTRICT, size_t);
    599 
    600 typedef struct {
    601     XXH3_dispatchx86_hashLong128_default    hashLong128_default;
    602     XXH3_dispatchx86_hashLong128_withSeed   hashLong128_seed;
    603     XXH3_dispatchx86_hashLong128_withSecret hashLong128_secret;
    604     XXH3_dispatchx86_update                 update;
    605 } XXH_dispatch128Functions_s;
    606 
    607 
    608 /*!
    609  * @internal
    610  * @brief Table of dispatchers for @ref XXH3_128bits().
    611  *
    612  * @pre The indices must match @ref XXH_VECTOR_TYPE.
    613  */
    614 static const XXH_dispatch128Functions_s XXH_kDispatch128[XXH_NB_DISPATCHES] = {
    615 #if XXH_DISPATCH_SCALAR
    616     /* Scalar */ { XXHL128_default_scalar, XXHL128_seed_scalar, XXHL128_secret_scalar, XXH3_update_scalar },
    617 #else
    618     /* Scalar */ { NULL, NULL, NULL, NULL },
    619 #endif
    620     /* SSE2   */ { XXHL128_default_sse2,   XXHL128_seed_sse2,   XXHL128_secret_sse2,   XXH3_update_sse2 },
    621 #if XXH_DISPATCH_AVX2
    622     /* AVX2   */ { XXHL128_default_avx2,   XXHL128_seed_avx2,   XXHL128_secret_avx2,   XXH3_update_avx2 },
    623 #else
    624     /* AVX2   */ { NULL, NULL, NULL, NULL },
    625 #endif
    626 #if XXH_DISPATCH_AVX512
    627     /* AVX512 */ { XXHL128_default_avx512, XXHL128_seed_avx512, XXHL128_secret_avx512, XXH3_update_avx512 }
    628 #else
    629     /* AVX512 */ { NULL, NULL, NULL, NULL }
    630 #endif
    631 };
    632 
    633 /*!
    634  * @internal
    635  * @brief The selected dispatch table for @ref XXH3_64bits().
    636  */
    637 static XXH_dispatch128Functions_s XXH_g_dispatch128 = { NULL, NULL, NULL, NULL };
    638 
    639 /*!
    640  * @internal
    641  * @brief Runs a CPUID check and sets the correct dispatch tables.
    642  */
    643 static void XXH_setDispatch(void)
    644 {
    645     int vecID = XXH_featureTest();
    646     XXH_STATIC_ASSERT(XXH_AVX512 == XXH_NB_DISPATCHES-1);
    647     assert(XXH_SCALAR <= vecID && vecID <= XXH_AVX512);
    648 #if !XXH_DISPATCH_SCALAR
    649     assert(vecID != XXH_SCALAR);
    650 #endif
    651 #if !XXH_DISPATCH_AVX512
    652     assert(vecID != XXH_AVX512);
    653 #endif
    654 #if !XXH_DISPATCH_AVX2
    655     assert(vecID != XXH_AVX2);
    656 #endif
    657     XXH_g_dispatch = XXH_kDispatch[vecID];
    658     XXH_g_dispatch128 = XXH_kDispatch128[vecID];
    659 }
    660 
    661 
    662 /* ====    XXH3 public functions    ==== */
    663 
    664 static XXH64_hash_t
    665 XXH3_hashLong_64b_defaultSecret_selection(const void* input, size_t len,
    666                                           XXH64_hash_t seed64, const xxh_u8* secret, size_t secretLen)
    667 {
    668     (void)seed64; (void)secret; (void)secretLen;
    669     if (XXH_g_dispatch.hashLong64_default == NULL) XXH_setDispatch();
    670     return XXH_g_dispatch.hashLong64_default(input, len);
    671 }
    672 
    673 XXH64_hash_t XXH3_64bits_dispatch(const void* input, size_t len)
    674 {
    675     return XXH3_64bits_internal(input, len, 0, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_defaultSecret_selection);
    676 }
    677 
    678 static XXH64_hash_t
    679 XXH3_hashLong_64b_withSeed_selection(const void* input, size_t len,
    680                                      XXH64_hash_t seed64, const xxh_u8* secret, size_t secretLen)
    681 {
    682     (void)secret; (void)secretLen;
    683     if (XXH_g_dispatch.hashLong64_seed == NULL) XXH_setDispatch();
    684     return XXH_g_dispatch.hashLong64_seed(input, len, seed64);
    685 }
    686 
    687 XXH64_hash_t XXH3_64bits_withSeed_dispatch(const void* input, size_t len, XXH64_hash_t seed)
    688 {
    689     return XXH3_64bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_withSeed_selection);
    690 }
    691 
    692 static XXH64_hash_t
    693 XXH3_hashLong_64b_withSecret_selection(const void* input, size_t len,
    694                                        XXH64_hash_t seed64, const xxh_u8* secret, size_t secretLen)
    695 {
    696     (void)seed64;
    697     if (XXH_g_dispatch.hashLong64_secret == NULL) XXH_setDispatch();
    698     return XXH_g_dispatch.hashLong64_secret(input, len, secret, secretLen);
    699 }
    700 
    701 XXH64_hash_t XXH3_64bits_withSecret_dispatch(const void* input, size_t len, const void* secret, size_t secretLen)
    702 {
    703     return XXH3_64bits_internal(input, len, 0, secret, secretLen, XXH3_hashLong_64b_withSecret_selection);
    704 }
    705 
    706 XXH_errorcode
    707 XXH3_64bits_update_dispatch(XXH3_state_t* state, const void* input, size_t len)
    708 {
    709     if (XXH_g_dispatch.update == NULL) XXH_setDispatch();
    710     return XXH_g_dispatch.update(state, (const xxh_u8*)input, len);
    711 }
    712 
    713 
    714 /* ====    XXH128 public functions    ==== */
    715 
    716 static XXH128_hash_t
    717 XXH3_hashLong_128b_defaultSecret_selection(const void* input, size_t len,
    718                                            XXH64_hash_t seed64, const void* secret, size_t secretLen)
    719 {
    720     (void)seed64; (void)secret; (void)secretLen;
    721     if (XXH_g_dispatch128.hashLong128_default == NULL) XXH_setDispatch();
    722     return XXH_g_dispatch128.hashLong128_default(input, len);
    723 }
    724 
    725 XXH128_hash_t XXH3_128bits_dispatch(const void* input, size_t len)
    726 {
    727     return XXH3_128bits_internal(input, len, 0, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_128b_defaultSecret_selection);
    728 }
    729 
    730 static XXH128_hash_t
    731 XXH3_hashLong_128b_withSeed_selection(const void* input, size_t len,
    732                                      XXH64_hash_t seed64, const void* secret, size_t secretLen)
    733 {
    734     (void)secret; (void)secretLen;
    735     if (XXH_g_dispatch128.hashLong128_seed == NULL) XXH_setDispatch();
    736     return XXH_g_dispatch128.hashLong128_seed(input, len, seed64);
    737 }
    738 
    739 XXH128_hash_t XXH3_128bits_withSeed_dispatch(const void* input, size_t len, XXH64_hash_t seed)
    740 {
    741     return XXH3_128bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_128b_withSeed_selection);
    742 }
    743 
    744 static XXH128_hash_t
    745 XXH3_hashLong_128b_withSecret_selection(const void* input, size_t len,
    746                                         XXH64_hash_t seed64, const void* secret, size_t secretLen)
    747 {
    748     (void)seed64;
    749     if (XXH_g_dispatch128.hashLong128_secret == NULL) XXH_setDispatch();
    750     return XXH_g_dispatch128.hashLong128_secret(input, len, secret, secretLen);
    751 }
    752 
    753 XXH128_hash_t XXH3_128bits_withSecret_dispatch(const void* input, size_t len, const void* secret, size_t secretLen)
    754 {
    755     return XXH3_128bits_internal(input, len, 0, secret, secretLen, XXH3_hashLong_128b_withSecret_selection);
    756 }
    757 
    758 XXH_errorcode
    759 XXH3_128bits_update_dispatch(XXH3_state_t* state, const void* input, size_t len)
    760 {
    761     if (XXH_g_dispatch128.update == NULL) XXH_setDispatch();
    762     return XXH_g_dispatch128.update(state, (const xxh_u8*)input, len);
    763 }
    764 
    765 #endif // defined(__x86_64__) || defined(__i386__) || defined(_M_IX86) || defined(_M_X64)
    766 
    767 #if defined (__cplusplus)
    768 }
    769 #endif
    770 /*! @} */