xxh_x86dispatch.c (30585B)
1 /* 2 * xxHash - Extremely Fast Hash algorithm 3 * Copyright (C) 2020 Yann Collet 4 * 5 * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php) 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are 9 * met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above 14 * copyright notice, this list of conditions and the following disclaimer 15 * in the documentation and/or other materials provided with the 16 * distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 * 30 * You can contact the author at: 31 * - xxHash homepage: https://www.xxhash.com 32 * - xxHash source repository: https://github.com/Cyan4973/xxHash 33 */ 34 35 36 /*! 37 * @file xxh_x86dispatch.c 38 * 39 * Automatic dispatcher code for the @ref xxh3_family on x86-based targets. 40 * 41 * Optional add-on. 
42 * 43 * **Compile this file with the default flags for your target.** Do not compile 44 * with flags like `-mavx*`, `-march=native`, or `/arch:AVX*`, there will be 45 * an error. See @ref XXH_X86DISPATCH_ALLOW_AVX for details. 46 * 47 * @defgroup dispatch x86 Dispatcher 48 * @{ 49 */ 50 51 #if defined (__cplusplus) 52 extern "C" { 53 #endif 54 55 #if defined(__x86_64__) || defined(__i386__) || defined(_M_IX86) || defined(_M_X64) 56 57 /*! 58 * @def XXH_X86DISPATCH_ALLOW_AVX 59 * @brief Disables the AVX sanity check. 60 * 61 * Don't compile xxh_x86dispatch.c with options like `-mavx*`, `-march=native`, 62 * or `/arch:AVX*`. It is intended to be compiled for the minimum target, and 63 * it selectively enables SSE2, AVX2, and AVX512 when it is needed. 64 * 65 * Using this option _globally_ allows this feature, and therefore makes it 66 * undefined behavior to execute on any CPU without said feature. 67 * 68 * Even if the source code isn't directly using AVX intrinsics in a function, 69 * the compiler can still generate AVX code from autovectorization and by 70 * "upgrading" SSE2 intrinsics to use the VEX prefixes (a.k.a. AVX128). 71 * 72 * Use the same flags that you use to compile the rest of the program; this 73 * file will safely generate SSE2, AVX2, and AVX512 without these flags. 74 * 75 * Define XXH_X86DISPATCH_ALLOW_AVX to ignore this check, and feel free to open 76 * an issue if there is a target in the future where AVX is a default feature. 77 */ 78 #ifdef XXH_DOXYGEN 79 # define XXH_X86DISPATCH_ALLOW_AVX 80 #endif 81 82 #if defined(__AVX__) && !defined(XXH_X86DISPATCH_ALLOW_AVX) 83 # error "Do not compile xxh_x86dispatch.c with AVX enabled! See the comment above." 84 #endif 85 86 #ifdef __has_include 87 # define XXH_HAS_INCLUDE(header) __has_include(header) 88 #else 89 # define XXH_HAS_INCLUDE(header) 0 90 #endif 91 92 /*! 93 * @def XXH_DISPATCH_SCALAR 94 * @brief Enables/dispatching the scalar code path. 
95 * 96 * If this is defined to 0, SSE2 support is assumed. This reduces code size 97 * when the scalar path is not needed. 98 * 99 * This is automatically defined to 0 when... 100 * - SSE2 support is enabled in the compiler 101 * - Targeting x86_64 102 * - Targeting Android x86 103 * - Targeting macOS 104 */ 105 #ifndef XXH_DISPATCH_SCALAR 106 # if defined(__SSE2__) || (defined(_M_IX86_FP) && _M_IX86_FP >= 2) /* SSE2 on by default */ \ 107 || defined(__x86_64__) || defined(_M_X64) /* x86_64 */ \ 108 || defined(__ANDROID__) || defined(__APPLEv__) /* Android or macOS */ 109 # define XXH_DISPATCH_SCALAR 0 /* disable */ 110 # else 111 # define XXH_DISPATCH_SCALAR 1 112 # endif 113 #endif 114 /*! 115 * @def XXH_DISPATCH_AVX2 116 * @brief Enables/disables dispatching for AVX2. 117 * 118 * This is automatically detected if it is not defined. 119 * - GCC 4.7 and later are known to support AVX2, but >4.9 is required for 120 * to get the AVX2 intrinsics and typedefs without -mavx -mavx2. 121 * - Visual Studio 2013 Update 2 and later are known to support AVX2. 122 * - The GCC/Clang internal header `<avx2intrin.h>` is detected. While this is 123 * not allowed to be included directly, it still appears in the builtin 124 * include path and is detectable with `__has_include`. 125 * 126 * @see XXH_AVX2 127 */ 128 #ifndef XXH_DISPATCH_AVX2 129 # if (defined(__GNUC__) && (__GNUC__ > 4)) /* GCC 5.0+ */ \ 130 || (defined(_MSC_VER) && _MSC_VER >= 1900 && !defined(__clang__)) /* VS 2015+ */ \ 131 || (defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 180030501 && !defined(__clang__)) /* VS 2013 Update 2 */ \ 132 || (XXH_HAS_INCLUDE(<avx2intrin.h>) && !defined(_MSC_VER)) /* GCC/Clang internal header */ 133 # define XXH_DISPATCH_AVX2 1 /* enable dispatch towards AVX2 */ 134 # else 135 # define XXH_DISPATCH_AVX2 0 136 # endif 137 #endif /* XXH_DISPATCH_AVX2 */ 138 139 /*! 140 * @def XXH_DISPATCH_AVX512 141 * @brief Enables/disables dispatching for AVX512. 
142 * 143 * Automatically detected if one of the following conditions is met: 144 * - GCC 4.9 and later are known to support AVX512. 145 * - Visual Studio 2017 and later are known to support AVX2. 146 * - The GCC/Clang internal header `<avx512fintrin.h>` is detected. While this 147 * is not allowed to be included directly, it still appears in the builtin 148 * include path and is detectable with `__has_include`. 149 * 150 * @see XXH_AVX512 151 */ 152 #ifndef XXH_DISPATCH_AVX512 153 # if (defined(__GNUC__) \ 154 && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 9))) /* GCC 4.9+ */ \ 155 || (defined(_MSC_VER) && _MSC_VER >= 1910 && !defined(__clang__)) /* VS 2017+ */ \ 156 || (XXH_HAS_INCLUDE(<avx512fintrin.h>) && !defined(_MSC_VER)) /* GCC/Clang internal header */ 157 # define XXH_DISPATCH_AVX512 1 /* enable dispatch towards AVX512 */ 158 # else 159 # define XXH_DISPATCH_AVX512 0 160 # endif 161 #endif /* XXH_DISPATCH_AVX512 */ 162 163 /*! 164 * @def XXH_TARGET_SSE2 165 * @brief Allows a function to be compiled with SSE2 intrinsics. 166 * 167 * Uses `__attribute__((__target__("sse2")))` on GCC to allow SSE2 to be used 168 * even with `-mno-sse2`. 169 * 170 * @def XXH_TARGET_AVX2 171 * @brief Like @ref XXH_TARGET_SSE2, but for AVX2. 172 * 173 * @def XXH_TARGET_AVX512 174 * @brief Like @ref XXH_TARGET_SSE2, but for AVX512. 175 */ 176 #if defined(__GNUC__) 177 # include <emmintrin.h> /* SSE2 */ 178 # if XXH_DISPATCH_AVX2 || XXH_DISPATCH_AVX512 179 # include <immintrin.h> /* AVX2, AVX512F */ 180 # endif 181 # define XXH_TARGET_SSE2 __attribute__((__target__("sse2"))) 182 # define XXH_TARGET_AVX2 __attribute__((__target__("avx2"))) 183 # define XXH_TARGET_AVX512 __attribute__((__target__("avx512f"))) 184 #elif defined(_MSC_VER) 185 # include <intrin.h> 186 # define XXH_TARGET_SSE2 187 # define XXH_TARGET_AVX2 188 # define XXH_TARGET_AVX512 189 #else 190 # error "Dispatching is currently not supported for your compiler." 
#endif

#ifdef XXH_DISPATCH_DEBUG
/* debug logging */
#  include <stdio.h>
#  define XXH_debugPrint(str) { fprintf(stderr, "DEBUG: xxHash dispatch: %s \n", str); fflush(NULL); }
#else
#  define XXH_debugPrint(str) ((void)0)
   /* Force asserts off in non-debug builds before <assert.h> is included. */
#  undef NDEBUG /* avoid redefinition */
#  define NDEBUG
#endif
#include <assert.h>

#define XXH_INLINE_ALL
#define XXH_X86DISPATCH
#include "xxhash.h"

/*
 * Support both AT&T and Intel dialects
 *
 * GCC doesn't convert AT&T syntax to Intel syntax, and will error out if
 * compiled with -masm=intel. Instead, it supports dialect switching with
 * curly braces: { AT&T syntax | Intel syntax }
 *
 * Clang's integrated assembler automatically converts AT&T syntax to Intel if
 * needed, making the dialect switching useless (it isn't even supported).
 *
 * Note: Comments are written in the inline assembly itself.
 */
#ifdef __clang__
#  define XXH_I_ATT(intel, att) att "\n\t"
#else
#  define XXH_I_ATT(intel, att) "{" att "|" intel "}\n\t"
#endif

/*!
 * @internal
 * @brief Runs CPUID.
 *
 * @param eax , ecx The parameters to pass to CPUID, %eax and %ecx respectively.
 * @param abcd The array to store the result in, `{ eax, ebx, ecx, edx }`
 */
static void XXH_cpuid(xxh_u32 eax, xxh_u32 ecx, xxh_u32* abcd)
{
#if defined(_MSC_VER)
    __cpuidex(abcd, eax, ecx);
#else
    xxh_u32 ebx, edx;
# if defined(__i386__) && defined(__PIC__)
    /* On 32-bit PIC, EBX holds the GOT pointer and must be preserved. */
    __asm__(
        "# Call CPUID\n\t"
        "#\n\t"
        "# On 32-bit x86 with PIC enabled, we are not allowed to overwrite\n\t"
        "# EBX, so we use EDI instead.\n\t"
        XXH_I_ATT("mov edi, ebx", "movl %%ebx, %%edi")
        XXH_I_ATT("cpuid",        "cpuid"            )
        XXH_I_ATT("xchg edi, ebx", "xchgl %%ebx, %%edi")
        : "=D" (ebx),
# else
    __asm__(
        "# Call CPUID\n\t"
        XXH_I_ATT("cpuid", "cpuid")
        : "=b" (ebx),
# endif
          "+a" (eax), "+c" (ecx), "=d" (edx));
    abcd[0] = eax;
    abcd[1] = ebx;
    abcd[2] = ecx;
    abcd[3] = edx;
#endif
}

/*
 * Modified version of Intel's guide
 * https://software.intel.com/en-us/articles/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family
 */

#if XXH_DISPATCH_AVX2 || XXH_DISPATCH_AVX512
/*!
 * @internal
 * @brief Runs `XGETBV`.
 *
 * While the CPU may support AVX2, the operating system might not properly save
 * the full YMM/ZMM registers.
 *
 * xgetbv is used for detecting this: Any compliant operating system will define
 * a set of flags in the xcr0 register indicating how it saves the AVX registers.
 *
 * You can manually disable this flag on Windows by running, as admin:
 *
 *   bcdedit.exe /set xsavedisable 1
 *
 * and rebooting. Run the same command with 0 to re-enable it.
 */
static xxh_u64 XXH_xgetbv(void)
{
#if defined(_MSC_VER)
    return _xgetbv(0);  /* min VS2010 SP1 compiler is required */
#else
    xxh_u32 xcr0_lo, xcr0_hi;
    __asm__(
        "# Call XGETBV\n\t"
        "#\n\t"
        "# Older assemblers (e.g. macOS's ancient GAS version) don't support\n\t"
        "# the XGETBV opcode, so we encode it by hand instead.\n\t"
        "# See <https://github.com/asmjit/asmjit/issues/78> for details.\n\t"
        ".byte 0x0f, 0x01, 0xd0\n\t"
        : "=a" (xcr0_lo), "=d" (xcr0_hi) : "c" (0));
    /* XGETBV returns XCR0 split across EDX:EAX; reassemble into 64 bits. */
    return xcr0_lo | ((xxh_u64)xcr0_hi << 32);
#endif
}
#endif

/* CPUID/XGETBV feature masks; bit positions follow Intel's CPUID reference.
 * The register each mask is applied to is visible at the use sites in
 * XXH_featureTest() below (abcd[3] = EDX, abcd[2] = ECX, abcd[1] = EBX). */
#define XXH_SSE2_CPUID_MASK (1 << 26)
#define XXH_OSXSAVE_CPUID_MASK ((1 << 26) | (1 << 27))
#define XXH_AVX2_CPUID_MASK (1 << 5)
#define XXH_AVX2_XGETBV_MASK ((1 << 2) | (1 << 1))
#define XXH_AVX512F_CPUID_MASK (1 << 16)
#define XXH_AVX512F_XGETBV_MASK ((7 << 5) | (1 << 2) | (1 << 1))

/*!
 * @internal
 * @brief Returns the best XXH3 implementation.
 *
 * Runs various CPUID/XGETBV tests to try and determine the best implementation.
 *
 * @ret The best @ref XXH_VECTOR implementation.
 * @see XXH_VECTOR_TYPES
 */
static int XXH_featureTest(void)
{
    xxh_u32 abcd[4];
    xxh_u32 max_leaves;
    int best = XXH_SCALAR;
#if XXH_DISPATCH_AVX2 || XXH_DISPATCH_AVX512
    xxh_u64 xgetbv_val;
#endif
#if defined(__GNUC__) && defined(__i386__)
    xxh_u32 cpuid_supported;
    __asm__(
        "# For the sake of ruthless backwards compatibility, check if CPUID\n\t"
        "# is supported in the EFLAGS on i386.\n\t"
        "# This is not necessary on x86_64 - CPUID is mandatory.\n\t"
        "# The ID flag (bit 21) in the EFLAGS register indicates support\n\t"
        "# for the CPUID instruction. If a software procedure can set and\n\t"
        "# clear this flag, the processor executing the procedure supports\n\t"
        "# the CPUID instruction.\n\t"
        "# <https://c9x.me/x86/html/file_module_x86_id_45.html>\n\t"
        "#\n\t"
        "# Routine is from <https://wiki.osdev.org/CPUID>.\n\t"

        "# Save EFLAGS\n\t"
        XXH_I_ATT("pushfd", "pushfl")
        "# Store EFLAGS\n\t"
        XXH_I_ATT("pushfd", "pushfl")
        "# Invert the ID bit in stored EFLAGS\n\t"
        XXH_I_ATT("xor dword ptr[esp], 0x200000", "xorl $0x200000, (%%esp)")
        "# Load stored EFLAGS (with ID bit inverted)\n\t"
        XXH_I_ATT("popfd", "popfl")
        "# Store EFLAGS again (ID bit may or not be inverted)\n\t"
        XXH_I_ATT("pushfd", "pushfl")
        "# eax = modified EFLAGS (ID bit may or may not be inverted)\n\t"
        XXH_I_ATT("pop eax", "popl %%eax")
        "# eax = whichever bits were changed\n\t"
        XXH_I_ATT("xor eax, dword ptr[esp]", "xorl (%%esp), %%eax")
        "# Restore original EFLAGS\n\t"
        XXH_I_ATT("popfd", "popfl")
        "# eax = zero if ID bit can't be changed, else non-zero\n\t"
        XXH_I_ATT("and eax, 0x200000", "andl $0x200000, %%eax")
        : "=a" (cpuid_supported) :: "cc");

    if (XXH_unlikely(!cpuid_supported)) {
        XXH_debugPrint("CPUID support is not detected!");
        return best;
    }

#endif
    /* Check how many CPUID pages we have */
    XXH_cpuid(0, 0, abcd);
    max_leaves = abcd[0];

    /* Shouldn't happen on hardware, but happens on some QEMU configs. */
    if (XXH_unlikely(max_leaves == 0)) {
        XXH_debugPrint("Max CPUID leaves == 0!");
        return best;
    }

    /* Check for SSE2, OSXSAVE and xgetbv */
    XXH_cpuid(1, 0, abcd);

    /*
     * Test for SSE2. The check is redundant on x86_64, but it doesn't hurt.
     */
    if (XXH_unlikely((abcd[3] & XXH_SSE2_CPUID_MASK) != XXH_SSE2_CPUID_MASK))
        return best;

    XXH_debugPrint("SSE2 support detected.");

    best = XXH_SSE2;
#if XXH_DISPATCH_AVX2 || XXH_DISPATCH_AVX512
    /* Make sure we have enough leaves */
    if (XXH_unlikely(max_leaves < 7))
        return best;

    /* Test for OSXSAVE and XGETBV */
    if ((abcd[2] & XXH_OSXSAVE_CPUID_MASK) != XXH_OSXSAVE_CPUID_MASK)
        return best;

    /* CPUID check for AVX features */
    XXH_cpuid(7, 0, abcd);

    xgetbv_val = XXH_xgetbv();
#if XXH_DISPATCH_AVX2
    /* Validate that AVX2 is supported by the CPU */
    if ((abcd[1] & XXH_AVX2_CPUID_MASK) != XXH_AVX2_CPUID_MASK)
        return best;

    /* Validate that the OS supports YMM registers */
    if ((xgetbv_val & XXH_AVX2_XGETBV_MASK) != XXH_AVX2_XGETBV_MASK) {
        XXH_debugPrint("AVX2 supported by the CPU, but not the OS.");
        return best;
    }

    /* AVX2 supported */
    XXH_debugPrint("AVX2 support detected.");
    best = XXH_AVX2;
#endif
#if XXH_DISPATCH_AVX512
    /* Check if AVX512F is supported by the CPU */
    if ((abcd[1] & XXH_AVX512F_CPUID_MASK) != XXH_AVX512F_CPUID_MASK) {
        XXH_debugPrint("AVX512F not supported by CPU");
        return best;
    }

    /* Validate that the OS supports ZMM registers */
    if ((xgetbv_val & XXH_AVX512F_XGETBV_MASK) != XXH_AVX512F_XGETBV_MASK) {
        XXH_debugPrint("AVX512F supported by the CPU, but not the OS.");
        return best;
    }

    /* AVX512F supported */
    XXH_debugPrint("AVX512F support detected.");
    best = XXH_AVX512;
#endif
#endif
    return best;
}


/* === Vector implementations === */

/*!
 * @internal
 * @brief Defines the various dispatch functions.
 *
 * TODO: Consolidate?
 *
 * @param suffix The suffix for the functions, e.g. sse2 or scalar
 * @param target XXH_TARGET_* or empty.
 */
/* Each expansion emits one full family of long-hash entry points compiled
 * with the given ISA `target` attribute; the runtime dispatcher below picks
 * the family matching the detected CPU. */
#define XXH_DEFINE_DISPATCH_FUNCS(suffix, target)                             \
                                                                              \
/* ===   XXH3, default variants   === */                                      \
                                                                              \
XXH_NO_INLINE target XXH64_hash_t                                             \
XXHL64_default_##suffix(const void* XXH_RESTRICT input, size_t len)           \
{                                                                             \
    return XXH3_hashLong_64b_internal(                                        \
               input, len, XXH3_kSecret, sizeof(XXH3_kSecret),                \
               XXH3_accumulate_512_##suffix, XXH3_scrambleAcc_##suffix        \
    );                                                                        \
}                                                                             \
                                                                              \
/* ===   XXH3, Seeded variants   === */                                       \
                                                                              \
XXH_NO_INLINE target XXH64_hash_t                                             \
XXHL64_seed_##suffix(const void* XXH_RESTRICT input, size_t len,              \
                     XXH64_hash_t seed)                                       \
{                                                                             \
    return XXH3_hashLong_64b_withSeed_internal(                               \
               input, len, seed, XXH3_accumulate_512_##suffix,                \
               XXH3_scrambleAcc_##suffix, XXH3_initCustomSecret_##suffix      \
    );                                                                        \
}                                                                             \
                                                                              \
/* ===   XXH3, Secret variants   === */                                       \
                                                                              \
XXH_NO_INLINE target XXH64_hash_t                                             \
XXHL64_secret_##suffix(const void* XXH_RESTRICT input, size_t len,            \
                       const void* secret, size_t secretLen)                  \
{                                                                             \
    return XXH3_hashLong_64b_internal(                                        \
               input, len, secret, secretLen,                                 \
               XXH3_accumulate_512_##suffix, XXH3_scrambleAcc_##suffix        \
    );                                                                        \
}                                                                             \
                                                                              \
/* ===   XXH3 update variants   === */                                        \
                                                                              \
XXH_NO_INLINE target XXH_errorcode                                            \
XXH3_update_##suffix(XXH3_state_t* state, const void* input, size_t len)      \
{                                                                             \
    return XXH3_update(state, (const xxh_u8*)input, len,                      \
                       XXH3_accumulate_512_##suffix, XXH3_scrambleAcc_##suffix); \
}                                                                             \
                                                                              \
/* ===   XXH128 default variants   === */                                     \
                                                                              \
XXH_NO_INLINE target XXH128_hash_t                                            \
XXHL128_default_##suffix(const void* XXH_RESTRICT input, size_t len)          \
{                                                                             \
    return XXH3_hashLong_128b_internal(                                       \
               input, len, XXH3_kSecret, sizeof(XXH3_kSecret),                \
               XXH3_accumulate_512_##suffix, XXH3_scrambleAcc_##suffix        \
    );                                                                        \
}                                                                             \
                                                                              \
/* ===   XXH128 Secret variants   === */                                      \
                                                                              \
XXH_NO_INLINE target XXH128_hash_t                                            \
XXHL128_secret_##suffix(const void* XXH_RESTRICT input, size_t len,           \
                        const void* XXH_RESTRICT secret, size_t secretLen)    \
{                                                                             \
    return XXH3_hashLong_128b_internal(                                       \
               input, len, (const xxh_u8*)secret, secretLen,                  \
               XXH3_accumulate_512_##suffix, XXH3_scrambleAcc_##suffix);      \
}                                                                             \
                                                                              \
/* ===   XXH128 Seeded variants   === */                                      \
                                                                              \
XXH_NO_INLINE target XXH128_hash_t                                            \
XXHL128_seed_##suffix(const void* XXH_RESTRICT input, size_t len,             \
                      XXH64_hash_t seed)                                      \
{                                                                             \
    return XXH3_hashLong_128b_withSeed_internal(input, len, seed,             \
               XXH3_accumulate_512_##suffix, XXH3_scrambleAcc_##suffix,       \
               XXH3_initCustomSecret_##suffix);                               \
}

/* End XXH_DEFINE_DISPATCH_FUNCS */

#if XXH_DISPATCH_SCALAR
XXH_DEFINE_DISPATCH_FUNCS(scalar, /* nothing */)
#endif
XXH_DEFINE_DISPATCH_FUNCS(sse2, XXH_TARGET_SSE2)
#if XXH_DISPATCH_AVX2
XXH_DEFINE_DISPATCH_FUNCS(avx2, XXH_TARGET_AVX2)
#endif
#if XXH_DISPATCH_AVX512
XXH_DEFINE_DISPATCH_FUNCS(avx512, XXH_TARGET_AVX512)
#endif
#undef XXH_DEFINE_DISPATCH_FUNCS

/* ==== Dispatchers ==== */

/* Function pointer types matching the entry points generated above. */
typedef XXH64_hash_t (*XXH3_dispatchx86_hashLong64_default)(const void* XXH_RESTRICT, size_t);

typedef XXH64_hash_t (*XXH3_dispatchx86_hashLong64_withSeed)(const void* XXH_RESTRICT, size_t, XXH64_hash_t);

typedef XXH64_hash_t (*XXH3_dispatchx86_hashLong64_withSecret)(const void* XXH_RESTRICT, size_t, const void* XXH_RESTRICT, size_t);

typedef XXH_errorcode (*XXH3_dispatchx86_update)(XXH3_state_t*, const void*, size_t);

/* One row of the 64-bit dispatch table: all entry points for one ISA level. */
typedef struct {
    XXH3_dispatchx86_hashLong64_default    hashLong64_default;
    XXH3_dispatchx86_hashLong64_withSeed   hashLong64_seed;
    XXH3_dispatchx86_hashLong64_withSecret hashLong64_secret;
    XXH3_dispatchx86_update                update;
} XXH_dispatchFunctions_s;

#define XXH_NB_DISPATCHES 4

/*!
 * @internal
 * @brief Table of dispatchers for @ref XXH3_64bits().
 *
 * @pre The indices must match @ref XXH_VECTOR_TYPE.
 */
/* Rows are indexed directly by the XXH_VECTOR value returned from
 * XXH_featureTest(); compiled-out ISA levels hold NULL rows, which the
 * asserts in XXH_setDispatch() guarantee are never selected. */
static const XXH_dispatchFunctions_s XXH_kDispatch[XXH_NB_DISPATCHES] = {
#if XXH_DISPATCH_SCALAR
    /* Scalar */ { XXHL64_default_scalar, XXHL64_seed_scalar, XXHL64_secret_scalar, XXH3_update_scalar },
#else
    /* Scalar */ { NULL, NULL, NULL, NULL },
#endif
    /* SSE2   */ { XXHL64_default_sse2, XXHL64_seed_sse2, XXHL64_secret_sse2, XXH3_update_sse2 },
#if XXH_DISPATCH_AVX2
    /* AVX2   */ { XXHL64_default_avx2, XXHL64_seed_avx2, XXHL64_secret_avx2, XXH3_update_avx2 },
#else
    /* AVX2   */ { NULL, NULL, NULL, NULL },
#endif
#if XXH_DISPATCH_AVX512
    /* AVX512 */ { XXHL64_default_avx512, XXHL64_seed_avx512, XXHL64_secret_avx512, XXH3_update_avx512 }
#else
    /* AVX512 */ { NULL, NULL, NULL, NULL }
#endif
};

/*!
 * @internal
 * @brief The selected dispatch table for @ref XXH3_64bits().
 */
static XXH_dispatchFunctions_s XXH_g_dispatch = { NULL, NULL, NULL, NULL };


/* Function pointer types for the 128-bit entry points. */
typedef XXH128_hash_t (*XXH3_dispatchx86_hashLong128_default)(const void* XXH_RESTRICT, size_t);

typedef XXH128_hash_t (*XXH3_dispatchx86_hashLong128_withSeed)(const void* XXH_RESTRICT, size_t, XXH64_hash_t);

typedef XXH128_hash_t (*XXH3_dispatchx86_hashLong128_withSecret)(const void* XXH_RESTRICT, size_t, const void* XXH_RESTRICT, size_t);

/* One row of the 128-bit dispatch table: all entry points for one ISA level. */
typedef struct {
    XXH3_dispatchx86_hashLong128_default    hashLong128_default;
    XXH3_dispatchx86_hashLong128_withSeed   hashLong128_seed;
    XXH3_dispatchx86_hashLong128_withSecret hashLong128_secret;
    XXH3_dispatchx86_update                 update;
} XXH_dispatch128Functions_s;


/*!
 * @internal
 * @brief Table of dispatchers for @ref XXH3_128bits().
 *
 * @pre The indices must match @ref XXH_VECTOR_TYPE.
 */
static const XXH_dispatch128Functions_s XXH_kDispatch128[XXH_NB_DISPATCHES] = {
#if XXH_DISPATCH_SCALAR
    /* Scalar */ { XXHL128_default_scalar, XXHL128_seed_scalar, XXHL128_secret_scalar, XXH3_update_scalar },
#else
    /* Scalar */ { NULL, NULL, NULL, NULL },
#endif
    /* SSE2   */ { XXHL128_default_sse2, XXHL128_seed_sse2, XXHL128_secret_sse2, XXH3_update_sse2 },
#if XXH_DISPATCH_AVX2
    /* AVX2   */ { XXHL128_default_avx2, XXHL128_seed_avx2, XXHL128_secret_avx2, XXH3_update_avx2 },
#else
    /* AVX2   */ { NULL, NULL, NULL, NULL },
#endif
#if XXH_DISPATCH_AVX512
    /* AVX512 */ { XXHL128_default_avx512, XXHL128_seed_avx512, XXHL128_secret_avx512, XXH3_update_avx512 }
#else
    /* AVX512 */ { NULL, NULL, NULL, NULL }
#endif
};

/*!
 * @internal
 * @brief The selected dispatch table for @ref XXH3_128bits().
 */
static XXH_dispatch128Functions_s XXH_g_dispatch128 = { NULL, NULL, NULL, NULL };

/*!
 * @internal
 * @brief Runs a CPUID check and sets the correct dispatch tables.
642 */ 643 static void XXH_setDispatch(void) 644 { 645 int vecID = XXH_featureTest(); 646 XXH_STATIC_ASSERT(XXH_AVX512 == XXH_NB_DISPATCHES-1); 647 assert(XXH_SCALAR <= vecID && vecID <= XXH_AVX512); 648 #if !XXH_DISPATCH_SCALAR 649 assert(vecID != XXH_SCALAR); 650 #endif 651 #if !XXH_DISPATCH_AVX512 652 assert(vecID != XXH_AVX512); 653 #endif 654 #if !XXH_DISPATCH_AVX2 655 assert(vecID != XXH_AVX2); 656 #endif 657 XXH_g_dispatch = XXH_kDispatch[vecID]; 658 XXH_g_dispatch128 = XXH_kDispatch128[vecID]; 659 } 660 661 662 /* ==== XXH3 public functions ==== */ 663 664 static XXH64_hash_t 665 XXH3_hashLong_64b_defaultSecret_selection(const void* input, size_t len, 666 XXH64_hash_t seed64, const xxh_u8* secret, size_t secretLen) 667 { 668 (void)seed64; (void)secret; (void)secretLen; 669 if (XXH_g_dispatch.hashLong64_default == NULL) XXH_setDispatch(); 670 return XXH_g_dispatch.hashLong64_default(input, len); 671 } 672 673 XXH64_hash_t XXH3_64bits_dispatch(const void* input, size_t len) 674 { 675 return XXH3_64bits_internal(input, len, 0, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_defaultSecret_selection); 676 } 677 678 static XXH64_hash_t 679 XXH3_hashLong_64b_withSeed_selection(const void* input, size_t len, 680 XXH64_hash_t seed64, const xxh_u8* secret, size_t secretLen) 681 { 682 (void)secret; (void)secretLen; 683 if (XXH_g_dispatch.hashLong64_seed == NULL) XXH_setDispatch(); 684 return XXH_g_dispatch.hashLong64_seed(input, len, seed64); 685 } 686 687 XXH64_hash_t XXH3_64bits_withSeed_dispatch(const void* input, size_t len, XXH64_hash_t seed) 688 { 689 return XXH3_64bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_withSeed_selection); 690 } 691 692 static XXH64_hash_t 693 XXH3_hashLong_64b_withSecret_selection(const void* input, size_t len, 694 XXH64_hash_t seed64, const xxh_u8* secret, size_t secretLen) 695 { 696 (void)seed64; 697 if (XXH_g_dispatch.hashLong64_secret == NULL) XXH_setDispatch(); 698 return 
XXH_g_dispatch.hashLong64_secret(input, len, secret, secretLen); 699 } 700 701 XXH64_hash_t XXH3_64bits_withSecret_dispatch(const void* input, size_t len, const void* secret, size_t secretLen) 702 { 703 return XXH3_64bits_internal(input, len, 0, secret, secretLen, XXH3_hashLong_64b_withSecret_selection); 704 } 705 706 XXH_errorcode 707 XXH3_64bits_update_dispatch(XXH3_state_t* state, const void* input, size_t len) 708 { 709 if (XXH_g_dispatch.update == NULL) XXH_setDispatch(); 710 return XXH_g_dispatch.update(state, (const xxh_u8*)input, len); 711 } 712 713 714 /* ==== XXH128 public functions ==== */ 715 716 static XXH128_hash_t 717 XXH3_hashLong_128b_defaultSecret_selection(const void* input, size_t len, 718 XXH64_hash_t seed64, const void* secret, size_t secretLen) 719 { 720 (void)seed64; (void)secret; (void)secretLen; 721 if (XXH_g_dispatch128.hashLong128_default == NULL) XXH_setDispatch(); 722 return XXH_g_dispatch128.hashLong128_default(input, len); 723 } 724 725 XXH128_hash_t XXH3_128bits_dispatch(const void* input, size_t len) 726 { 727 return XXH3_128bits_internal(input, len, 0, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_128b_defaultSecret_selection); 728 } 729 730 static XXH128_hash_t 731 XXH3_hashLong_128b_withSeed_selection(const void* input, size_t len, 732 XXH64_hash_t seed64, const void* secret, size_t secretLen) 733 { 734 (void)secret; (void)secretLen; 735 if (XXH_g_dispatch128.hashLong128_seed == NULL) XXH_setDispatch(); 736 return XXH_g_dispatch128.hashLong128_seed(input, len, seed64); 737 } 738 739 XXH128_hash_t XXH3_128bits_withSeed_dispatch(const void* input, size_t len, XXH64_hash_t seed) 740 { 741 return XXH3_128bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_128b_withSeed_selection); 742 } 743 744 static XXH128_hash_t 745 XXH3_hashLong_128b_withSecret_selection(const void* input, size_t len, 746 XXH64_hash_t seed64, const void* secret, size_t secretLen) 747 { 748 (void)seed64; 749 if 
(XXH_g_dispatch128.hashLong128_secret == NULL) XXH_setDispatch(); 750 return XXH_g_dispatch128.hashLong128_secret(input, len, secret, secretLen); 751 } 752 753 XXH128_hash_t XXH3_128bits_withSecret_dispatch(const void* input, size_t len, const void* secret, size_t secretLen) 754 { 755 return XXH3_128bits_internal(input, len, 0, secret, secretLen, XXH3_hashLong_128b_withSecret_selection); 756 } 757 758 XXH_errorcode 759 XXH3_128bits_update_dispatch(XXH3_state_t* state, const void* input, size_t len) 760 { 761 if (XXH_g_dispatch128.update == NULL) XXH_setDispatch(); 762 return XXH_g_dispatch128.update(state, (const xxh_u8*)input, len); 763 } 764 765 #endif // defined(__x86_64__) || defined(__i386__) || defined(_M_IX86) || defined(_M_X64) 766 767 #if defined (__cplusplus) 768 } 769 #endif 770 /*! @} */