xorshift128plus-inl.h (3059B)
1 // Copyright (c) the JPEG XL Project Authors. All rights reserved. 2 // 3 // Use of this source code is governed by a BSD-style 4 // license that can be found in the LICENSE file. 5 6 // Fast but weak random generator. 7 8 #if defined(LIB_JXL_XORSHIFT128PLUS_INL_H_) == defined(HWY_TARGET_TOGGLE) 9 #ifdef LIB_JXL_XORSHIFT128PLUS_INL_H_ 10 #undef LIB_JXL_XORSHIFT128PLUS_INL_H_ 11 #else 12 #define LIB_JXL_XORSHIFT128PLUS_INL_H_ 13 #endif 14 15 #include <stddef.h> 16 17 #include <hwy/highway.h> 18 HWY_BEFORE_NAMESPACE(); 19 namespace jxl { 20 namespace HWY_NAMESPACE { 21 namespace { 22 23 // These templates are not found via ADL. 24 using hwy::HWY_NAMESPACE::Add; 25 using hwy::HWY_NAMESPACE::ShiftLeft; 26 using hwy::HWY_NAMESPACE::ShiftRight; 27 using hwy::HWY_NAMESPACE::Xor; 28 29 // Adapted from https://github.com/vpxyz/xorshift/blob/master/xorshift128plus/ 30 // (MIT-license) 31 class Xorshift128Plus { 32 public: 33 // 8 independent generators (= single iteration for AVX-512) 34 enum { N = 8 }; 35 36 explicit HWY_MAYBE_UNUSED Xorshift128Plus(const uint64_t seed) { 37 // Init state using SplitMix64 generator 38 s0_[0] = SplitMix64(seed + 0x9E3779B97F4A7C15ull); 39 s1_[0] = SplitMix64(s0_[0]); 40 for (size_t i = 1; i < N; ++i) { 41 s0_[i] = SplitMix64(s1_[i - 1]); 42 s1_[i] = SplitMix64(s0_[i]); 43 } 44 } 45 46 HWY_MAYBE_UNUSED Xorshift128Plus(const uint32_t seed1, const uint32_t seed2, 47 const uint32_t seed3, const uint32_t seed4) { 48 // Init state using SplitMix64 generator 49 s0_[0] = SplitMix64(((static_cast<uint64_t>(seed1) << 32) + seed2) + 50 0x9E3779B97F4A7C15ull); 51 s1_[0] = SplitMix64(((static_cast<uint64_t>(seed3) << 32) + seed4) + 52 0x9E3779B97F4A7C15ull); 53 for (size_t i = 1; i < N; ++i) { 54 s0_[i] = SplitMix64(s0_[i - 1]); 55 s1_[i] = SplitMix64(s1_[i - 1]); 56 } 57 } 58 59 HWY_INLINE HWY_MAYBE_UNUSED void Fill(uint64_t* HWY_RESTRICT random_bits) { 60 #if HWY_CAP_INTEGER64 61 const HWY_FULL(uint64_t) d; 62 for (size_t i = 0; i < N; i += Lanes(d)) { 63 auto s1 = Load(d, s0_ + i); 64 const auto s0 = Load(d, s1_ + i); 65 const auto bits = Add(s1, s0); // b, c 66 Store(s0, d, s0_ + i); 67 s1 = Xor(s1, ShiftLeft<23>(s1)); 68 Store(bits, d, random_bits + i); 69 s1 = Xor(s1, Xor(s0, Xor(ShiftRight<18>(s1), ShiftRight<5>(s0)))); 70 Store(s1, d, s1_ + i); 71 } 72 #else 73 for (size_t i = 0; i < N; ++i) { 74 auto s1 = s0_[i]; 75 const auto s0 = s1_[i]; 76 const auto bits = s1 + s0; // b, c 77 s0_[i] = s0; 78 s1 ^= s1 << 23; 79 random_bits[i] = bits; 80 s1 ^= s0 ^ (s1 >> 18) ^ (s0 >> 5); 81 s1_[i] = s1; 82 } 83 #endif 84 } 85 86 private: 87 static uint64_t SplitMix64(uint64_t z) { 88 z = (z ^ (z >> 30)) * 0xBF58476D1CE4E5B9ull; 89 z = (z ^ (z >> 27)) * 0x94D049BB133111EBull; 90 return z ^ (z >> 31); 91 } 92 93 HWY_ALIGN uint64_t s0_[N]; 94 HWY_ALIGN uint64_t s1_[N]; 95 }; 96 97 } // namespace 98 // NOLINTNEXTLINE(google-readability-namespace-comments) 99 } // namespace HWY_NAMESPACE 100 } // namespace jxl 101 HWY_AFTER_NAMESPACE(); 102 103 #endif // LIB_JXL_XORSHIFT128PLUS_INL_H_