libjxl

FORK: libjxl patches used on blog
git clone https://git.neptards.moe/blog/libjxl.git
Log | Files | Refs | Submodules | README | LICENSE

xorshift128plus-inl.h (3059B)


      1 // Copyright (c) the JPEG XL Project Authors. All rights reserved.
      2 //
      3 // Use of this source code is governed by a BSD-style
      4 // license that can be found in the LICENSE file.
      5 
      6 // Fast but weak random generator.
      7 
      8 #if defined(LIB_JXL_XORSHIFT128PLUS_INL_H_) == defined(HWY_TARGET_TOGGLE)
      9 #ifdef LIB_JXL_XORSHIFT128PLUS_INL_H_
     10 #undef LIB_JXL_XORSHIFT128PLUS_INL_H_
     11 #else
     12 #define LIB_JXL_XORSHIFT128PLUS_INL_H_
     13 #endif
     14 
     15 #include <stddef.h>
     16 
     17 #include <hwy/highway.h>
     18 HWY_BEFORE_NAMESPACE();
     19 namespace jxl {
     20 namespace HWY_NAMESPACE {
     21 namespace {
     22 
     23 // These templates are not found via ADL.
     24 using hwy::HWY_NAMESPACE::Add;
     25 using hwy::HWY_NAMESPACE::ShiftLeft;
     26 using hwy::HWY_NAMESPACE::ShiftRight;
     27 using hwy::HWY_NAMESPACE::Xor;
     28 
     29 // Adapted from https://github.com/vpxyz/xorshift/blob/master/xorshift128plus/
     30 // (MIT-license)
     31 class Xorshift128Plus {
     32  public:
     33   // 8 independent generators (= single iteration for AVX-512)
     34   enum { N = 8 };
     35 
     36   explicit HWY_MAYBE_UNUSED Xorshift128Plus(const uint64_t seed) {
     37     // Init state using SplitMix64 generator
     38     s0_[0] = SplitMix64(seed + 0x9E3779B97F4A7C15ull);
     39     s1_[0] = SplitMix64(s0_[0]);
     40     for (size_t i = 1; i < N; ++i) {
     41       s0_[i] = SplitMix64(s1_[i - 1]);
     42       s1_[i] = SplitMix64(s0_[i]);
     43     }
     44   }
     45 
     46   HWY_MAYBE_UNUSED Xorshift128Plus(const uint32_t seed1, const uint32_t seed2,
     47                                    const uint32_t seed3, const uint32_t seed4) {
     48     // Init state using SplitMix64 generator
     49     s0_[0] = SplitMix64(((static_cast<uint64_t>(seed1) << 32) + seed2) +
     50                         0x9E3779B97F4A7C15ull);
     51     s1_[0] = SplitMix64(((static_cast<uint64_t>(seed3) << 32) + seed4) +
     52                         0x9E3779B97F4A7C15ull);
     53     for (size_t i = 1; i < N; ++i) {
     54       s0_[i] = SplitMix64(s0_[i - 1]);
     55       s1_[i] = SplitMix64(s1_[i - 1]);
     56     }
     57   }
     58 
     59   HWY_INLINE HWY_MAYBE_UNUSED void Fill(uint64_t* HWY_RESTRICT random_bits) {
     60 #if HWY_CAP_INTEGER64
     61     const HWY_FULL(uint64_t) d;
     62     for (size_t i = 0; i < N; i += Lanes(d)) {
     63       auto s1 = Load(d, s0_ + i);
     64       const auto s0 = Load(d, s1_ + i);
     65       const auto bits = Add(s1, s0);  // b, c
     66       Store(s0, d, s0_ + i);
     67       s1 = Xor(s1, ShiftLeft<23>(s1));
     68       Store(bits, d, random_bits + i);
     69       s1 = Xor(s1, Xor(s0, Xor(ShiftRight<18>(s1), ShiftRight<5>(s0))));
     70       Store(s1, d, s1_ + i);
     71     }
     72 #else
     73     for (size_t i = 0; i < N; ++i) {
     74       auto s1 = s0_[i];
     75       const auto s0 = s1_[i];
     76       const auto bits = s1 + s0;  // b, c
     77       s0_[i] = s0;
     78       s1 ^= s1 << 23;
     79       random_bits[i] = bits;
     80       s1 ^= s0 ^ (s1 >> 18) ^ (s0 >> 5);
     81       s1_[i] = s1;
     82     }
     83 #endif
     84   }
     85 
     86  private:
     87   static uint64_t SplitMix64(uint64_t z) {
     88     z = (z ^ (z >> 30)) * 0xBF58476D1CE4E5B9ull;
     89     z = (z ^ (z >> 27)) * 0x94D049BB133111EBull;
     90     return z ^ (z >> 31);
     91   }
     92 
     93   HWY_ALIGN uint64_t s0_[N];
     94   HWY_ALIGN uint64_t s1_[N];
     95 };
     96 
     97 }  // namespace
     98 // NOLINTNEXTLINE(google-readability-namespace-comments)
     99 }  // namespace HWY_NAMESPACE
    100 }  // namespace jxl
    101 HWY_AFTER_NAMESPACE();
    102 
    103 #endif  // LIB_JXL_XORSHIFT128PLUS_INL_H_