libjxl

FORK: libjxl patches used on blog
git clone https://git.neptards.moe/blog/libjxl.git
Log | Files | Refs | Submodules | README | LICENSE

fast_dct_test.cc (14767B)


      1 // Copyright (c) the JPEG XL Project Authors. All rights reserved.
      2 //
      3 // Use of this source code is governed by a BSD-style
      4 // license that can be found in the LICENSE file.
      5 
      6 #include <numeric>
      7 
      8 #undef HWY_TARGET_INCLUDE
      9 #define HWY_TARGET_INCLUDE "lib/jxl/fast_dct_test.cc"
     10 #include <hwy/foreach_target.h>
     11 
     12 #include "lib/jxl/base/random.h"
     13 #include "lib/jxl/dct-inl.h"
     14 #include "lib/jxl/fast_dct-inl.h"
     15 #include "lib/jxl/testing.h"
     16 #include "lib/jxl/transpose-inl.h"
     17 
     18 // Test utils
     19 #include <hwy/highway.h>
     20 #include <hwy/tests/hwy_gtest.h>
     21 HWY_BEFORE_NAMESPACE();
     22 namespace jxl {
     23 
     24 namespace HWY_NAMESPACE {
     25 namespace {
     26 
     27 void BenchmarkFloatIDCT32x32() { TestFloatIDCT<32, 32>(); }
     28 void BenchmarkFastIDCT32x32() { TestFastIDCT<32, 32>(); }
     29 
     30 template <size_t N, size_t M>
     31 HWY_NOINLINE void TestFastTranspose() {
     32 #if HWY_TARGET == HWY_NEON
     33   auto array_mem = hwy::AllocateAligned<int16_t>(N * M);
     34   int16_t* array = array_mem.get();
     35   auto transposed_mem = hwy::AllocateAligned<int16_t>(N * M);
     36   int16_t* transposed = transposed_mem.get();
     37   std::iota(array, array + N * M, 0);
     38   for (size_t j = 0; j < 100000000 / (N * M); j++) {
     39     FastTransposeBlock(array, M, N, M, transposed, N);
     40   }
     41   for (size_t i = 0; i < M; i++) {
     42     for (size_t j = 0; j < N; j++) {
     43       EXPECT_EQ(array[j * M + i], transposed[i * N + j]);
     44     }
     45   }
     46 #endif
     47 }
     48 
     49 template <size_t N, size_t M>
     50 HWY_NOINLINE void TestFloatTranspose() {
     51   auto array_mem = hwy::AllocateAligned<float>(N * M);
     52   float* array = array_mem.get();
     53   auto transposed_mem = hwy::AllocateAligned<float>(N * M);
     54   float* transposed = transposed_mem.get();
     55   std::iota(array, array + N * M, 0);
     56   for (size_t j = 0; j < 100000000 / (N * M); j++) {
     57     Transpose<N, M>::Run(DCTFrom(array, M), DCTTo(transposed, N));
     58   }
     59   for (size_t i = 0; i < M; i++) {
     60     for (size_t j = 0; j < N; j++) {
     61       EXPECT_EQ(array[j * M + i], transposed[i * N + j]);
     62     }
     63   }
     64 }
     65 
     66 // TODO(sboukortt): re-enable the FloatIDCT tests once we find out why they fail
     67 // in ASAN mode in the CI runners and seemingly not locally.
     68 
     69 HWY_NOINLINE void TestFastTranspose8x8() { TestFastTranspose<8, 8>(); }
     70 HWY_NOINLINE void TestFloatTranspose8x8() { TestFloatTranspose<8, 8>(); }
     71 HWY_NOINLINE void TestFastIDCT8x8() { TestFastIDCT<8, 8>(); }
     72 HWY_NOINLINE void TestFloatIDCT8x8() {
     73 #if HWY_TARGET == HWY_SCALAR && \
     74     (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER))
     75   GTEST_SKIP();
     76 #else
     77   TestFloatIDCT<8, 8>();
     78 #endif
     79 }
     80 HWY_NOINLINE void TestFastTranspose8x16() { TestFastTranspose<8, 16>(); }
     81 HWY_NOINLINE void TestFloatTranspose8x16() { TestFloatTranspose<8, 16>(); }
     82 HWY_NOINLINE void TestFastIDCT8x16() { TestFastIDCT<8, 16>(); }
     83 HWY_NOINLINE void TestFloatIDCT8x16() {
     84 #if HWY_TARGET == HWY_SCALAR && \
     85     (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER))
     86   GTEST_SKIP();
     87 #else
     88   TestFloatIDCT<8, 16>();
     89 #endif
     90 }
     91 HWY_NOINLINE void TestFastTranspose8x32() { TestFastTranspose<8, 32>(); }
     92 HWY_NOINLINE void TestFloatTranspose8x32() { TestFloatTranspose<8, 32>(); }
     93 HWY_NOINLINE void TestFastIDCT8x32() { TestFastIDCT<8, 32>(); }
     94 HWY_NOINLINE void TestFloatIDCT8x32() {
     95 #if HWY_TARGET == HWY_SCALAR && \
     96     (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER))
     97   GTEST_SKIP();
     98 #else
     99   TestFloatIDCT<8, 32>();
    100 #endif
    101 }
    102 HWY_NOINLINE void TestFastTranspose16x8() { TestFastTranspose<16, 8>(); }
    103 HWY_NOINLINE void TestFloatTranspose16x8() { TestFloatTranspose<16, 8>(); }
    104 HWY_NOINLINE void TestFastIDCT16x8() { TestFastIDCT<16, 8>(); }
    105 HWY_NOINLINE void TestFloatIDCT16x8() {
    106 #if HWY_TARGET == HWY_SCALAR && \
    107     (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER))
    108   GTEST_SKIP();
    109 #else
    110   TestFloatIDCT<16, 8>();
    111 #endif
    112 }
    113 HWY_NOINLINE void TestFastTranspose16x16() { TestFastTranspose<16, 16>(); }
    114 HWY_NOINLINE void TestFloatTranspose16x16() { TestFloatTranspose<16, 16>(); }
    115 HWY_NOINLINE void TestFastIDCT16x16() { TestFastIDCT<16, 16>(); }
    116 HWY_NOINLINE void TestFloatIDCT16x16() {
    117 #if HWY_TARGET == HWY_SCALAR && \
    118     (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER))
    119   GTEST_SKIP();
    120 #else
    121   TestFloatIDCT<16, 16>();
    122 #endif
    123 }
    124 HWY_NOINLINE void TestFastTranspose16x32() { TestFastTranspose<16, 32>(); }
    125 HWY_NOINLINE void TestFloatTranspose16x32() { TestFloatTranspose<16, 32>(); }
    126 HWY_NOINLINE void TestFastIDCT16x32() { TestFastIDCT<16, 32>(); }
    127 HWY_NOINLINE void TestFloatIDCT16x32() {
    128 #if HWY_TARGET == HWY_SCALAR && \
    129     (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER))
    130   GTEST_SKIP();
    131 #else
    132   TestFloatIDCT<16, 32>();
    133 #endif
    134 }
    135 HWY_NOINLINE void TestFastTranspose32x8() { TestFastTranspose<32, 8>(); }
    136 HWY_NOINLINE void TestFloatTranspose32x8() { TestFloatTranspose<32, 8>(); }
    137 HWY_NOINLINE void TestFastIDCT32x8() { TestFastIDCT<32, 8>(); }
    138 HWY_NOINLINE void TestFloatIDCT32x8() {
    139 #if HWY_TARGET == HWY_SCALAR && \
    140     (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER))
    141   GTEST_SKIP();
    142 #else
    143   TestFloatIDCT<32, 8>();
    144 #endif
    145 }
    146 HWY_NOINLINE void TestFastTranspose32x16() { TestFastTranspose<32, 16>(); }
    147 HWY_NOINLINE void TestFloatTranspose32x16() { TestFloatTranspose<32, 16>(); }
    148 HWY_NOINLINE void TestFastIDCT32x16() { TestFastIDCT<32, 16>(); }
    149 HWY_NOINLINE void TestFloatIDCT32x16() {
    150 #if HWY_TARGET == HWY_SCALAR && \
    151     (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER))
    152   GTEST_SKIP();
    153 #else
    154   TestFloatIDCT<32, 16>();
    155 #endif
    156 }
    157 HWY_NOINLINE void TestFastTranspose32x32() { TestFastTranspose<32, 32>(); }
    158 HWY_NOINLINE void TestFloatTranspose32x32() { TestFloatTranspose<32, 32>(); }
    159 HWY_NOINLINE void TestFastIDCT32x32() { TestFastIDCT<32, 32>(); }
    160 HWY_NOINLINE void TestFloatIDCT32x32() {
    161 #if HWY_TARGET == HWY_SCALAR && \
    162     (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER))
    163   GTEST_SKIP();
    164 #else
    165   TestFloatIDCT<32, 32>();
    166 #endif
    167 }
    168 HWY_NOINLINE void TestFastTranspose32x64() { TestFastTranspose<32, 64>(); }
    169 HWY_NOINLINE void TestFloatTranspose32x64() { TestFloatTranspose<32, 64>(); }
    170 HWY_NOINLINE void TestFastIDCT32x64() { TestFastIDCT<32, 64>(); }
    171 HWY_NOINLINE void TestFloatIDCT32x64() {
    172 #if HWY_TARGET == HWY_SCALAR && \
    173     (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER))
    174   GTEST_SKIP();
    175 #else
    176   TestFloatIDCT<32, 64>();
    177 #endif
    178 }
    179 HWY_NOINLINE void TestFastTranspose64x32() { TestFastTranspose<64, 32>(); }
    180 HWY_NOINLINE void TestFloatTranspose64x32() { TestFloatTranspose<64, 32>(); }
    181 HWY_NOINLINE void TestFastIDCT64x32() { TestFastIDCT<64, 32>(); }
    182 HWY_NOINLINE void TestFloatIDCT64x32() {
    183 #if HWY_TARGET == HWY_SCALAR && \
    184     (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER))
    185   GTEST_SKIP();
    186 #else
    187   TestFloatIDCT<64, 32>();
    188 #endif
    189 }
    190 HWY_NOINLINE void TestFastTranspose64x64() { TestFastTranspose<64, 64>(); }
    191 HWY_NOINLINE void TestFloatTranspose64x64() { TestFloatTranspose<64, 64>(); }
    192 HWY_NOINLINE void TestFastIDCT64x64() { TestFastIDCT<64, 64>(); }
    193 HWY_NOINLINE void TestFloatIDCT64x64() {
    194 #if HWY_TARGET == HWY_SCALAR && \
    195     (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER))
    196   GTEST_SKIP();
    197 #else
    198   TestFloatIDCT<64, 64>();
    199 #endif
    200 }
    201 HWY_NOINLINE void TestFastTranspose64x128() { TestFastTranspose<64, 128>(); }
    202 HWY_NOINLINE void TestFloatTranspose64x128() { TestFloatTranspose<64, 128>(); }
    203 /*
    204 HWY_NOINLINE void TestFastIDCT64x128() { TestFastIDCT<64, 128>(); }
    205 HWY_NOINLINE void TestFloatIDCT64x128() {
    206 #if HWY_TARGET == HWY_SCALAR && \
    207     (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER))
    208   GTEST_SKIP();
    209 #else
    210   TestFloatIDCT<64, 128>();
    211 #endif
    212 }
    213 */
    214 HWY_NOINLINE void TestFastTranspose128x64() { TestFastTranspose<128, 64>(); }
    215 HWY_NOINLINE void TestFloatTranspose128x64() { TestFloatTranspose<128, 64>(); }
    216 /*
    217 HWY_NOINLINE void TestFastIDCT128x64() { TestFastIDCT<128, 64>(); }
    218 HWY_NOINLINE void TestFloatIDCT128x64() {
    219 #if HWY_TARGET == HWY_SCALAR && \
    220     (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER))
    221   GTEST_SKIP();
    222 #else
    223   TestFloatIDCT<128, 64>();
    224 #endif
    225 }
    226 */
    227 HWY_NOINLINE void TestFastTranspose128x128() { TestFastTranspose<128, 128>(); }
    228 HWY_NOINLINE void TestFloatTranspose128x128() {
    229   TestFloatTranspose<128, 128>();
    230 }
    231 /*
    232 HWY_NOINLINE void TestFastIDCT128x128() { TestFastIDCT<128, 128>(); }
    233 HWY_NOINLINE void TestFloatIDCT128x128() {
    234 #if HWY_TARGET == HWY_SCALAR && \
    235     (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER))
    236   GTEST_SKIP();
    237 #else
    238   TestFloatIDCT<128, 128>();
    239 #endif
    240 }
    241 */
    242 HWY_NOINLINE void TestFastTranspose128x256() { TestFastTranspose<128, 256>(); }
    243 HWY_NOINLINE void TestFloatTranspose128x256() {
    244   TestFloatTranspose<128, 256>();
    245 }
    246 /*
    247 HWY_NOINLINE void TestFastIDCT128x256() { TestFastIDCT<128, 256>(); }
    248 HWY_NOINLINE void TestFloatIDCT128x256() {
    249 #if HWY_TARGET == HWY_SCALAR && \
    250     (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER))
    251   GTEST_SKIP();
    252 #else
    253   TestFloatIDCT<128, 256>();
    254 #endif
    255 }
    256 */
    257 HWY_NOINLINE void TestFastTranspose256x128() { TestFastTranspose<256, 128>(); }
    258 HWY_NOINLINE void TestFloatTranspose256x128() {
    259   TestFloatTranspose<256, 128>();
    260 }
    261 /*
    262 HWY_NOINLINE void TestFastIDCT256x128() { TestFastIDCT<256, 128>(); }
    263 HWY_NOINLINE void TestFloatIDCT256x128() {
    264 #if HWY_TARGET == HWY_SCALAR && \
    265     (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER))
    266   GTEST_SKIP();
    267 #else
    268   TestFloatIDCT<256, 128>();
    269 #endif
    270 }
    271 */
    272 HWY_NOINLINE void TestFastTranspose256x256() { TestFastTranspose<256, 256>(); }
    273 HWY_NOINLINE void TestFloatTranspose256x256() {
    274   TestFloatTranspose<256, 256>();
    275 }
    276 /*
    277 HWY_NOINLINE void TestFastIDCT256x256() { TestFastIDCT<256, 256>(); }
    278 HWY_NOINLINE void TestFloatIDCT256x256() {
    279 #if HWY_TARGET == HWY_SCALAR && \
    280     (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER))
    281   GTEST_SKIP();
    282 #else
    283   TestFloatIDCT<256, 256>();
    284 #endif
    285 }
    286 */
    287 
    288 }  // namespace
    289 // NOLINTNEXTLINE(google-readability-namespace-comments)
    290 }  // namespace HWY_NAMESPACE
    291 }  // namespace jxl
    292 HWY_AFTER_NAMESPACE();
    293 
    294 #if HWY_ONCE
    295 namespace jxl {
    296 
    297 class FastDCTTargetTest : public hwy::TestWithParamTarget {};
    298 HWY_TARGET_INSTANTIATE_TEST_SUITE_P(FastDCTTargetTest);
    299 
    300 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose8x8);
    301 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose8x8);
    302 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose8x16);
    303 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose8x16);
    304 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose8x32);
    305 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose8x32);
    306 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose16x8);
    307 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose16x8);
    308 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose16x16);
    309 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose16x16);
    310 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose16x32);
    311 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose16x32);
    312 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose32x8);
    313 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose32x8);
    314 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose32x16);
    315 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose32x16);
    316 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose32x32);
    317 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose32x32);
    318 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose32x64);
    319 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose32x64);
    320 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose64x32);
    321 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose64x32);
    322 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose64x64);
    323 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose64x64);
    324 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose64x128);
    325 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose64x128);
    326 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose128x64);
    327 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose128x64);
    328 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose128x128);
    329 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose128x128);
    330 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose128x256);
    331 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose128x256);
    332 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose256x128);
    333 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose256x128);
    334 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose256x256);
    335 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose256x256);
    336 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT8x8);
    337 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT8x8);
    338 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT8x16);
    339 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT8x16);
    340 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT8x32);
    341 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT8x32);
    342 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT16x8);
    343 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT16x8);
    344 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT16x16);
    345 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT16x16);
    346 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT16x32);
    347 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT16x32);
    348 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT32x8);
    349 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT32x8);
    350 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT32x16);
    351 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT32x16);
    352 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT32x32);
    353 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT32x32);
    354 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT32x64);
    355 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT32x64);
    356 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT64x32);
    357 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT64x32);
    358 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT64x64);
    359 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT64x64);
    360 /*
    361  * DCT-128 and above have very large errors just by rounding inputs.
    362 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT64x128);
    363 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT64x128);
    364 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT128x64);
    365 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT128x64);
    366 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT128x128);
    367 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT128x128);
    368 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT128x256);
    369 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT128x256);
    370 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT256x128);
    371 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT256x128);
    372 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT256x256);
    373 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT256x256);
    374 */
    375 
    376 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, BenchmarkFloatIDCT32x32);
    377 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, BenchmarkFastIDCT32x32);
    378 
    379 }  // namespace jxl
    380 #endif  // HWY_ONCE