fast_dct_test.cc (14767B)
1 // Copyright (c) the JPEG XL Project Authors. All rights reserved. 2 // 3 // Use of this source code is governed by a BSD-style 4 // license that can be found in the LICENSE file. 5 6 #include <numeric> 7 8 #undef HWY_TARGET_INCLUDE 9 #define HWY_TARGET_INCLUDE "lib/jxl/fast_dct_test.cc" 10 #include <hwy/foreach_target.h> 11 12 #include "lib/jxl/base/random.h" 13 #include "lib/jxl/dct-inl.h" 14 #include "lib/jxl/fast_dct-inl.h" 15 #include "lib/jxl/testing.h" 16 #include "lib/jxl/transpose-inl.h" 17 18 // Test utils 19 #include <hwy/highway.h> 20 #include <hwy/tests/hwy_gtest.h> 21 HWY_BEFORE_NAMESPACE(); 22 namespace jxl { 23 24 namespace HWY_NAMESPACE { 25 namespace { 26 27 void BenchmarkFloatIDCT32x32() { TestFloatIDCT<32, 32>(); } 28 void BenchmarkFastIDCT32x32() { TestFastIDCT<32, 32>(); } 29 30 template <size_t N, size_t M> 31 HWY_NOINLINE void TestFastTranspose() { 32 #if HWY_TARGET == HWY_NEON 33 auto array_mem = hwy::AllocateAligned<int16_t>(N * M); 34 int16_t* array = array_mem.get(); 35 auto transposed_mem = hwy::AllocateAligned<int16_t>(N * M); 36 int16_t* transposed = transposed_mem.get(); 37 std::iota(array, array + N * M, 0); 38 for (size_t j = 0; j < 100000000 / (N * M); j++) { 39 FastTransposeBlock(array, M, N, M, transposed, N); 40 } 41 for (size_t i = 0; i < M; i++) { 42 for (size_t j = 0; j < N; j++) { 43 EXPECT_EQ(array[j * M + i], transposed[i * N + j]); 44 } 45 } 46 #endif 47 } 48 49 template <size_t N, size_t M> 50 HWY_NOINLINE void TestFloatTranspose() { 51 auto array_mem = hwy::AllocateAligned<float>(N * M); 52 float* array = array_mem.get(); 53 auto transposed_mem = hwy::AllocateAligned<float>(N * M); 54 float* transposed = transposed_mem.get(); 55 std::iota(array, array + N * M, 0); 56 for (size_t j = 0; j < 100000000 / (N * M); j++) { 57 Transpose<N, M>::Run(DCTFrom(array, M), DCTTo(transposed, N)); 58 } 59 for (size_t i = 0; i < M; i++) { 60 for (size_t j = 0; j < N; j++) { 61 EXPECT_EQ(array[j * M + i], transposed[i * N + j]); 62 } 63 } 64 } 65 66 // TODO(sboukortt): re-enable the FloatIDCT tests once we find out why they fail 67 // in ASAN mode in the CI runners and seemingly not locally. 68 69 HWY_NOINLINE void TestFastTranspose8x8() { TestFastTranspose<8, 8>(); } 70 HWY_NOINLINE void TestFloatTranspose8x8() { TestFloatTranspose<8, 8>(); } 71 HWY_NOINLINE void TestFastIDCT8x8() { TestFastIDCT<8, 8>(); } 72 HWY_NOINLINE void TestFloatIDCT8x8() { 73 #if HWY_TARGET == HWY_SCALAR && \ 74 (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER)) 75 GTEST_SKIP(); 76 #else 77 TestFloatIDCT<8, 8>(); 78 #endif 79 } 80 HWY_NOINLINE void TestFastTranspose8x16() { TestFastTranspose<8, 16>(); } 81 HWY_NOINLINE void TestFloatTranspose8x16() { TestFloatTranspose<8, 16>(); } 82 HWY_NOINLINE void TestFastIDCT8x16() { TestFastIDCT<8, 16>(); } 83 HWY_NOINLINE void TestFloatIDCT8x16() { 84 #if HWY_TARGET == HWY_SCALAR && \ 85 (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER)) 86 GTEST_SKIP(); 87 #else 88 TestFloatIDCT<8, 16>(); 89 #endif 90 } 91 HWY_NOINLINE void TestFastTranspose8x32() { TestFastTranspose<8, 32>(); } 92 HWY_NOINLINE void TestFloatTranspose8x32() { TestFloatTranspose<8, 32>(); } 93 HWY_NOINLINE void TestFastIDCT8x32() { TestFastIDCT<8, 32>(); } 94 HWY_NOINLINE void TestFloatIDCT8x32() { 95 #if HWY_TARGET == HWY_SCALAR && \ 96 (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER)) 97 GTEST_SKIP(); 98 #else 99 TestFloatIDCT<8, 32>(); 100 #endif 101 } 102 HWY_NOINLINE void TestFastTranspose16x8() { TestFastTranspose<16, 8>(); } 103 HWY_NOINLINE void TestFloatTranspose16x8() { TestFloatTranspose<16, 8>(); } 104 HWY_NOINLINE void TestFastIDCT16x8() { TestFastIDCT<16, 8>(); } 105 HWY_NOINLINE void TestFloatIDCT16x8() { 106 #if HWY_TARGET == HWY_SCALAR && \ 107 (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER)) 108 GTEST_SKIP(); 109 #else 110 TestFloatIDCT<16, 8>(); 111 #endif 112 } 113 HWY_NOINLINE void TestFastTranspose16x16() { TestFastTranspose<16, 16>(); } 114 HWY_NOINLINE void TestFloatTranspose16x16() { TestFloatTranspose<16, 16>(); } 115 HWY_NOINLINE void TestFastIDCT16x16() { TestFastIDCT<16, 16>(); } 116 HWY_NOINLINE void TestFloatIDCT16x16() { 117 #if HWY_TARGET == HWY_SCALAR && \ 118 (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER)) 119 GTEST_SKIP(); 120 #else 121 TestFloatIDCT<16, 16>(); 122 #endif 123 } 124 HWY_NOINLINE void TestFastTranspose16x32() { TestFastTranspose<16, 32>(); } 125 HWY_NOINLINE void TestFloatTranspose16x32() { TestFloatTranspose<16, 32>(); } 126 HWY_NOINLINE void TestFastIDCT16x32() { TestFastIDCT<16, 32>(); } 127 HWY_NOINLINE void TestFloatIDCT16x32() { 128 #if HWY_TARGET == HWY_SCALAR && \ 129 (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER)) 130 GTEST_SKIP(); 131 #else 132 TestFloatIDCT<16, 32>(); 133 #endif 134 } 135 HWY_NOINLINE void TestFastTranspose32x8() { TestFastTranspose<32, 8>(); } 136 HWY_NOINLINE void TestFloatTranspose32x8() { TestFloatTranspose<32, 8>(); } 137 HWY_NOINLINE void TestFastIDCT32x8() { TestFastIDCT<32, 8>(); } 138 HWY_NOINLINE void TestFloatIDCT32x8() { 139 #if HWY_TARGET == HWY_SCALAR && \ 140 (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER)) 141 GTEST_SKIP(); 142 #else 143 TestFloatIDCT<32, 8>(); 144 #endif 145 } 146 HWY_NOINLINE void TestFastTranspose32x16() { TestFastTranspose<32, 16>(); } 147 HWY_NOINLINE void TestFloatTranspose32x16() { TestFloatTranspose<32, 16>(); } 148 HWY_NOINLINE void TestFastIDCT32x16() { TestFastIDCT<32, 16>(); } 149 HWY_NOINLINE void TestFloatIDCT32x16() { 150 #if HWY_TARGET == HWY_SCALAR && \ 151 (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER)) 152 GTEST_SKIP(); 153 #else 154 TestFloatIDCT<32, 16>(); 155 #endif 156 } 157 HWY_NOINLINE void TestFastTranspose32x32() { TestFastTranspose<32, 32>(); } 158 HWY_NOINLINE void TestFloatTranspose32x32() { TestFloatTranspose<32, 32>(); } 159 HWY_NOINLINE void TestFastIDCT32x32() { TestFastIDCT<32, 32>(); } 160 HWY_NOINLINE void TestFloatIDCT32x32() { 161 #if HWY_TARGET == HWY_SCALAR && \ 162 (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER)) 163 GTEST_SKIP(); 164 #else 165 TestFloatIDCT<32, 32>(); 166 #endif 167 } 168 HWY_NOINLINE void TestFastTranspose32x64() { TestFastTranspose<32, 64>(); } 169 HWY_NOINLINE void TestFloatTranspose32x64() { TestFloatTranspose<32, 64>(); } 170 HWY_NOINLINE void TestFastIDCT32x64() { TestFastIDCT<32, 64>(); } 171 HWY_NOINLINE void TestFloatIDCT32x64() { 172 #if HWY_TARGET == HWY_SCALAR && \ 173 (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER)) 174 GTEST_SKIP(); 175 #else 176 TestFloatIDCT<32, 64>(); 177 #endif 178 } 179 HWY_NOINLINE void TestFastTranspose64x32() { TestFastTranspose<64, 32>(); } 180 HWY_NOINLINE void TestFloatTranspose64x32() { TestFloatTranspose<64, 32>(); } 181 HWY_NOINLINE void TestFastIDCT64x32() { TestFastIDCT<64, 32>(); } 182 HWY_NOINLINE void TestFloatIDCT64x32() { 183 #if HWY_TARGET == HWY_SCALAR && \ 184 (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER)) 185 GTEST_SKIP(); 186 #else 187 TestFloatIDCT<64, 32>(); 188 #endif 189 } 190 HWY_NOINLINE void TestFastTranspose64x64() { TestFastTranspose<64, 64>(); } 191 HWY_NOINLINE void TestFloatTranspose64x64() { TestFloatTranspose<64, 64>(); } 192 HWY_NOINLINE void TestFastIDCT64x64() { TestFastIDCT<64, 64>(); } 193 HWY_NOINLINE void TestFloatIDCT64x64() { 194 #if HWY_TARGET == HWY_SCALAR && \ 195 (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER)) 196 GTEST_SKIP(); 197 #else 198 TestFloatIDCT<64, 64>(); 199 #endif 200 } 201 HWY_NOINLINE void TestFastTranspose64x128() { TestFastTranspose<64, 128>(); } 202 HWY_NOINLINE void TestFloatTranspose64x128() { TestFloatTranspose<64, 128>(); } 203 /* 204 HWY_NOINLINE void TestFastIDCT64x128() { TestFastIDCT<64, 128>(); } 205 HWY_NOINLINE void TestFloatIDCT64x128() { 206 #if HWY_TARGET == HWY_SCALAR && \ 207 (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER)) 208 GTEST_SKIP(); 209 #else 210 TestFloatIDCT<64, 128>(); 211 #endif 212 } 213 */ 214 HWY_NOINLINE void TestFastTranspose128x64() { TestFastTranspose<128, 64>(); } 215 HWY_NOINLINE void TestFloatTranspose128x64() { TestFloatTranspose<128, 64>(); } 216 /* 217 HWY_NOINLINE void TestFastIDCT128x64() { TestFastIDCT<128, 64>(); } 218 HWY_NOINLINE void TestFloatIDCT128x64() { 219 #if HWY_TARGET == HWY_SCALAR && \ 220 (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER)) 221 GTEST_SKIP(); 222 #else 223 TestFloatIDCT<128, 64>(); 224 #endif 225 } 226 */ 227 HWY_NOINLINE void TestFastTranspose128x128() { TestFastTranspose<128, 128>(); } 228 HWY_NOINLINE void TestFloatTranspose128x128() { 229 TestFloatTranspose<128, 128>(); 230 } 231 /* 232 HWY_NOINLINE void TestFastIDCT128x128() { TestFastIDCT<128, 128>(); } 233 HWY_NOINLINE void TestFloatIDCT128x128() { 234 #if HWY_TARGET == HWY_SCALAR && \ 235 (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER)) 236 GTEST_SKIP(); 237 #else 238 TestFloatIDCT<128, 128>(); 239 #endif 240 } 241 */ 242 HWY_NOINLINE void TestFastTranspose128x256() { TestFastTranspose<128, 256>(); } 243 HWY_NOINLINE void TestFloatTranspose128x256() { 244 TestFloatTranspose<128, 256>(); 245 } 246 /* 247 HWY_NOINLINE void TestFastIDCT128x256() { TestFastIDCT<128, 256>(); } 248 HWY_NOINLINE void TestFloatIDCT128x256() { 249 #if HWY_TARGET == HWY_SCALAR && \ 250 (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER)) 251 GTEST_SKIP(); 252 #else 253 TestFloatIDCT<128, 256>(); 254 #endif 255 } 256 */ 257 HWY_NOINLINE void TestFastTranspose256x128() { TestFastTranspose<256, 128>(); } 258 HWY_NOINLINE void TestFloatTranspose256x128() { 259 TestFloatTranspose<256, 128>(); 260 } 261 /* 262 HWY_NOINLINE void TestFastIDCT256x128() { TestFastIDCT<256, 128>(); } 263 HWY_NOINLINE void TestFloatIDCT256x128() { 264 #if HWY_TARGET == HWY_SCALAR && \ 265 (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER)) 266 GTEST_SKIP(); 267 #else 268 TestFloatIDCT<256, 128>(); 269 #endif 270 } 271 */ 272 HWY_NOINLINE void TestFastTranspose256x256() { TestFastTranspose<256, 256>(); } 273 HWY_NOINLINE void TestFloatTranspose256x256() { 274 TestFloatTranspose<256, 256>(); 275 } 276 /* 277 HWY_NOINLINE void TestFastIDCT256x256() { TestFastIDCT<256, 256>(); } 278 HWY_NOINLINE void TestFloatIDCT256x256() { 279 #if HWY_TARGET == HWY_SCALAR && \ 280 (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER)) 281 GTEST_SKIP(); 282 #else 283 TestFloatIDCT<256, 256>(); 284 #endif 285 } 286 */ 287 288 } // namespace 289 // NOLINTNEXTLINE(google-readability-namespace-comments) 290 } // namespace HWY_NAMESPACE 291 } // namespace jxl 292 HWY_AFTER_NAMESPACE(); 293 294 #if HWY_ONCE 295 namespace jxl { 296 297 class FastDCTTargetTest : public hwy::TestWithParamTarget {}; 298 HWY_TARGET_INSTANTIATE_TEST_SUITE_P(FastDCTTargetTest); 299 300 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose8x8); 301 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose8x8); 302 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose8x16); 303 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose8x16); 304 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose8x32); 305 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose8x32); 306 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose16x8); 307 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose16x8); 308 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose16x16); 309 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose16x16); 310 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose16x32); 311 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose16x32); 312 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose32x8); 313 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose32x8); 314 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose32x16); 315 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose32x16); 316 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose32x32); 317 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose32x32); 318 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose32x64); 319 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose32x64); 320 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose64x32); 321 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose64x32); 322 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose64x64); 323 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose64x64); 324 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose64x128); 325 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose64x128); 326 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose128x64); 327 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose128x64); 328 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose128x128); 329 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose128x128); 330 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose128x256); 331 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose128x256); 332 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose256x128); 333 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose256x128); 334 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose256x256); 335 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose256x256); 336 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT8x8); 337 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT8x8); 338 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT8x16); 339 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT8x16); 340 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT8x32); 341 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT8x32); 342 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT16x8); 343 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT16x8); 344 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT16x16); 345 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT16x16); 346 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT16x32); 347 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT16x32); 348 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT32x8); 349 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT32x8); 350 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT32x16); 351 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT32x16); 352 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT32x32); 353 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT32x32); 354 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT32x64); 355 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT32x64); 356 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT64x32); 357 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT64x32); 358 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT64x64); 359 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT64x64); 360 /* 361 * DCT-128 and above have very large errors just by rounding inputs. 362 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT64x128); 363 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT64x128); 364 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT128x64); 365 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT128x64); 366 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT128x128); 367 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT128x128); 368 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT128x256); 369 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT128x256); 370 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT256x128); 371 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT256x128); 372 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT256x256); 373 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT256x256); 374 */ 375 376 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, BenchmarkFloatIDCT32x32); 377 HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, BenchmarkFastIDCT32x32); 378 379 } // namespace jxl 380 #endif // HWY_ONCE