dct_test.cc (11575B)
1 // Copyright (c) the JPEG XL Project Authors. All rights reserved. 2 // 3 // Use of this source code is governed by a BSD-style 4 // license that can be found in the LICENSE file. 5 6 #include <string.h> 7 8 #include <cmath> 9 #include <numeric> 10 11 #undef HWY_TARGET_INCLUDE 12 #define HWY_TARGET_INCLUDE "lib/jxl/dct_test.cc" 13 #include <hwy/foreach_target.h> 14 #include <hwy/highway.h> 15 #include <hwy/tests/hwy_gtest.h> 16 17 #include "lib/jxl/dct-inl.h" 18 #include "lib/jxl/dct_for_test.h" 19 #include "lib/jxl/dct_scales.h" 20 #include "lib/jxl/image.h" 21 #include "lib/jxl/test_utils.h" 22 #include "lib/jxl/testing.h" 23 24 HWY_BEFORE_NAMESPACE(); 25 namespace jxl { 26 namespace HWY_NAMESPACE { 27 28 // Computes the in-place NxN DCT of block. 29 // Requires that block is HWY_ALIGN'ed. 30 // 31 // Performs ComputeTransposedScaledDCT and then transposes and scales it to 32 // obtain "vanilla" DCT. 33 template <size_t N> 34 void ComputeDCT(float block[N * N]) { 35 HWY_ALIGN float tmp_block[N * N]; 36 HWY_ALIGN float scratch_space[4 * N * N]; 37 ComputeScaledDCT<N, N>()(DCTFrom(block, N), tmp_block, scratch_space); 38 39 // Untranspose. 40 Transpose<N, N>::Run(DCTFrom(tmp_block, N), DCTTo(block, N)); 41 } 42 43 // Computes the in-place 8x8 iDCT of block. 44 // Requires that block is HWY_ALIGN'ed. 45 template <int N> 46 void ComputeIDCT(float block[N * N]) { 47 HWY_ALIGN float tmp_block[N * N]; 48 HWY_ALIGN float scratch_space[4 * N * N]; 49 // Untranspose. 50 Transpose<N, N>::Run(DCTFrom(block, N), DCTTo(tmp_block, N)); 51 52 ComputeScaledIDCT<N, N>()(tmp_block, DCTTo(block, N), scratch_space); 53 } 54 55 template <size_t N> 56 void TransposeTestT(float accuracy) { 57 constexpr size_t kBlockSize = N * N; 58 HWY_ALIGN float src[kBlockSize]; 59 DCTTo to_src(src, N); 60 for (size_t y = 0; y < N; ++y) { 61 for (size_t x = 0; x < N; ++x) { 62 to_src.Write(y * N + x, y, x); 63 } 64 } 65 HWY_ALIGN float dst[kBlockSize]; 66 Transpose<N, N>::Run(DCTFrom(src, N), DCTTo(dst, N)); 67 DCTFrom from_dst(dst, N); 68 for (size_t y = 0; y < N; ++y) { 69 for (size_t x = 0; x < N; ++x) { 70 float expected = x * N + y; 71 float actual = from_dst.Read(y, x); 72 EXPECT_NEAR(expected, actual, accuracy) << "x = " << x << ", y = " << y; 73 } 74 } 75 } 76 77 void TransposeTest() { 78 TransposeTestT<8>(1e-7f); 79 TransposeTestT<16>(1e-7f); 80 TransposeTestT<32>(1e-7f); 81 } 82 83 template <size_t N> 84 void ColumnDctRoundtripT(float accuracy) { 85 constexpr size_t kBlockSize = N * N; 86 // Though we are only interested in single column result, dct.h has built-in 87 // limit on minimal number of columns processed. So, to be safe, we do 88 // regular 8x8 block transformation. On the bright side - we could check all 89 // 8 basis vectors at once. 90 HWY_ALIGN float block[kBlockSize]; 91 HWY_ALIGN float scratch[3 * kBlockSize]; 92 DCTTo to(block, N); 93 DCTFrom from(block, N); 94 for (size_t i = 0; i < N; ++i) { 95 for (size_t j = 0; j < N; ++j) { 96 to.Write((i == j) ? 1.0f : 0.0f, i, j); 97 } 98 } 99 100 // Running (I)DCT on the same memory block seems to trigger a compiler bug on 101 // ARMv7 with clang6. 102 HWY_ALIGN float tmp[kBlockSize]; 103 DCTTo to_tmp(tmp, N); 104 DCTFrom from_tmp(tmp, N); 105 106 DCT1D<N, N>()(from, to_tmp, scratch); 107 IDCT1D<N, N>()(from_tmp, to, scratch); 108 109 for (size_t i = 0; i < N; ++i) { 110 for (size_t j = 0; j < N; ++j) { 111 float expected = (i == j) ? 1.0f : 0.0f; 112 float actual = from.Read(i, j); 113 EXPECT_NEAR(expected, actual, accuracy) << " i=" << i << ", j=" << j; 114 } 115 } 116 } 117 118 void ColumnDctRoundtrip() { 119 ColumnDctRoundtripT<8>(1e-6f); 120 ColumnDctRoundtripT<16>(1e-6f); 121 ColumnDctRoundtripT<32>(1e-6f); 122 } 123 124 template <size_t N> 125 void TestDctAccuracy(float accuracy, size_t start = 0, size_t end = N * N) { 126 constexpr size_t kBlockSize = N * N; 127 for (size_t i = start; i < end; i++) { 128 HWY_ALIGN float fast[kBlockSize] = {0.0f}; 129 double slow[kBlockSize] = {0.0}; 130 fast[i] = 1.0; 131 slow[i] = 1.0; 132 DCTSlow<N>(slow); 133 ComputeDCT<N>(fast); 134 for (size_t k = 0; k < kBlockSize; ++k) { 135 EXPECT_NEAR(fast[k], slow[k], accuracy / N) 136 << "i = " << i << ", k = " << k << ", N = " << N; 137 } 138 } 139 } 140 141 template <size_t N> 142 void TestIdctAccuracy(float accuracy, size_t start = 0, size_t end = N * N) { 143 constexpr size_t kBlockSize = N * N; 144 for (size_t i = start; i < end; i++) { 145 HWY_ALIGN float fast[kBlockSize] = {0.0f}; 146 double slow[kBlockSize] = {0.0}; 147 fast[i] = 1.0; 148 slow[i] = 1.0; 149 IDCTSlow<N>(slow); 150 ComputeIDCT<N>(fast); 151 for (size_t k = 0; k < kBlockSize; ++k) { 152 EXPECT_NEAR(fast[k], slow[k], accuracy * N) 153 << "i = " << i << ", k = " << k << ", N = " << N; 154 } 155 } 156 } 157 158 template <size_t N> 159 void TestInverseT(float accuracy) { 160 test::ThreadPoolForTests pool(N < 32 ? 0 : 8); 161 enum { kBlockSize = N * N }; 162 EXPECT_TRUE(RunOnPool( 163 &pool, 0, kBlockSize, ThreadPool::NoInit, 164 [accuracy](const uint32_t task, size_t /*thread*/) { 165 const size_t i = static_cast<size_t>(task); 166 HWY_ALIGN float x[kBlockSize] = {0.0f}; 167 x[i] = 1.0; 168 169 ComputeIDCT<N>(x); 170 ComputeDCT<N>(x); 171 172 for (size_t k = 0; k < kBlockSize; ++k) { 173 EXPECT_NEAR(x[k], (k == i) ? 1.0f : 0.0f, accuracy) 174 << "i = " << i << ", k = " << k; 175 } 176 }, 177 "TestInverse")); 178 } 179 180 void InverseTest() { 181 TestInverseT<8>(1e-6f); 182 TestInverseT<16>(1e-6f); 183 TestInverseT<32>(3e-6f); 184 } 185 186 template <size_t N> 187 void TestDctTranspose(float accuracy, size_t start = 0, size_t end = N * N) { 188 constexpr size_t kBlockSize = N * N; 189 for (size_t i = start; i < end; i++) { 190 for (size_t j = 0; j < kBlockSize; ++j) { 191 // We check that <e_i, Me_j> = <M^\dagger{}e_i, e_j>. 192 // That means (Me_j)_i = (M^\dagger{}e_i)_j 193 194 // x := Me_j 195 HWY_ALIGN float x[kBlockSize] = {0.0f}; 196 x[j] = 1.0; 197 ComputeIDCT<N>(x); 198 // y := M^\dagger{}e_i 199 HWY_ALIGN float y[kBlockSize] = {0.0f}; 200 y[i] = 1.0; 201 ComputeDCT<N>(y); 202 203 EXPECT_NEAR(x[i] / N, y[j] * N, accuracy) << "i = " << i << ", j = " << j; 204 } 205 } 206 } 207 208 template <size_t N> 209 void TestSlowInverse(float accuracy, size_t start = 0, size_t end = N * N) { 210 constexpr size_t kBlockSize = N * N; 211 for (size_t i = start; i < end; i++) { 212 double x[kBlockSize] = {0.0f}; 213 x[i] = 1.0; 214 215 DCTSlow<N>(x); 216 IDCTSlow<N>(x); 217 218 for (size_t k = 0; k < kBlockSize; ++k) { 219 EXPECT_NEAR(x[k], (k == i) ? 1.0f : 0.0f, accuracy) 220 << "i = " << i << ", k = " << k; 221 } 222 } 223 } 224 225 template <size_t ROWS, size_t COLS> 226 void TestRectInverseT(float accuracy) { 227 constexpr size_t kBlockSize = ROWS * COLS; 228 for (size_t i = 0; i < kBlockSize; ++i) { 229 HWY_ALIGN float x[kBlockSize] = {0.0f}; 230 HWY_ALIGN float out[kBlockSize] = {0.0f}; 231 x[i] = 1.0; 232 HWY_ALIGN float coeffs[kBlockSize] = {0.0f}; 233 HWY_ALIGN float scratch_space[kBlockSize * 5]; 234 235 ComputeScaledDCT<ROWS, COLS>()(DCTFrom(x, COLS), coeffs, scratch_space); 236 ComputeScaledIDCT<ROWS, COLS>()(coeffs, DCTTo(out, COLS), scratch_space); 237 238 for (size_t k = 0; k < kBlockSize; ++k) { 239 EXPECT_NEAR(out[k], (k == i) ? 1.0f : 0.0f, accuracy) 240 << "i = " << i << ", k = " << k << " ROWS = " << ROWS 241 << " COLS = " << COLS; 242 } 243 } 244 } 245 246 void TestRectInverse() { 247 TestRectInverseT<16, 32>(1e-6f); 248 TestRectInverseT<8, 32>(1e-6f); 249 TestRectInverseT<8, 16>(1e-6f); 250 TestRectInverseT<4, 8>(1e-6f); 251 TestRectInverseT<2, 4>(1e-6f); 252 TestRectInverseT<1, 4>(1e-6f); 253 TestRectInverseT<1, 2>(1e-6f); 254 255 TestRectInverseT<32, 16>(1e-6f); 256 TestRectInverseT<32, 8>(1e-6f); 257 TestRectInverseT<16, 8>(1e-6f); 258 TestRectInverseT<8, 4>(1e-6f); 259 TestRectInverseT<4, 2>(1e-6f); 260 TestRectInverseT<4, 1>(1e-6f); 261 TestRectInverseT<2, 1>(1e-6f); 262 } 263 264 template <size_t ROWS, size_t COLS> 265 void TestRectTransposeT(float accuracy) { 266 constexpr size_t kBlockSize = ROWS * COLS; 267 HWY_ALIGN float scratch_space[kBlockSize * 5]; 268 for (size_t px = 0; px < COLS; ++px) { 269 for (size_t py = 0; py < ROWS; ++py) { 270 HWY_ALIGN float x1[kBlockSize] = {0.0f}; 271 HWY_ALIGN float x2[kBlockSize] = {0.0f}; 272 HWY_ALIGN float coeffs1[kBlockSize] = {0.0f}; 273 HWY_ALIGN float coeffs2[kBlockSize] = {0.0f}; 274 x1[py * COLS + px] = 1; 275 x2[px * ROWS + py] = 1; 276 277 constexpr size_t OUT_ROWS = ROWS < COLS ? ROWS : COLS; 278 constexpr size_t OUT_COLS = ROWS < COLS ? COLS : ROWS; 279 280 ComputeScaledDCT<ROWS, COLS>()(DCTFrom(x1, COLS), coeffs1, scratch_space); 281 ComputeScaledDCT<COLS, ROWS>()(DCTFrom(x2, ROWS), coeffs2, scratch_space); 282 283 for (size_t x = 0; x < OUT_COLS; ++x) { 284 for (size_t y = 0; y < OUT_ROWS; ++y) { 285 EXPECT_NEAR(coeffs1[y * OUT_COLS + x], coeffs2[y * OUT_COLS + x], 286 accuracy) 287 << " px = " << px << ", py = " << py << ", x = " << x 288 << ", y = " << y; 289 } 290 } 291 } 292 } 293 } 294 295 void TestRectTranspose() { 296 TestRectTransposeT<16, 32>(1e-6f); 297 TestRectTransposeT<8, 32>(1e-6f); 298 TestRectTransposeT<8, 16>(1e-6f); 299 TestRectTransposeT<4, 8>(1e-6f); 300 TestRectTransposeT<2, 4>(1e-6f); 301 TestRectTransposeT<1, 4>(1e-6f); 302 TestRectTransposeT<1, 2>(1e-6f); 303 304 // Identical to 8, 16 305 // TestRectTranspose<16, 8>(1e-6f); 306 } 307 308 void TestDctAccuracyShard(size_t shard) { 309 if (shard == 0) { 310 TestDctAccuracy<1>(1.1E-7f); 311 TestDctAccuracy<2>(1.1E-7f); 312 TestDctAccuracy<4>(1.1E-7f); 313 TestDctAccuracy<8>(1.1E-7f); 314 TestDctAccuracy<16>(1.3E-7f); 315 } 316 TestDctAccuracy<32>(1.1E-7f, 32 * shard, 32 * (shard + 1)); 317 } 318 319 void TestIdctAccuracyShard(size_t shard) { 320 if (shard == 0) { 321 TestIdctAccuracy<1>(1E-7f); 322 TestIdctAccuracy<2>(1E-7f); 323 TestIdctAccuracy<4>(1E-7f); 324 TestIdctAccuracy<8>(1E-7f); 325 TestIdctAccuracy<16>(1E-7f); 326 } 327 TestIdctAccuracy<32>(1E-7f, 32 * shard, 32 * (shard + 1)); 328 } 329 330 void TestDctTransposeShard(size_t shard) { 331 if (shard == 0) { 332 TestDctTranspose<8>(1E-6f); 333 TestDctTranspose<16>(1E-6f); 334 } 335 TestDctTranspose<32>(3E-6f, 32 * shard, 32 * (shard + 1)); 336 } 337 338 void TestSlowInverseShard(size_t shard) { 339 if (shard == 0) { 340 TestSlowInverse<1>(1E-5f); 341 TestSlowInverse<2>(1E-5f); 342 TestSlowInverse<4>(1E-5f); 343 TestSlowInverse<8>(1E-5f); 344 TestSlowInverse<16>(1E-5f); 345 } 346 TestSlowInverse<32>(1E-5f, 32 * shard, 32 * (shard + 1)); 347 } 348 349 // NOLINTNEXTLINE(google-readability-namespace-comments) 350 } // namespace HWY_NAMESPACE 351 } // namespace jxl 352 HWY_AFTER_NAMESPACE(); 353 354 #if HWY_ONCE 355 namespace jxl { 356 357 class TransposeTest : public hwy::TestWithParamTarget {}; 358 359 HWY_TARGET_INSTANTIATE_TEST_SUITE_P(TransposeTest); 360 361 HWY_EXPORT_AND_TEST_P(TransposeTest, TransposeTest); 362 HWY_EXPORT_AND_TEST_P(TransposeTest, InverseTest); 363 HWY_EXPORT_AND_TEST_P(TransposeTest, ColumnDctRoundtrip); 364 HWY_EXPORT_AND_TEST_P(TransposeTest, TestRectInverse); 365 HWY_EXPORT_AND_TEST_P(TransposeTest, TestRectTranspose); 366 367 // Tests in the DctShardedTest class are sharded for N=32. 368 class DctShardedTest : public ::hwy::TestWithParamTargetAndT<uint32_t> {}; 369 370 std::vector<uint32_t> ShardRange(uint32_t n) { 371 #ifdef JXL_DISABLE_SLOW_TESTS 372 JXL_ASSERT(n > 6); 373 std::vector<uint32_t> ret = {0, 1, 3, 5, n - 1}; 374 #else 375 std::vector<uint32_t> ret(n); 376 std::iota(ret.begin(), ret.end(), 0); 377 #endif // JXL_DISABLE_SLOW_TESTS 378 return ret; 379 } 380 381 HWY_TARGET_INSTANTIATE_TEST_SUITE_P_T(DctShardedTest, 382 ::testing::ValuesIn(ShardRange(32))); 383 384 HWY_EXPORT_AND_TEST_P_T(DctShardedTest, TestDctAccuracyShard); 385 HWY_EXPORT_AND_TEST_P_T(DctShardedTest, TestIdctAccuracyShard); 386 HWY_EXPORT_AND_TEST_P_T(DctShardedTest, TestDctTransposeShard); 387 HWY_EXPORT_AND_TEST_P_T(DctShardedTest, TestSlowInverseShard); 388 389 } // namespace jxl 390 #endif // HWY_ONCE