enc_transforms-inl.h (26861B)
1 // Copyright (c) the JPEG XL Project Authors. All rights reserved. 2 // 3 // Use of this source code is governed by a BSD-style 4 // license that can be found in the LICENSE file. 5 6 #if defined(LIB_JXL_ENC_TRANSFORMS_INL_H_) == defined(HWY_TARGET_TOGGLE) 7 #ifdef LIB_JXL_ENC_TRANSFORMS_INL_H_ 8 #undef LIB_JXL_ENC_TRANSFORMS_INL_H_ 9 #else 10 #define LIB_JXL_ENC_TRANSFORMS_INL_H_ 11 #endif 12 13 #include <stddef.h> 14 15 #include <hwy/highway.h> 16 17 #include "lib/jxl/ac_strategy.h" 18 #include "lib/jxl/coeff_order_fwd.h" 19 #include "lib/jxl/dct-inl.h" 20 #include "lib/jxl/dct_scales.h" 21 HWY_BEFORE_NAMESPACE(); 22 namespace jxl { 23 namespace HWY_NAMESPACE { 24 namespace { 25 26 // Inverse of ReinterpretingDCT. 27 template <size_t DCT_ROWS, size_t DCT_COLS, size_t LF_ROWS, size_t LF_COLS, 28 size_t ROWS, size_t COLS> 29 HWY_INLINE void ReinterpretingIDCT(const float* input, 30 const size_t input_stride, float* output, 31 const size_t output_stride) { 32 HWY_ALIGN float block[ROWS * COLS] = {}; 33 if (ROWS < COLS) { 34 for (size_t y = 0; y < LF_ROWS; y++) { 35 for (size_t x = 0; x < LF_COLS; x++) { 36 block[y * COLS + x] = input[y * input_stride + x] * 37 DCTTotalResampleScale<DCT_ROWS, ROWS>(y) * 38 DCTTotalResampleScale<DCT_COLS, COLS>(x); 39 } 40 } 41 } else { 42 for (size_t y = 0; y < LF_COLS; y++) { 43 for (size_t x = 0; x < LF_ROWS; x++) { 44 block[y * ROWS + x] = input[y * input_stride + x] * 45 DCTTotalResampleScale<DCT_COLS, COLS>(y) * 46 DCTTotalResampleScale<DCT_ROWS, ROWS>(x); 47 } 48 } 49 } 50 51 // ROWS, COLS <= 8, so we can put scratch space on the stack. 52 HWY_ALIGN float scratch_space[ROWS * COLS * 3]; 53 ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride), 54 scratch_space); 55 } 56 57 template <size_t S> 58 void DCT2TopBlock(const float* block, size_t stride, float* out) { 59 static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim"); 60 static_assert(S % 2 == 0, "S should be even"); 61 float temp[kDCTBlockSize]; 62 constexpr size_t num_2x2 = S / 2; 63 for (size_t y = 0; y < num_2x2; y++) { 64 for (size_t x = 0; x < num_2x2; x++) { 65 float c00 = block[y * 2 * stride + x * 2]; 66 float c01 = block[y * 2 * stride + x * 2 + 1]; 67 float c10 = block[(y * 2 + 1) * stride + x * 2]; 68 float c11 = block[(y * 2 + 1) * stride + x * 2 + 1]; 69 float r00 = c00 + c01 + c10 + c11; 70 float r01 = c00 + c01 - c10 - c11; 71 float r10 = c00 - c01 + c10 - c11; 72 float r11 = c00 - c01 - c10 + c11; 73 r00 *= 0.25f; 74 r01 *= 0.25f; 75 r10 *= 0.25f; 76 r11 *= 0.25f; 77 temp[y * kBlockDim + x] = r00; 78 temp[y * kBlockDim + num_2x2 + x] = r01; 79 temp[(y + num_2x2) * kBlockDim + x] = r10; 80 temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11; 81 } 82 } 83 for (size_t y = 0; y < S; y++) { 84 for (size_t x = 0; x < S; x++) { 85 out[y * kBlockDim + x] = temp[y * kBlockDim + x]; 86 } 87 } 88 } 89 90 void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) { 91 HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = { 92 { 93 0.2500000000000000, 94 0.8769029297991420f, 95 0.0000000000000000, 96 0.0000000000000000, 97 0.0000000000000000, 98 -0.4105377591765233f, 99 0.0000000000000000, 100 0.0000000000000000, 101 0.0000000000000000, 102 0.0000000000000000, 103 0.0000000000000000, 104 0.0000000000000000, 105 0.0000000000000000, 106 0.0000000000000000, 107 0.0000000000000000, 108 0.0000000000000000, 109 }, 110 { 111 0.2500000000000000, 112 0.2206518106944235f, 113 0.0000000000000000, 114 0.0000000000000000, 115 -0.7071067811865474f, 116 0.6235485373547691f, 117 0.0000000000000000, 118 0.0000000000000000, 119 0.0000000000000000, 120 0.0000000000000000, 121 0.0000000000000000, 122 0.0000000000000000, 123 0.0000000000000000, 124 0.0000000000000000, 125 0.0000000000000000, 126 0.0000000000000000, 127 }, 128 { 129 0.2500000000000000, 130 -0.1014005039375376f, 131 0.4067007583026075f, 132 -0.2125574805828875f, 133 0.0000000000000000, 134 -0.0643507165794627f, 135 -0.4517556589999482f, 136 -0.3046847507248690f, 137 0.3017929516615495f, 138 0.4082482904638627f, 139 0.1747866975480809f, 140 -0.2110560104933578f, 141 -0.1426608480880726f, 142 -0.1381354035075859f, 143 -0.1743760259965107f, 144 0.1135498731499434f, 145 }, 146 { 147 0.2500000000000000, 148 -0.1014005039375375f, 149 0.4444481661973445f, 150 0.3085497062849767f, 151 0.0000000000000000f, 152 -0.0643507165794627f, 153 0.1585450355184006f, 154 0.5112616136591823f, 155 0.2579236279634118f, 156 0.0000000000000000, 157 0.0812611176717539f, 158 0.1856718091610980f, 159 -0.3416446842253372f, 160 0.3302282550303788f, 161 0.0702790691196284f, 162 -0.0741750459581035f, 163 }, 164 { 165 0.2500000000000000, 166 0.2206518106944236f, 167 0.0000000000000000, 168 0.0000000000000000, 169 0.7071067811865476f, 170 0.6235485373547694f, 171 0.0000000000000000, 172 0.0000000000000000, 173 0.0000000000000000, 174 0.0000000000000000, 175 0.0000000000000000, 176 0.0000000000000000, 177 0.0000000000000000, 178 0.0000000000000000, 179 0.0000000000000000, 180 0.0000000000000000, 181 }, 182 { 183 0.2500000000000000, 184 -0.1014005039375378f, 185 0.0000000000000000, 186 0.4706702258572536f, 187 0.0000000000000000, 188 -0.0643507165794628f, 189 -0.0403851516082220f, 190 0.0000000000000000, 191 0.1627234014286620f, 192 0.0000000000000000, 193 0.0000000000000000, 194 0.0000000000000000, 195 0.7367497537172237f, 196 0.0875511500058708f, 197 -0.2921026642334881f, 198 0.1940289303259434f, 199 }, 200 { 201 0.2500000000000000, 202 -0.1014005039375377f, 203 0.1957439937204294f, 204 -0.1621205195722993f, 205 0.0000000000000000, 206 -0.0643507165794628f, 207 0.0074182263792424f, 208 -0.2904801297289980f, 209 0.0952002265347504f, 210 0.0000000000000000, 211 -0.3675398009862027f, 212 0.4921585901373873f, 213 0.2462710772207515f, 214 -0.0794670660590957f, 215 0.3623817333531167f, 216 -0.4351904965232280f, 217 }, 218 { 219 0.2500000000000000, 220 -0.1014005039375376f, 221 0.2929100136981264f, 222 0.0000000000000000, 223 0.0000000000000000, 224 -0.0643507165794627f, 225 0.3935103426921017f, 226 -0.0657870154914280f, 227 0.0000000000000000, 228 -0.4082482904638628f, 229 -0.3078822139579090f, 230 -0.3852501370925192f, 231 -0.0857401903551931f, 232 -0.4613374887461511f, 233 0.0000000000000000, 234 0.2191868483885747f, 235 }, 236 { 237 0.2500000000000000, 238 -0.1014005039375376f, 239 -0.4067007583026072f, 240 -0.2125574805828705f, 241 0.0000000000000000, 242 -0.0643507165794627f, 243 -0.4517556589999464f, 244 0.3046847507248840f, 245 0.3017929516615503f, 246 -0.4082482904638635f, 247 -0.1747866975480813f, 248 0.2110560104933581f, 249 -0.1426608480880734f, 250 -0.1381354035075829f, 251 -0.1743760259965108f, 252 0.1135498731499426f, 253 }, 254 { 255 0.2500000000000000, 256 -0.1014005039375377f, 257 -0.1957439937204287f, 258 -0.1621205195722833f, 259 0.0000000000000000, 260 -0.0643507165794628f, 261 0.0074182263792444f, 262 0.2904801297290076f, 263 0.0952002265347505f, 264 0.0000000000000000, 265 0.3675398009862011f, 266 -0.4921585901373891f, 267 0.2462710772207514f, 268 -0.0794670660591026f, 269 0.3623817333531165f, 270 -0.4351904965232251f, 271 }, 272 { 273 0.2500000000000000, 274 -0.1014005039375375f, 275 0.0000000000000000, 276 -0.4706702258572528f, 277 0.0000000000000000, 278 -0.0643507165794627f, 279 0.1107416575309343f, 280 0.0000000000000000, 281 -0.1627234014286617f, 282 0.0000000000000000, 283 0.0000000000000000, 284 0.0000000000000000, 285 0.1488339922711357f, 286 0.4972464710953509f, 287 0.2921026642334879f, 288 0.5550443808910661f, 289 }, 290 { 291 0.2500000000000000, 292 -0.1014005039375377f, 293 0.1137907446044809f, 294 -0.1464291867126764f, 295 0.0000000000000000, 296 -0.0643507165794628f, 297 0.0829816309488205f, 298 -0.2388977352334460f, 299 -0.3531238544981630f, 300 -0.4082482904638630f, 301 0.4826689115059883f, 302 0.1741941265991622f, 303 -0.0476868035022925f, 304 0.1253805944856366f, 305 -0.4326608024727445f, 306 -0.2546827712406646f, 307 }, 308 { 309 0.2500000000000000, 310 -0.1014005039375377f, 311 -0.4444481661973438f, 312 0.3085497062849487f, 313 0.0000000000000000, 314 -0.0643507165794628f, 315 0.1585450355183970f, 316 -0.5112616136592012f, 317 0.2579236279634129f, 318 0.0000000000000000, 319 -0.0812611176717504f, 320 -0.1856718091610990f, 321 -0.3416446842253373f, 322 0.3302282550303805f, 323 0.0702790691196282f, 324 -0.0741750459581023f, 325 }, 326 { 327 0.2500000000000000, 328 -0.1014005039375376f, 329 -0.2929100136981264f, 330 0.0000000000000000, 331 0.0000000000000000, 332 -0.0643507165794627f, 333 0.3935103426921022f, 334 0.0657870154914254f, 335 0.0000000000000000, 336 0.4082482904638634f, 337 0.3078822139579031f, 338 0.3852501370925211f, 339 -0.0857401903551927f, 340 -0.4613374887461554f, 341 0.0000000000000000, 342 0.2191868483885728f, 343 }, 344 { 345 0.2500000000000000, 346 -0.1014005039375376f, 347 -0.1137907446044814f, 348 -0.1464291867126654f, 349 0.0000000000000000, 350 -0.0643507165794627f, 351 0.0829816309488214f, 352 0.2388977352334547f, 353 -0.3531238544981624f, 354 0.4082482904638630f, 355 -0.4826689115059858f, 356 -0.1741941265991621f, 357 -0.0476868035022928f, 358 0.1253805944856431f, 359 -0.4326608024727457f, 360 -0.2546827712406641f, 361 }, 362 { 363 0.2500000000000000, 364 -0.1014005039375374f, 365 0.0000000000000000, 366 0.4251149611657548f, 367 0.0000000000000000, 368 -0.0643507165794626f, 369 -0.4517556589999480f, 370 0.0000000000000000, 371 -0.6035859033230976f, 372 0.0000000000000000, 373 0.0000000000000000, 374 0.0000000000000000, 375 -0.1426608480880724f, 376 -0.1381354035075845f, 377 0.3487520519930227f, 378 0.1135498731499429f, 379 }, 380 }; 381 382 const HWY_CAPPED(float, 16) d; 383 for (size_t i = 0; i < 16; i += Lanes(d)) { 384 auto scalar = Zero(d); 385 for (size_t j = 0; j < 16; j++) { 386 auto px = Set(d, pixels[j]); 387 auto basis = Load(d, k4x4AFVBasisTranspose[j] + i); 388 scalar = MulAdd(px, basis, scalar); 389 } 390 Store(scalar, d, coeffs + i); 391 } 392 } 393 394 // Coefficient layout: 395 // - (even, even) positions hold AFV coefficients 396 // - (odd, even) positions hold DCT4x4 coefficients 397 // - (any, odd) positions hold DCT4x8 coefficients 398 template <size_t afv_kind> 399 void AFVTransformFromPixels(const float* JXL_RESTRICT pixels, 400 size_t pixels_stride, 401 float* JXL_RESTRICT coefficients) { 402 HWY_ALIGN float scratch_space[4 * 8 * 5]; 403 size_t afv_x = afv_kind & 1; 404 size_t afv_y = afv_kind / 2; 405 HWY_ALIGN float block[4 * 8] = {}; 406 for (size_t iy = 0; iy < 4; iy++) { 407 for (size_t ix = 0; ix < 4; ix++) { 408 block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] = 409 pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x]; 410 } 411 } 412 // AFV coefficients in (even, even) positions. 413 HWY_ALIGN float coeff[4 * 4]; 414 AFVDCT4x4(block, coeff); 415 for (size_t iy = 0; iy < 4; iy++) { 416 for (size_t ix = 0; ix < 4; ix++) { 417 coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix]; 418 } 419 } 420 // 4x4 DCT of the block with same y and different x. 421 ComputeScaledDCT<4, 4>()( 422 DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4), 423 pixels_stride), 424 block, scratch_space); 425 // ... in (odd, even) positions. 426 for (size_t iy = 0; iy < 4; iy++) { 427 for (size_t ix = 0; ix < 8; ix++) { 428 coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix]; 429 } 430 } 431 // 4x8 DCT of the other half of the block. 432 ComputeScaledDCT<4, 8>()( 433 DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride), 434 block, scratch_space); 435 for (size_t iy = 0; iy < 4; iy++) { 436 for (size_t ix = 0; ix < 8; ix++) { 437 coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix]; 438 } 439 } 440 float block00 = coefficients[0] * 0.25f; 441 float block01 = coefficients[1]; 442 float block10 = coefficients[8]; 443 coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f; 444 coefficients[1] = (block00 - block01) * 0.5f; 445 coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f; 446 } 447 448 HWY_MAYBE_UNUSED void TransformFromPixels(const AcStrategy::Type strategy, 449 const float* JXL_RESTRICT pixels, 450 size_t pixels_stride, 451 float* JXL_RESTRICT coefficients, 452 float* JXL_RESTRICT scratch_space) { 453 using Type = AcStrategy::Type; 454 switch (strategy) { 455 case Type::IDENTITY: { 456 for (size_t y = 0; y < 2; y++) { 457 for (size_t x = 0; x < 2; x++) { 458 float block_dc = 0; 459 for (size_t iy = 0; iy < 4; iy++) { 460 for (size_t ix = 0; ix < 4; ix++) { 461 block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix]; 462 } 463 } 464 block_dc *= 1.0f / 16; 465 for (size_t iy = 0; iy < 4; iy++) { 466 for (size_t ix = 0; ix < 4; ix++) { 467 if (ix == 1 && iy == 1) continue; 468 coefficients[(y + iy * 2) * 8 + x + ix * 2] = 469 pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] - 470 pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1]; 471 } 472 } 473 coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x]; 474 coefficients[y * 8 + x] = block_dc; 475 } 476 } 477 float block00 = coefficients[0]; 478 float block01 = coefficients[1]; 479 float block10 = coefficients[8]; 480 float block11 = coefficients[9]; 481 coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f; 482 coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f; 483 coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f; 484 coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f; 485 break; 486 } 487 case Type::DCT8X4: { 488 for (size_t x = 0; x < 2; x++) { 489 HWY_ALIGN float block[4 * 8]; 490 ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block, 491 scratch_space); 492 for (size_t iy = 0; iy < 4; iy++) { 493 for (size_t ix = 0; ix < 8; ix++) { 494 // Store transposed. 495 coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix]; 496 } 497 } 498 } 499 float block0 = coefficients[0]; 500 float block1 = coefficients[8]; 501 coefficients[0] = (block0 + block1) * 0.5f; 502 coefficients[8] = (block0 - block1) * 0.5f; 503 break; 504 } 505 case Type::DCT4X8: { 506 for (size_t y = 0; y < 2; y++) { 507 HWY_ALIGN float block[4 * 8]; 508 ComputeScaledDCT<4, 8>()( 509 DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block, 510 scratch_space); 511 for (size_t iy = 0; iy < 4; iy++) { 512 for (size_t ix = 0; ix < 8; ix++) { 513 coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix]; 514 } 515 } 516 } 517 float block0 = coefficients[0]; 518 float block1 = coefficients[8]; 519 coefficients[0] = (block0 + block1) * 0.5f; 520 coefficients[8] = (block0 - block1) * 0.5f; 521 break; 522 } 523 case Type::DCT4X4: { 524 for (size_t y = 0; y < 2; y++) { 525 for (size_t x = 0; x < 2; x++) { 526 HWY_ALIGN float block[4 * 4]; 527 ComputeScaledDCT<4, 4>()( 528 DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride), 529 block, scratch_space); 530 for (size_t iy = 0; iy < 4; iy++) { 531 for (size_t ix = 0; ix < 4; ix++) { 532 coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix]; 533 } 534 } 535 } 536 } 537 float block00 = coefficients[0]; 538 float block01 = coefficients[1]; 539 float block10 = coefficients[8]; 540 float block11 = coefficients[9]; 541 coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f; 542 coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f; 543 coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f; 544 coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f; 545 break; 546 } 547 case Type::DCT2X2: { 548 DCT2TopBlock<8>(pixels, pixels_stride, coefficients); 549 DCT2TopBlock<4>(coefficients, kBlockDim, coefficients); 550 DCT2TopBlock<2>(coefficients, kBlockDim, coefficients); 551 break; 552 } 553 case Type::DCT16X16: { 554 ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients, 555 scratch_space); 556 break; 557 } 558 case Type::DCT16X8: { 559 ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients, 560 scratch_space); 561 break; 562 } 563 case Type::DCT8X16: { 564 ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients, 565 scratch_space); 566 break; 567 } 568 case Type::DCT32X8: { 569 ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients, 570 scratch_space); 571 break; 572 } 573 case Type::DCT8X32: { 574 ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients, 575 scratch_space); 576 break; 577 } 578 case Type::DCT32X16: { 579 ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients, 580 scratch_space); 581 break; 582 } 583 case Type::DCT16X32: { 584 ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients, 585 scratch_space); 586 break; 587 } 588 case Type::DCT32X32: { 589 ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients, 590 scratch_space); 591 break; 592 } 593 case Type::DCT: { 594 ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients, 595 scratch_space); 596 break; 597 } 598 case Type::AFV0: { 599 AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients); 600 break; 601 } 602 case Type::AFV1: { 603 AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients); 604 break; 605 } 606 case Type::AFV2: { 607 AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients); 608 break; 609 } 610 case Type::AFV3: { 611 AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients); 612 break; 613 } 614 case Type::DCT64X64: { 615 ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients, 616 scratch_space); 617 break; 618 } 619 case Type::DCT64X32: { 620 ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients, 621 scratch_space); 622 break; 623 } 624 case Type::DCT32X64: { 625 ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients, 626 scratch_space); 627 break; 628 } 629 case Type::DCT128X128: { 630 ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients, 631 scratch_space); 632 break; 633 } 634 case Type::DCT128X64: { 635 ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients, 636 scratch_space); 637 break; 638 } 639 case Type::DCT64X128: { 640 ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients, 641 scratch_space); 642 break; 643 } 644 case Type::DCT256X256: { 645 ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients, 646 scratch_space); 647 break; 648 } 649 case Type::DCT256X128: { 650 ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients, 651 scratch_space); 652 break; 653 } 654 case Type::DCT128X256: { 655 ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients, 656 scratch_space); 657 break; 658 } 659 case Type::kNumValidStrategies: 660 JXL_UNREACHABLE("Invalid strategy"); 661 } 662 } 663 664 HWY_MAYBE_UNUSED void DCFromLowestFrequencies(const AcStrategy::Type strategy, 665 const float* block, float* dc, 666 size_t dc_stride) { 667 using Type = AcStrategy::Type; 668 switch (strategy) { 669 case Type::DCT16X8: { 670 ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim, 671 /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>( 672 block, 2 * kBlockDim, dc, dc_stride); 673 break; 674 } 675 case Type::DCT8X16: { 676 ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim, 677 /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>( 678 block, 2 * kBlockDim, dc, dc_stride); 679 break; 680 } 681 case Type::DCT16X16: { 682 ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim, 683 /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>( 684 block, 2 * kBlockDim, dc, dc_stride); 685 break; 686 } 687 case Type::DCT32X8: { 688 ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim, 689 /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>( 690 block, 4 * kBlockDim, dc, dc_stride); 691 break; 692 } 693 case Type::DCT8X32: { 694 ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim, 695 /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>( 696 block, 4 * kBlockDim, dc, dc_stride); 697 break; 698 } 699 case Type::DCT32X16: { 700 ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim, 701 /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>( 702 block, 4 * kBlockDim, dc, dc_stride); 703 break; 704 } 705 case Type::DCT16X32: { 706 ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim, 707 /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>( 708 block, 4 * kBlockDim, dc, dc_stride); 709 break; 710 } 711 case Type::DCT32X32: { 712 ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim, 713 /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>( 714 block, 4 * kBlockDim, dc, dc_stride); 715 break; 716 } 717 case Type::DCT64X32: { 718 ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim, 719 /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>( 720 block, 8 * kBlockDim, dc, dc_stride); 721 break; 722 } 723 case Type::DCT32X64: { 724 ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim, 725 /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>( 726 block, 8 * kBlockDim, dc, dc_stride); 727 break; 728 } 729 case Type::DCT64X64: { 730 ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim, 731 /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>( 732 block, 8 * kBlockDim, dc, dc_stride); 733 break; 734 } 735 case Type::DCT128X64: { 736 ReinterpretingIDCT< 737 /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim, 738 /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>( 739 block, 16 * kBlockDim, dc, dc_stride); 740 break; 741 } 742 case Type::DCT64X128: { 743 ReinterpretingIDCT< 744 /*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim, 745 /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>( 746 block, 16 * kBlockDim, dc, dc_stride); 747 break; 748 } 749 case Type::DCT128X128: { 750 ReinterpretingIDCT< 751 /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim, 752 /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>( 753 block, 16 * kBlockDim, dc, dc_stride); 754 break; 755 } 756 case Type::DCT256X128: { 757 ReinterpretingIDCT< 758 /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim, 759 /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>( 760 block, 32 * kBlockDim, dc, dc_stride); 761 break; 762 } 763 case Type::DCT128X256: { 764 ReinterpretingIDCT< 765 /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim, 766 /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>( 767 block, 32 * kBlockDim, dc, dc_stride); 768 break; 769 } 770 case Type::DCT256X256: { 771 ReinterpretingIDCT< 772 /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim, 773 /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>( 774 block, 32 * kBlockDim, dc, dc_stride); 775 break; 776 } 777 case Type::DCT: 778 case Type::DCT2X2: 779 case Type::DCT4X4: 780 case Type::DCT4X8: 781 case Type::DCT8X4: 782 case Type::AFV0: 783 case Type::AFV1: 784 case Type::AFV2: 785 case Type::AFV3: 786 case Type::IDENTITY: 787 dc[0] = block[0]; 788 break; 789 case Type::kNumValidStrategies: 790 JXL_UNREACHABLE("Invalid strategy"); 791 } 792 } 793 794 } // namespace 795 // NOLINTNEXTLINE(google-readability-namespace-comments) 796 } // namespace HWY_NAMESPACE 797 } // namespace jxl 798 HWY_AFTER_NAMESPACE(); 799 800 #endif // LIB_JXL_ENC_TRANSFORMS_INL_H_