dec_transforms-inl.h (26791B)
1 // Copyright (c) the JPEG XL Project Authors. All rights reserved. 2 // 3 // Use of this source code is governed by a BSD-style 4 // license that can be found in the LICENSE file. 5 6 #if defined(LIB_JXL_DEC_TRANSFORMS_INL_H_) == defined(HWY_TARGET_TOGGLE) 7 #ifdef LIB_JXL_DEC_TRANSFORMS_INL_H_ 8 #undef LIB_JXL_DEC_TRANSFORMS_INL_H_ 9 #else 10 #define LIB_JXL_DEC_TRANSFORMS_INL_H_ 11 #endif 12 13 #include <stddef.h> 14 15 #include <hwy/highway.h> 16 17 #include "lib/jxl/ac_strategy.h" 18 #include "lib/jxl/coeff_order_fwd.h" 19 #include "lib/jxl/dct-inl.h" 20 #include "lib/jxl/dct_scales.h" 21 HWY_BEFORE_NAMESPACE(); 22 namespace jxl { 23 namespace HWY_NAMESPACE { 24 namespace { 25 26 // These templates are not found via ADL. 27 using hwy::HWY_NAMESPACE::MulAdd; 28 29 // Computes the lowest-frequency LF_ROWSxLF_COLS-sized square in output, which 30 // is a DCT_ROWS*DCT_COLS-sized DCT block, by doing a ROWS*COLS DCT on the 31 // input block. 32 template <size_t DCT_ROWS, size_t DCT_COLS, size_t LF_ROWS, size_t LF_COLS, 33 size_t ROWS, size_t COLS> 34 JXL_INLINE void ReinterpretingDCT(const float* input, const size_t input_stride, 35 float* output, const size_t output_stride, 36 float* JXL_RESTRICT block, 37 float* JXL_RESTRICT scratch_space) { 38 static_assert(LF_ROWS == ROWS, 39 "ReinterpretingDCT should only be called with LF == N"); 40 static_assert(LF_COLS == COLS, 41 "ReinterpretingDCT should only be called with LF == N"); 42 ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block, 43 scratch_space); 44 if (ROWS < COLS) { 45 for (size_t y = 0; y < LF_ROWS; y++) { 46 for (size_t x = 0; x < LF_COLS; x++) { 47 output[y * output_stride + x] = 48 block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) * 49 DCTTotalResampleScale<COLS, DCT_COLS>(x); 50 } 51 } 52 } else { 53 for (size_t y = 0; y < LF_COLS; y++) { 54 for (size_t x = 0; x < LF_ROWS; x++) { 55 output[y * output_stride + x] = 56 block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) * 57 DCTTotalResampleScale<ROWS, DCT_ROWS>(x); 58 } 59 } 60 } 61 } 62 63 template <size_t S> 64 void IDCT2TopBlock(const float* block, size_t stride_out, float* out) { 65 static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim"); 66 static_assert(S % 2 == 0, "S should be even"); 67 float temp[kDCTBlockSize]; 68 constexpr size_t num_2x2 = S / 2; 69 for (size_t y = 0; y < num_2x2; y++) { 70 for (size_t x = 0; x < num_2x2; x++) { 71 float c00 = block[y * kBlockDim + x]; 72 float c01 = block[y * kBlockDim + num_2x2 + x]; 73 float c10 = block[(y + num_2x2) * kBlockDim + x]; 74 float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x]; 75 float r00 = c00 + c01 + c10 + c11; 76 float r01 = c00 + c01 - c10 - c11; 77 float r10 = c00 - c01 + c10 - c11; 78 float r11 = c00 - c01 - c10 + c11; 79 temp[y * 2 * kBlockDim + x * 2] = r00; 80 temp[y * 2 * kBlockDim + x * 2 + 1] = r01; 81 temp[(y * 2 + 1) * kBlockDim + x * 2] = r10; 82 temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11; 83 } 84 } 85 for (size_t y = 0; y < S; y++) { 86 for (size_t x = 0; x < S; x++) { 87 out[y * stride_out + x] = temp[y * kBlockDim + x]; 88 } 89 } 90 } 91 92 void AFVIDCT4x4(const float* JXL_RESTRICT coeffs, float* JXL_RESTRICT pixels) { 93 HWY_ALIGN static constexpr float k4x4AFVBasis[16][16] = { 94 { 95 0.25, 96 0.25, 97 0.25, 98 0.25, 99 0.25, 100 0.25, 101 0.25, 102 0.25, 103 0.25, 104 0.25, 105 0.25, 106 0.25, 107 0.25, 108 0.25, 109 0.25, 110 0.25, 111 }, 112 { 113 0.876902929799142f, 114 0.2206518106944235f, 115 -0.10140050393753763f, 116 -0.1014005039375375f, 117 0.2206518106944236f, 118 -0.10140050393753777f, 119 -0.10140050393753772f, 120 -0.10140050393753763f, 121 -0.10140050393753758f, 122 -0.10140050393753769f, 123 -0.1014005039375375f, 124 -0.10140050393753768f, 125 -0.10140050393753768f, 126 -0.10140050393753759f, 127 -0.10140050393753763f, 128 -0.10140050393753741f, 129 }, 130 { 131 0.0, 132 0.0, 133 0.40670075830260755f, 134 0.44444816619734445f, 135 0.0, 136 0.0, 137 0.19574399372042936f, 138 0.2929100136981264f, 139 -0.40670075830260716f, 140 -0.19574399372042872f, 141 0.0, 142 0.11379074460448091f, 143 -0.44444816619734384f, 144 -0.29291001369812636f, 145 -0.1137907446044814f, 146 0.0, 147 }, 148 { 149 0.0, 150 0.0, 151 -0.21255748058288748f, 152 0.3085497062849767f, 153 0.0, 154 0.4706702258572536f, 155 -0.1621205195722993f, 156 0.0, 157 -0.21255748058287047f, 158 -0.16212051957228327f, 159 -0.47067022585725277f, 160 -0.1464291867126764f, 161 0.3085497062849487f, 162 0.0, 163 -0.14642918671266536f, 164 0.4251149611657548f, 165 }, 166 { 167 0.0, 168 -0.7071067811865474f, 169 0.0, 170 0.0, 171 0.7071067811865476f, 172 0.0, 173 0.0, 174 0.0, 175 0.0, 176 0.0, 177 0.0, 178 0.0, 179 0.0, 180 0.0, 181 0.0, 182 0.0, 183 }, 184 { 185 -0.4105377591765233f, 186 0.6235485373547691f, 187 -0.06435071657946274f, 188 -0.06435071657946266f, 189 0.6235485373547694f, 190 -0.06435071657946284f, 191 -0.0643507165794628f, 192 -0.06435071657946274f, 193 -0.06435071657946272f, 194 -0.06435071657946279f, 195 -0.06435071657946266f, 196 -0.06435071657946277f, 197 -0.06435071657946277f, 198 -0.06435071657946273f, 199 -0.06435071657946274f, 200 -0.0643507165794626f, 201 }, 202 { 203 0.0, 204 0.0, 205 -0.4517556589999482f, 206 0.15854503551840063f, 207 0.0, 208 -0.04038515160822202f, 209 0.0074182263792423875f, 210 0.39351034269210167f, 211 -0.45175565899994635f, 212 0.007418226379244351f, 213 0.1107416575309343f, 214 0.08298163094882051f, 215 0.15854503551839705f, 216 0.3935103426921022f, 217 0.0829816309488214f, 218 -0.45175565899994796f, 219 }, 220 { 221 0.0, 222 0.0, 223 -0.304684750724869f, 224 0.5112616136591823f, 225 0.0, 226 0.0, 227 -0.290480129728998f, 228 -0.06578701549142804f, 229 0.304684750724884f, 230 0.2904801297290076f, 231 0.0, 232 -0.23889773523344604f, 233 -0.5112616136592012f, 234 0.06578701549142545f, 235 0.23889773523345467f, 236 0.0, 237 }, 238 { 239 0.0, 240 0.0, 241 0.3017929516615495f, 242 0.25792362796341184f, 243 0.0, 244 0.16272340142866204f, 245 0.09520022653475037f, 246 0.0, 247 0.3017929516615503f, 248 0.09520022653475055f, 249 -0.16272340142866173f, 250 -0.35312385449816297f, 251 0.25792362796341295f, 252 0.0, 253 -0.3531238544981624f, 254 -0.6035859033230976f, 255 }, 256 { 257 0.0, 258 0.0, 259 0.40824829046386274f, 260 0.0, 261 0.0, 262 0.0, 263 0.0, 264 -0.4082482904638628f, 265 -0.4082482904638635f, 266 0.0, 267 0.0, 268 -0.40824829046386296f, 269 0.0, 270 0.4082482904638634f, 271 0.408248290463863f, 272 0.0, 273 }, 274 { 275 0.0, 276 0.0, 277 0.1747866975480809f, 278 0.0812611176717539f, 279 0.0, 280 0.0, 281 -0.3675398009862027f, 282 -0.307882213957909f, 283 -0.17478669754808135f, 284 0.3675398009862011f, 285 0.0, 286 0.4826689115059883f, 287 -0.08126111767175039f, 288 0.30788221395790305f, 289 -0.48266891150598584f, 290 0.0, 291 }, 292 { 293 0.0, 294 0.0, 295 -0.21105601049335784f, 296 0.18567180916109802f, 297 0.0, 298 0.0, 299 0.49215859013738733f, 300 -0.38525013709251915f, 301 0.21105601049335806f, 302 -0.49215859013738905f, 303 0.0, 304 0.17419412659916217f, 305 -0.18567180916109904f, 306 0.3852501370925211f, 307 -0.1741941265991621f, 308 0.0, 309 }, 310 { 311 0.0, 312 0.0, 313 -0.14266084808807264f, 314 -0.3416446842253372f, 315 0.0, 316 0.7367497537172237f, 317 0.24627107722075148f, 318 -0.08574019035519306f, 319 -0.14266084808807344f, 320 0.24627107722075137f, 321 0.14883399227113567f, 322 -0.04768680350229251f, 323 -0.3416446842253373f, 324 -0.08574019035519267f, 325 -0.047686803502292804f, 326 -0.14266084808807242f, 327 }, 328 { 329 0.0, 330 0.0, 331 -0.13813540350758585f, 332 0.3302282550303788f, 333 0.0, 334 0.08755115000587084f, 335 -0.07946706605909573f, 336 -0.4613374887461511f, 337 -0.13813540350758294f, 338 -0.07946706605910261f, 339 0.49724647109535086f, 340 0.12538059448563663f, 341 0.3302282550303805f, 342 -0.4613374887461554f, 343 0.12538059448564315f, 344 -0.13813540350758452f, 345 }, 346 { 347 0.0, 348 0.0, 349 -0.17437602599651067f, 350 0.0702790691196284f, 351 0.0, 352 -0.2921026642334881f, 353 0.3623817333531167f, 354 0.0, 355 -0.1743760259965108f, 356 0.36238173335311646f, 357 0.29210266423348785f, 358 -0.4326608024727445f, 359 0.07027906911962818f, 360 0.0, 361 -0.4326608024727457f, 362 0.34875205199302267f, 363 }, 364 { 365 0.0, 366 0.0, 367 0.11354987314994337f, 368 -0.07417504595810355f, 369 0.0, 370 0.19402893032594343f, 371 -0.435190496523228f, 372 0.21918684838857466f, 373 0.11354987314994257f, 374 -0.4351904965232251f, 375 0.5550443808910661f, 376 -0.25468277124066463f, 377 -0.07417504595810233f, 378 0.2191868483885728f, 379 -0.25468277124066413f, 380 0.1135498731499429f, 381 }, 382 }; 383 384 const HWY_CAPPED(float, 16) d; 385 for (size_t i = 0; i < 16; i += Lanes(d)) { 386 auto pixel = Zero(d); 387 for (size_t j = 0; j < 16; j++) { 388 auto cf = Set(d, coeffs[j]); 389 auto basis = Load(d, k4x4AFVBasis[j] + i); 390 pixel = MulAdd(cf, basis, pixel); 391 } 392 Store(pixel, d, pixels + i); 393 } 394 } 395 396 template <size_t afv_kind> 397 void AFVTransformToPixels(const float* JXL_RESTRICT coefficients, 398 float* JXL_RESTRICT pixels, size_t pixels_stride) { 399 HWY_ALIGN float scratch_space[4 * 8 * 4]; 400 size_t afv_x = afv_kind & 1; 401 size_t afv_y = afv_kind / 2; 402 float dcs[3] = {}; 403 float block00 = coefficients[0]; 404 float block01 = coefficients[1]; 405 float block10 = coefficients[8]; 406 dcs[0] = (block00 + block10 + block01) * 4.0f; 407 dcs[1] = (block00 + block10 - block01); 408 dcs[2] = block00 - block10; 409 // IAFV: (even, even) positions. 410 HWY_ALIGN float coeff[4 * 4]; 411 coeff[0] = dcs[0]; 412 for (size_t iy = 0; iy < 4; iy++) { 413 for (size_t ix = 0; ix < 4; ix++) { 414 if (ix == 0 && iy == 0) continue; 415 coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2]; 416 } 417 } 418 HWY_ALIGN float block[4 * 8]; 419 AFVIDCT4x4(coeff, block); 420 for (size_t iy = 0; iy < 4; iy++) { 421 for (size_t ix = 0; ix < 4; ix++) { 422 pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] = 423 block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)]; 424 } 425 } 426 // IDCT4x4 in (odd, even) positions. 427 block[0] = dcs[1]; 428 for (size_t iy = 0; iy < 4; iy++) { 429 for (size_t ix = 0; ix < 4; ix++) { 430 if (ix == 0 && iy == 0) continue; 431 block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1]; 432 } 433 } 434 ComputeScaledIDCT<4, 4>()( 435 block, 436 DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4), 437 pixels_stride), 438 scratch_space); 439 // IDCT4x8. 440 block[0] = dcs[2]; 441 for (size_t iy = 0; iy < 4; iy++) { 442 for (size_t ix = 0; ix < 8; ix++) { 443 if (ix == 0 && iy == 0) continue; 444 block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix]; 445 } 446 } 447 ComputeScaledIDCT<4, 8>()( 448 block, 449 DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride), 450 scratch_space); 451 } 452 453 HWY_MAYBE_UNUSED void TransformToPixels(const AcStrategy::Type strategy, 454 float* JXL_RESTRICT coefficients, 455 float* JXL_RESTRICT pixels, 456 size_t pixels_stride, 457 float* scratch_space) { 458 using Type = AcStrategy::Type; 459 switch (strategy) { 460 case Type::IDENTITY: { 461 float dcs[4] = {}; 462 float block00 = coefficients[0]; 463 float block01 = coefficients[1]; 464 float block10 = coefficients[8]; 465 float block11 = coefficients[9]; 466 dcs[0] = block00 + block01 + block10 + block11; 467 dcs[1] = block00 + block01 - block10 - block11; 468 dcs[2] = block00 - block01 + block10 - block11; 469 dcs[3] = block00 - block01 - block10 + block11; 470 for (size_t y = 0; y < 2; y++) { 471 for (size_t x = 0; x < 2; x++) { 472 float block_dc = dcs[y * 2 + x]; 473 float residual_sum = 0; 474 for (size_t iy = 0; iy < 4; iy++) { 475 for (size_t ix = 0; ix < 4; ix++) { 476 if (ix == 0 && iy == 0) continue; 477 residual_sum += coefficients[(y + iy * 2) * 8 + x + ix * 2]; 478 } 479 } 480 pixels[(4 * y + 1) * pixels_stride + 4 * x + 1] = 481 block_dc - residual_sum * (1.0f / 16); 482 for (size_t iy = 0; iy < 4; iy++) { 483 for (size_t ix = 0; ix < 4; ix++) { 484 if (ix == 1 && iy == 1) continue; 485 pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] = 486 coefficients[(y + iy * 2) * 8 + x + ix * 2] + 487 pixels[(4 * y + 1) * pixels_stride + 4 * x + 1]; 488 } 489 } 490 pixels[y * 4 * pixels_stride + x * 4] = 491 coefficients[(y + 2) * 8 + x + 2] + 492 pixels[(4 * y + 1) * pixels_stride + 4 * x + 1]; 493 } 494 } 495 break; 496 } 497 case Type::DCT8X4: { 498 float dcs[2] = {}; 499 float block0 = coefficients[0]; 500 float block1 = coefficients[8]; 501 dcs[0] = block0 + block1; 502 dcs[1] = block0 - block1; 503 for (size_t x = 0; x < 2; x++) { 504 HWY_ALIGN float block[4 * 8]; 505 block[0] = dcs[x]; 506 for (size_t iy = 0; iy < 4; iy++) { 507 for (size_t ix = 0; ix < 8; ix++) { 508 if (ix == 0 && iy == 0) continue; 509 block[iy * 8 + ix] = coefficients[(x + iy * 2) * 8 + ix]; 510 } 511 } 512 ComputeScaledIDCT<8, 4>()(block, DCTTo(pixels + x * 4, pixels_stride), 513 scratch_space); 514 } 515 break; 516 } 517 case Type::DCT4X8: { 518 float dcs[2] = {}; 519 float block0 = coefficients[0]; 520 float block1 = coefficients[8]; 521 dcs[0] = block0 + block1; 522 dcs[1] = block0 - block1; 523 for (size_t y = 0; y < 2; y++) { 524 HWY_ALIGN float block[4 * 8]; 525 block[0] = dcs[y]; 526 for (size_t iy = 0; iy < 4; iy++) { 527 for (size_t ix = 0; ix < 8; ix++) { 528 if (ix == 0 && iy == 0) continue; 529 block[iy * 8 + ix] = coefficients[(y + iy * 2) * 8 + ix]; 530 } 531 } 532 ComputeScaledIDCT<4, 8>()( 533 block, DCTTo(pixels + y * 4 * pixels_stride, pixels_stride), 534 scratch_space); 535 } 536 break; 537 } 538 case Type::DCT4X4: { 539 float dcs[4] = {}; 540 float block00 = coefficients[0]; 541 float block01 = coefficients[1]; 542 float block10 = coefficients[8]; 543 float block11 = coefficients[9]; 544 dcs[0] = block00 + block01 + block10 + block11; 545 dcs[1] = block00 + block01 - block10 - block11; 546 dcs[2] = block00 - block01 + block10 - block11; 547 dcs[3] = block00 - block01 - block10 + block11; 548 for (size_t y = 0; y < 2; y++) { 549 for (size_t x = 0; x < 2; x++) { 550 HWY_ALIGN float block[4 * 4]; 551 block[0] = dcs[y * 2 + x]; 552 for (size_t iy = 0; iy < 4; iy++) { 553 for (size_t ix = 0; ix < 4; ix++) { 554 if (ix == 0 && iy == 0) continue; 555 block[iy * 4 + ix] = coefficients[(y + iy * 2) * 8 + x + ix * 2]; 556 } 557 } 558 ComputeScaledIDCT<4, 4>()( 559 block, 560 DCTTo(pixels + y * 4 * pixels_stride + x * 4, pixels_stride), 561 scratch_space); 562 } 563 } 564 break; 565 } 566 case Type::DCT2X2: { 567 HWY_ALIGN float coeffs[kDCTBlockSize]; 568 memcpy(coeffs, coefficients, sizeof(float) * kDCTBlockSize); 569 IDCT2TopBlock<2>(coeffs, kBlockDim, coeffs); 570 IDCT2TopBlock<4>(coeffs, kBlockDim, coeffs); 571 IDCT2TopBlock<8>(coeffs, kBlockDim, coeffs); 572 for (size_t y = 0; y < kBlockDim; y++) { 573 for (size_t x = 0; x < kBlockDim; x++) { 574 pixels[y * pixels_stride + x] = coeffs[y * kBlockDim + x]; 575 } 576 } 577 break; 578 } 579 case Type::DCT16X16: { 580 ComputeScaledIDCT<16, 16>()(coefficients, DCTTo(pixels, pixels_stride), 581 scratch_space); 582 break; 583 } 584 case Type::DCT16X8: { 585 ComputeScaledIDCT<16, 8>()(coefficients, DCTTo(pixels, pixels_stride), 586 scratch_space); 587 break; 588 } 589 case Type::DCT8X16: { 590 ComputeScaledIDCT<8, 16>()(coefficients, DCTTo(pixels, pixels_stride), 591 scratch_space); 592 break; 593 } 594 case Type::DCT32X8: { 595 ComputeScaledIDCT<32, 8>()(coefficients, DCTTo(pixels, pixels_stride), 596 scratch_space); 597 break; 598 } 599 case Type::DCT8X32: { 600 ComputeScaledIDCT<8, 32>()(coefficients, DCTTo(pixels, pixels_stride), 601 scratch_space); 602 break; 603 } 604 case Type::DCT32X16: { 605 ComputeScaledIDCT<32, 16>()(coefficients, DCTTo(pixels, pixels_stride), 606 scratch_space); 607 break; 608 } 609 case Type::DCT16X32: { 610 ComputeScaledIDCT<16, 32>()(coefficients, DCTTo(pixels, pixels_stride), 611 scratch_space); 612 break; 613 } 614 case Type::DCT32X32: { 615 ComputeScaledIDCT<32, 32>()(coefficients, DCTTo(pixels, pixels_stride), 616 scratch_space); 617 break; 618 } 619 case Type::DCT: { 620 ComputeScaledIDCT<8, 8>()(coefficients, DCTTo(pixels, pixels_stride), 621 scratch_space); 622 break; 623 } 624 case Type::AFV0: { 625 AFVTransformToPixels<0>(coefficients, pixels, pixels_stride); 626 break; 627 } 628 case Type::AFV1: { 629 AFVTransformToPixels<1>(coefficients, pixels, pixels_stride); 630 break; 631 } 632 case Type::AFV2: { 633 AFVTransformToPixels<2>(coefficients, pixels, pixels_stride); 634 break; 635 } 636 case Type::AFV3: { 637 AFVTransformToPixels<3>(coefficients, pixels, pixels_stride); 638 break; 639 } 640 case Type::DCT64X32: { 641 ComputeScaledIDCT<64, 32>()(coefficients, DCTTo(pixels, pixels_stride), 642 scratch_space); 643 break; 644 } 645 case Type::DCT32X64: { 646 ComputeScaledIDCT<32, 64>()(coefficients, DCTTo(pixels, pixels_stride), 647 scratch_space); 648 break; 649 } 650 case Type::DCT64X64: { 651 ComputeScaledIDCT<64, 64>()(coefficients, DCTTo(pixels, pixels_stride), 652 scratch_space); 653 break; 654 } 655 case Type::DCT128X64: { 656 ComputeScaledIDCT<128, 64>()(coefficients, DCTTo(pixels, pixels_stride), 657 scratch_space); 658 break; 659 } 660 case Type::DCT64X128: { 661 ComputeScaledIDCT<64, 128>()(coefficients, DCTTo(pixels, pixels_stride), 662 scratch_space); 663 break; 664 } 665 case Type::DCT128X128: { 666 ComputeScaledIDCT<128, 128>()(coefficients, DCTTo(pixels, pixels_stride), 667 scratch_space); 668 break; 669 } 670 case Type::DCT256X128: { 671 ComputeScaledIDCT<256, 128>()(coefficients, DCTTo(pixels, pixels_stride), 672 scratch_space); 673 break; 674 } 675 case Type::DCT128X256: { 676 ComputeScaledIDCT<128, 256>()(coefficients, DCTTo(pixels, pixels_stride), 677 scratch_space); 678 break; 679 } 680 case Type::DCT256X256: { 681 ComputeScaledIDCT<256, 256>()(coefficients, DCTTo(pixels, pixels_stride), 682 scratch_space); 683 break; 684 } 685 case Type::kNumValidStrategies: 686 JXL_UNREACHABLE("Invalid strategy"); 687 } 688 } 689 690 HWY_MAYBE_UNUSED void LowestFrequenciesFromDC(const AcStrategy::Type strategy, 691 const float* dc, size_t dc_stride, 692 float* llf, 693 float* JXL_RESTRICT scratch) { 694 using Type = AcStrategy::Type; 695 HWY_ALIGN float warm_block[4 * 4]; 696 HWY_ALIGN float warm_scratch_space[4 * 4 * 4]; 697 switch (strategy) { 698 case Type::DCT16X8: { 699 ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim, 700 /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>( 701 dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space); 702 break; 703 } 704 case Type::DCT8X16: { 705 ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim, 706 /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>( 707 dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space); 708 break; 709 } 710 case Type::DCT16X16: { 711 ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim, 712 /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>( 713 dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space); 714 break; 715 } 716 case Type::DCT32X8: { 717 ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim, 718 /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>( 719 dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space); 720 break; 721 } 722 case Type::DCT8X32: { 723 ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim, 724 /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>( 725 dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space); 726 break; 727 } 728 case Type::DCT32X16: { 729 ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim, 730 /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>( 731 dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space); 732 break; 733 } 734 case Type::DCT16X32: { 735 ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim, 736 /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>( 737 dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space); 738 break; 739 } 740 case Type::DCT32X32: { 741 ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim, 742 /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>( 743 dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space); 744 break; 745 } 746 case Type::DCT64X32: { 747 ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim, 748 /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>( 749 dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 4); 750 break; 751 } 752 case Type::DCT32X64: { 753 ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim, 754 /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>( 755 dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 4 * 8); 756 break; 757 } 758 case Type::DCT64X64: { 759 ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim, 760 /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>( 761 dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 8); 762 break; 763 } 764 case Type::DCT128X64: { 765 ReinterpretingDCT</*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim, 766 /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>( 767 dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 8); 768 break; 769 } 770 case Type::DCT64X128: { 771 ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim, 772 /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>( 773 dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 8 * 16); 774 break; 775 } 776 case Type::DCT128X128: { 777 ReinterpretingDCT< 778 /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim, 779 /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>( 780 dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 16); 781 break; 782 } 783 case Type::DCT256X128: { 784 ReinterpretingDCT< 785 /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim, 786 /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>( 787 dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 16); 788 break; 789 } 790 case Type::DCT128X256: { 791 ReinterpretingDCT< 792 /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim, 793 /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>( 794 dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 16 * 32); 795 break; 796 } 797 case Type::DCT256X256: { 798 ReinterpretingDCT< 799 /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim, 800 /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>( 801 dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 32); 802 break; 803 } 804 case Type::DCT: 805 case Type::DCT2X2: 806 case Type::DCT4X4: 807 case Type::DCT4X8: 808 case Type::DCT8X4: 809 case Type::AFV0: 810 case Type::AFV1: 811 case Type::AFV2: 812 case Type::AFV3: 813 case Type::IDENTITY: 814 llf[0] = dc[0]; 815 break; 816 case Type::kNumValidStrategies: 817 JXL_UNREACHABLE("Invalid strategy"); 818 }; 819 } 820 821 } // namespace 822 // NOLINTNEXTLINE(google-readability-namespace-comments) 823 } // namespace HWY_NAMESPACE 824 } // namespace jxl 825 HWY_AFTER_NAMESPACE(); 826 827 #endif // LIB_JXL_DEC_TRANSFORMS_INL_H_