libjxl

FORK: libjxl patches used on blog
git clone https://git.neptards.moe/blog/libjxl.git
Log | Files | Refs | Submodules | README | LICENSE

dec_transforms-inl.h (26791B)


      1 // Copyright (c) the JPEG XL Project Authors. All rights reserved.
      2 //
      3 // Use of this source code is governed by a BSD-style
      4 // license that can be found in the LICENSE file.
      5 
      6 #if defined(LIB_JXL_DEC_TRANSFORMS_INL_H_) == defined(HWY_TARGET_TOGGLE)
      7 #ifdef LIB_JXL_DEC_TRANSFORMS_INL_H_
      8 #undef LIB_JXL_DEC_TRANSFORMS_INL_H_
      9 #else
     10 #define LIB_JXL_DEC_TRANSFORMS_INL_H_
     11 #endif
     12 
     13 #include <stddef.h>
     14 
     15 #include <hwy/highway.h>
     16 
     17 #include "lib/jxl/ac_strategy.h"
     18 #include "lib/jxl/coeff_order_fwd.h"
     19 #include "lib/jxl/dct-inl.h"
     20 #include "lib/jxl/dct_scales.h"
     21 HWY_BEFORE_NAMESPACE();
     22 namespace jxl {
     23 namespace HWY_NAMESPACE {
     24 namespace {
     25 
     26 // These templates are not found via ADL.
     27 using hwy::HWY_NAMESPACE::MulAdd;
     28 
     29 // Computes the lowest-frequency LF_ROWSxLF_COLS-sized square in output, which
     30 // is a DCT_ROWS*DCT_COLS-sized DCT block, by doing a ROWS*COLS DCT on the
     31 // input block.
     32 template <size_t DCT_ROWS, size_t DCT_COLS, size_t LF_ROWS, size_t LF_COLS,
     33           size_t ROWS, size_t COLS>
     34 JXL_INLINE void ReinterpretingDCT(const float* input, const size_t input_stride,
     35                                   float* output, const size_t output_stride,
     36                                   float* JXL_RESTRICT block,
     37                                   float* JXL_RESTRICT scratch_space) {
     38   static_assert(LF_ROWS == ROWS,
     39                 "ReinterpretingDCT should only be called with LF == N");
     40   static_assert(LF_COLS == COLS,
     41                 "ReinterpretingDCT should only be called with LF == N");
     42   ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
     43                                  scratch_space);
     44   if (ROWS < COLS) {
     45     for (size_t y = 0; y < LF_ROWS; y++) {
     46       for (size_t x = 0; x < LF_COLS; x++) {
     47         output[y * output_stride + x] =
     48             block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
     49             DCTTotalResampleScale<COLS, DCT_COLS>(x);
     50       }
     51     }
     52   } else {
     53     for (size_t y = 0; y < LF_COLS; y++) {
     54       for (size_t x = 0; x < LF_ROWS; x++) {
     55         output[y * output_stride + x] =
     56             block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
     57             DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
     58       }
     59     }
     60   }
     61 }
     62 
     63 template <size_t S>
     64 void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
     65   static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
     66   static_assert(S % 2 == 0, "S should be even");
     67   float temp[kDCTBlockSize];
     68   constexpr size_t num_2x2 = S / 2;
     69   for (size_t y = 0; y < num_2x2; y++) {
     70     for (size_t x = 0; x < num_2x2; x++) {
     71       float c00 = block[y * kBlockDim + x];
     72       float c01 = block[y * kBlockDim + num_2x2 + x];
     73       float c10 = block[(y + num_2x2) * kBlockDim + x];
     74       float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
     75       float r00 = c00 + c01 + c10 + c11;
     76       float r01 = c00 + c01 - c10 - c11;
     77       float r10 = c00 - c01 + c10 - c11;
     78       float r11 = c00 - c01 - c10 + c11;
     79       temp[y * 2 * kBlockDim + x * 2] = r00;
     80       temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
     81       temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
     82       temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
     83     }
     84   }
     85   for (size_t y = 0; y < S; y++) {
     86     for (size_t x = 0; x < S; x++) {
     87       out[y * stride_out + x] = temp[y * kBlockDim + x];
     88     }
     89   }
     90 }
     91 
     92 void AFVIDCT4x4(const float* JXL_RESTRICT coeffs, float* JXL_RESTRICT pixels) {
     93   HWY_ALIGN static constexpr float k4x4AFVBasis[16][16] = {
     94       {
     95           0.25,
     96           0.25,
     97           0.25,
     98           0.25,
     99           0.25,
    100           0.25,
    101           0.25,
    102           0.25,
    103           0.25,
    104           0.25,
    105           0.25,
    106           0.25,
    107           0.25,
    108           0.25,
    109           0.25,
    110           0.25,
    111       },
    112       {
    113           0.876902929799142f,
    114           0.2206518106944235f,
    115           -0.10140050393753763f,
    116           -0.1014005039375375f,
    117           0.2206518106944236f,
    118           -0.10140050393753777f,
    119           -0.10140050393753772f,
    120           -0.10140050393753763f,
    121           -0.10140050393753758f,
    122           -0.10140050393753769f,
    123           -0.1014005039375375f,
    124           -0.10140050393753768f,
    125           -0.10140050393753768f,
    126           -0.10140050393753759f,
    127           -0.10140050393753763f,
    128           -0.10140050393753741f,
    129       },
    130       {
    131           0.0,
    132           0.0,
    133           0.40670075830260755f,
    134           0.44444816619734445f,
    135           0.0,
    136           0.0,
    137           0.19574399372042936f,
    138           0.2929100136981264f,
    139           -0.40670075830260716f,
    140           -0.19574399372042872f,
    141           0.0,
    142           0.11379074460448091f,
    143           -0.44444816619734384f,
    144           -0.29291001369812636f,
    145           -0.1137907446044814f,
    146           0.0,
    147       },
    148       {
    149           0.0,
    150           0.0,
    151           -0.21255748058288748f,
    152           0.3085497062849767f,
    153           0.0,
    154           0.4706702258572536f,
    155           -0.1621205195722993f,
    156           0.0,
    157           -0.21255748058287047f,
    158           -0.16212051957228327f,
    159           -0.47067022585725277f,
    160           -0.1464291867126764f,
    161           0.3085497062849487f,
    162           0.0,
    163           -0.14642918671266536f,
    164           0.4251149611657548f,
    165       },
    166       {
    167           0.0,
    168           -0.7071067811865474f,
    169           0.0,
    170           0.0,
    171           0.7071067811865476f,
    172           0.0,
    173           0.0,
    174           0.0,
    175           0.0,
    176           0.0,
    177           0.0,
    178           0.0,
    179           0.0,
    180           0.0,
    181           0.0,
    182           0.0,
    183       },
    184       {
    185           -0.4105377591765233f,
    186           0.6235485373547691f,
    187           -0.06435071657946274f,
    188           -0.06435071657946266f,
    189           0.6235485373547694f,
    190           -0.06435071657946284f,
    191           -0.0643507165794628f,
    192           -0.06435071657946274f,
    193           -0.06435071657946272f,
    194           -0.06435071657946279f,
    195           -0.06435071657946266f,
    196           -0.06435071657946277f,
    197           -0.06435071657946277f,
    198           -0.06435071657946273f,
    199           -0.06435071657946274f,
    200           -0.0643507165794626f,
    201       },
    202       {
    203           0.0,
    204           0.0,
    205           -0.4517556589999482f,
    206           0.15854503551840063f,
    207           0.0,
    208           -0.04038515160822202f,
    209           0.0074182263792423875f,
    210           0.39351034269210167f,
    211           -0.45175565899994635f,
    212           0.007418226379244351f,
    213           0.1107416575309343f,
    214           0.08298163094882051f,
    215           0.15854503551839705f,
    216           0.3935103426921022f,
    217           0.0829816309488214f,
    218           -0.45175565899994796f,
    219       },
    220       {
    221           0.0,
    222           0.0,
    223           -0.304684750724869f,
    224           0.5112616136591823f,
    225           0.0,
    226           0.0,
    227           -0.290480129728998f,
    228           -0.06578701549142804f,
    229           0.304684750724884f,
    230           0.2904801297290076f,
    231           0.0,
    232           -0.23889773523344604f,
    233           -0.5112616136592012f,
    234           0.06578701549142545f,
    235           0.23889773523345467f,
    236           0.0,
    237       },
    238       {
    239           0.0,
    240           0.0,
    241           0.3017929516615495f,
    242           0.25792362796341184f,
    243           0.0,
    244           0.16272340142866204f,
    245           0.09520022653475037f,
    246           0.0,
    247           0.3017929516615503f,
    248           0.09520022653475055f,
    249           -0.16272340142866173f,
    250           -0.35312385449816297f,
    251           0.25792362796341295f,
    252           0.0,
    253           -0.3531238544981624f,
    254           -0.6035859033230976f,
    255       },
    256       {
    257           0.0,
    258           0.0,
    259           0.40824829046386274f,
    260           0.0,
    261           0.0,
    262           0.0,
    263           0.0,
    264           -0.4082482904638628f,
    265           -0.4082482904638635f,
    266           0.0,
    267           0.0,
    268           -0.40824829046386296f,
    269           0.0,
    270           0.4082482904638634f,
    271           0.408248290463863f,
    272           0.0,
    273       },
    274       {
    275           0.0,
    276           0.0,
    277           0.1747866975480809f,
    278           0.0812611176717539f,
    279           0.0,
    280           0.0,
    281           -0.3675398009862027f,
    282           -0.307882213957909f,
    283           -0.17478669754808135f,
    284           0.3675398009862011f,
    285           0.0,
    286           0.4826689115059883f,
    287           -0.08126111767175039f,
    288           0.30788221395790305f,
    289           -0.48266891150598584f,
    290           0.0,
    291       },
    292       {
    293           0.0,
    294           0.0,
    295           -0.21105601049335784f,
    296           0.18567180916109802f,
    297           0.0,
    298           0.0,
    299           0.49215859013738733f,
    300           -0.38525013709251915f,
    301           0.21105601049335806f,
    302           -0.49215859013738905f,
    303           0.0,
    304           0.17419412659916217f,
    305           -0.18567180916109904f,
    306           0.3852501370925211f,
    307           -0.1741941265991621f,
    308           0.0,
    309       },
    310       {
    311           0.0,
    312           0.0,
    313           -0.14266084808807264f,
    314           -0.3416446842253372f,
    315           0.0,
    316           0.7367497537172237f,
    317           0.24627107722075148f,
    318           -0.08574019035519306f,
    319           -0.14266084808807344f,
    320           0.24627107722075137f,
    321           0.14883399227113567f,
    322           -0.04768680350229251f,
    323           -0.3416446842253373f,
    324           -0.08574019035519267f,
    325           -0.047686803502292804f,
    326           -0.14266084808807242f,
    327       },
    328       {
    329           0.0,
    330           0.0,
    331           -0.13813540350758585f,
    332           0.3302282550303788f,
    333           0.0,
    334           0.08755115000587084f,
    335           -0.07946706605909573f,
    336           -0.4613374887461511f,
    337           -0.13813540350758294f,
    338           -0.07946706605910261f,
    339           0.49724647109535086f,
    340           0.12538059448563663f,
    341           0.3302282550303805f,
    342           -0.4613374887461554f,
    343           0.12538059448564315f,
    344           -0.13813540350758452f,
    345       },
    346       {
    347           0.0,
    348           0.0,
    349           -0.17437602599651067f,
    350           0.0702790691196284f,
    351           0.0,
    352           -0.2921026642334881f,
    353           0.3623817333531167f,
    354           0.0,
    355           -0.1743760259965108f,
    356           0.36238173335311646f,
    357           0.29210266423348785f,
    358           -0.4326608024727445f,
    359           0.07027906911962818f,
    360           0.0,
    361           -0.4326608024727457f,
    362           0.34875205199302267f,
    363       },
    364       {
    365           0.0,
    366           0.0,
    367           0.11354987314994337f,
    368           -0.07417504595810355f,
    369           0.0,
    370           0.19402893032594343f,
    371           -0.435190496523228f,
    372           0.21918684838857466f,
    373           0.11354987314994257f,
    374           -0.4351904965232251f,
    375           0.5550443808910661f,
    376           -0.25468277124066463f,
    377           -0.07417504595810233f,
    378           0.2191868483885728f,
    379           -0.25468277124066413f,
    380           0.1135498731499429f,
    381       },
    382   };
    383 
    384   const HWY_CAPPED(float, 16) d;
    385   for (size_t i = 0; i < 16; i += Lanes(d)) {
    386     auto pixel = Zero(d);
    387     for (size_t j = 0; j < 16; j++) {
    388       auto cf = Set(d, coeffs[j]);
    389       auto basis = Load(d, k4x4AFVBasis[j] + i);
    390       pixel = MulAdd(cf, basis, pixel);
    391     }
    392     Store(pixel, d, pixels + i);
    393   }
    394 }
    395 
    396 template <size_t afv_kind>
    397 void AFVTransformToPixels(const float* JXL_RESTRICT coefficients,
    398                           float* JXL_RESTRICT pixels, size_t pixels_stride) {
    399   HWY_ALIGN float scratch_space[4 * 8 * 4];
    400   size_t afv_x = afv_kind & 1;
    401   size_t afv_y = afv_kind / 2;
    402   float dcs[3] = {};
    403   float block00 = coefficients[0];
    404   float block01 = coefficients[1];
    405   float block10 = coefficients[8];
    406   dcs[0] = (block00 + block10 + block01) * 4.0f;
    407   dcs[1] = (block00 + block10 - block01);
    408   dcs[2] = block00 - block10;
    409   // IAFV: (even, even) positions.
    410   HWY_ALIGN float coeff[4 * 4];
    411   coeff[0] = dcs[0];
    412   for (size_t iy = 0; iy < 4; iy++) {
    413     for (size_t ix = 0; ix < 4; ix++) {
    414       if (ix == 0 && iy == 0) continue;
    415       coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
    416     }
    417   }
    418   HWY_ALIGN float block[4 * 8];
    419   AFVIDCT4x4(coeff, block);
    420   for (size_t iy = 0; iy < 4; iy++) {
    421     for (size_t ix = 0; ix < 4; ix++) {
    422       pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
    423           block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
    424     }
    425   }
    426   // IDCT4x4 in (odd, even) positions.
    427   block[0] = dcs[1];
    428   for (size_t iy = 0; iy < 4; iy++) {
    429     for (size_t ix = 0; ix < 4; ix++) {
    430       if (ix == 0 && iy == 0) continue;
    431       block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
    432     }
    433   }
    434   ComputeScaledIDCT<4, 4>()(
    435       block,
    436       DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
    437             pixels_stride),
    438       scratch_space);
    439   // IDCT4x8.
    440   block[0] = dcs[2];
    441   for (size_t iy = 0; iy < 4; iy++) {
    442     for (size_t ix = 0; ix < 8; ix++) {
    443       if (ix == 0 && iy == 0) continue;
    444       block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
    445     }
    446   }
    447   ComputeScaledIDCT<4, 8>()(
    448       block,
    449       DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
    450       scratch_space);
    451 }
    452 
    453 HWY_MAYBE_UNUSED void TransformToPixels(const AcStrategy::Type strategy,
    454                                         float* JXL_RESTRICT coefficients,
    455                                         float* JXL_RESTRICT pixels,
    456                                         size_t pixels_stride,
    457                                         float* scratch_space) {
    458   using Type = AcStrategy::Type;
    459   switch (strategy) {
    460     case Type::IDENTITY: {
    461       float dcs[4] = {};
    462       float block00 = coefficients[0];
    463       float block01 = coefficients[1];
    464       float block10 = coefficients[8];
    465       float block11 = coefficients[9];
    466       dcs[0] = block00 + block01 + block10 + block11;
    467       dcs[1] = block00 + block01 - block10 - block11;
    468       dcs[2] = block00 - block01 + block10 - block11;
    469       dcs[3] = block00 - block01 - block10 + block11;
    470       for (size_t y = 0; y < 2; y++) {
    471         for (size_t x = 0; x < 2; x++) {
    472           float block_dc = dcs[y * 2 + x];
    473           float residual_sum = 0;
    474           for (size_t iy = 0; iy < 4; iy++) {
    475             for (size_t ix = 0; ix < 4; ix++) {
    476               if (ix == 0 && iy == 0) continue;
    477               residual_sum += coefficients[(y + iy * 2) * 8 + x + ix * 2];
    478             }
    479           }
    480           pixels[(4 * y + 1) * pixels_stride + 4 * x + 1] =
    481               block_dc - residual_sum * (1.0f / 16);
    482           for (size_t iy = 0; iy < 4; iy++) {
    483             for (size_t ix = 0; ix < 4; ix++) {
    484               if (ix == 1 && iy == 1) continue;
    485               pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] =
    486                   coefficients[(y + iy * 2) * 8 + x + ix * 2] +
    487                   pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
    488             }
    489           }
    490           pixels[y * 4 * pixels_stride + x * 4] =
    491               coefficients[(y + 2) * 8 + x + 2] +
    492               pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
    493         }
    494       }
    495       break;
    496     }
    497     case Type::DCT8X4: {
    498       float dcs[2] = {};
    499       float block0 = coefficients[0];
    500       float block1 = coefficients[8];
    501       dcs[0] = block0 + block1;
    502       dcs[1] = block0 - block1;
    503       for (size_t x = 0; x < 2; x++) {
    504         HWY_ALIGN float block[4 * 8];
    505         block[0] = dcs[x];
    506         for (size_t iy = 0; iy < 4; iy++) {
    507           for (size_t ix = 0; ix < 8; ix++) {
    508             if (ix == 0 && iy == 0) continue;
    509             block[iy * 8 + ix] = coefficients[(x + iy * 2) * 8 + ix];
    510           }
    511         }
    512         ComputeScaledIDCT<8, 4>()(block, DCTTo(pixels + x * 4, pixels_stride),
    513                                   scratch_space);
    514       }
    515       break;
    516     }
    517     case Type::DCT4X8: {
    518       float dcs[2] = {};
    519       float block0 = coefficients[0];
    520       float block1 = coefficients[8];
    521       dcs[0] = block0 + block1;
    522       dcs[1] = block0 - block1;
    523       for (size_t y = 0; y < 2; y++) {
    524         HWY_ALIGN float block[4 * 8];
    525         block[0] = dcs[y];
    526         for (size_t iy = 0; iy < 4; iy++) {
    527           for (size_t ix = 0; ix < 8; ix++) {
    528             if (ix == 0 && iy == 0) continue;
    529             block[iy * 8 + ix] = coefficients[(y + iy * 2) * 8 + ix];
    530           }
    531         }
    532         ComputeScaledIDCT<4, 8>()(
    533             block, DCTTo(pixels + y * 4 * pixels_stride, pixels_stride),
    534             scratch_space);
    535       }
    536       break;
    537     }
    538     case Type::DCT4X4: {
    539       float dcs[4] = {};
    540       float block00 = coefficients[0];
    541       float block01 = coefficients[1];
    542       float block10 = coefficients[8];
    543       float block11 = coefficients[9];
    544       dcs[0] = block00 + block01 + block10 + block11;
    545       dcs[1] = block00 + block01 - block10 - block11;
    546       dcs[2] = block00 - block01 + block10 - block11;
    547       dcs[3] = block00 - block01 - block10 + block11;
    548       for (size_t y = 0; y < 2; y++) {
    549         for (size_t x = 0; x < 2; x++) {
    550           HWY_ALIGN float block[4 * 4];
    551           block[0] = dcs[y * 2 + x];
    552           for (size_t iy = 0; iy < 4; iy++) {
    553             for (size_t ix = 0; ix < 4; ix++) {
    554               if (ix == 0 && iy == 0) continue;
    555               block[iy * 4 + ix] = coefficients[(y + iy * 2) * 8 + x + ix * 2];
    556             }
    557           }
    558           ComputeScaledIDCT<4, 4>()(
    559               block,
    560               DCTTo(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
    561               scratch_space);
    562         }
    563       }
    564       break;
    565     }
    566     case Type::DCT2X2: {
    567       HWY_ALIGN float coeffs[kDCTBlockSize];
    568       memcpy(coeffs, coefficients, sizeof(float) * kDCTBlockSize);
    569       IDCT2TopBlock<2>(coeffs, kBlockDim, coeffs);
    570       IDCT2TopBlock<4>(coeffs, kBlockDim, coeffs);
    571       IDCT2TopBlock<8>(coeffs, kBlockDim, coeffs);
    572       for (size_t y = 0; y < kBlockDim; y++) {
    573         for (size_t x = 0; x < kBlockDim; x++) {
    574           pixels[y * pixels_stride + x] = coeffs[y * kBlockDim + x];
    575         }
    576       }
    577       break;
    578     }
    579     case Type::DCT16X16: {
    580       ComputeScaledIDCT<16, 16>()(coefficients, DCTTo(pixels, pixels_stride),
    581                                   scratch_space);
    582       break;
    583     }
    584     case Type::DCT16X8: {
    585       ComputeScaledIDCT<16, 8>()(coefficients, DCTTo(pixels, pixels_stride),
    586                                  scratch_space);
    587       break;
    588     }
    589     case Type::DCT8X16: {
    590       ComputeScaledIDCT<8, 16>()(coefficients, DCTTo(pixels, pixels_stride),
    591                                  scratch_space);
    592       break;
    593     }
    594     case Type::DCT32X8: {
    595       ComputeScaledIDCT<32, 8>()(coefficients, DCTTo(pixels, pixels_stride),
    596                                  scratch_space);
    597       break;
    598     }
    599     case Type::DCT8X32: {
    600       ComputeScaledIDCT<8, 32>()(coefficients, DCTTo(pixels, pixels_stride),
    601                                  scratch_space);
    602       break;
    603     }
    604     case Type::DCT32X16: {
    605       ComputeScaledIDCT<32, 16>()(coefficients, DCTTo(pixels, pixels_stride),
    606                                   scratch_space);
    607       break;
    608     }
    609     case Type::DCT16X32: {
    610       ComputeScaledIDCT<16, 32>()(coefficients, DCTTo(pixels, pixels_stride),
    611                                   scratch_space);
    612       break;
    613     }
    614     case Type::DCT32X32: {
    615       ComputeScaledIDCT<32, 32>()(coefficients, DCTTo(pixels, pixels_stride),
    616                                   scratch_space);
    617       break;
    618     }
    619     case Type::DCT: {
    620       ComputeScaledIDCT<8, 8>()(coefficients, DCTTo(pixels, pixels_stride),
    621                                 scratch_space);
    622       break;
    623     }
    624     case Type::AFV0: {
    625       AFVTransformToPixels<0>(coefficients, pixels, pixels_stride);
    626       break;
    627     }
    628     case Type::AFV1: {
    629       AFVTransformToPixels<1>(coefficients, pixels, pixels_stride);
    630       break;
    631     }
    632     case Type::AFV2: {
    633       AFVTransformToPixels<2>(coefficients, pixels, pixels_stride);
    634       break;
    635     }
    636     case Type::AFV3: {
    637       AFVTransformToPixels<3>(coefficients, pixels, pixels_stride);
    638       break;
    639     }
    640     case Type::DCT64X32: {
    641       ComputeScaledIDCT<64, 32>()(coefficients, DCTTo(pixels, pixels_stride),
    642                                   scratch_space);
    643       break;
    644     }
    645     case Type::DCT32X64: {
    646       ComputeScaledIDCT<32, 64>()(coefficients, DCTTo(pixels, pixels_stride),
    647                                   scratch_space);
    648       break;
    649     }
    650     case Type::DCT64X64: {
    651       ComputeScaledIDCT<64, 64>()(coefficients, DCTTo(pixels, pixels_stride),
    652                                   scratch_space);
    653       break;
    654     }
    655     case Type::DCT128X64: {
    656       ComputeScaledIDCT<128, 64>()(coefficients, DCTTo(pixels, pixels_stride),
    657                                    scratch_space);
    658       break;
    659     }
    660     case Type::DCT64X128: {
    661       ComputeScaledIDCT<64, 128>()(coefficients, DCTTo(pixels, pixels_stride),
    662                                    scratch_space);
    663       break;
    664     }
    665     case Type::DCT128X128: {
    666       ComputeScaledIDCT<128, 128>()(coefficients, DCTTo(pixels, pixels_stride),
    667                                     scratch_space);
    668       break;
    669     }
    670     case Type::DCT256X128: {
    671       ComputeScaledIDCT<256, 128>()(coefficients, DCTTo(pixels, pixels_stride),
    672                                     scratch_space);
    673       break;
    674     }
    675     case Type::DCT128X256: {
    676       ComputeScaledIDCT<128, 256>()(coefficients, DCTTo(pixels, pixels_stride),
    677                                     scratch_space);
    678       break;
    679     }
    680     case Type::DCT256X256: {
    681       ComputeScaledIDCT<256, 256>()(coefficients, DCTTo(pixels, pixels_stride),
    682                                     scratch_space);
    683       break;
    684     }
    685     case Type::kNumValidStrategies:
    686       JXL_UNREACHABLE("Invalid strategy");
    687   }
    688 }
    689 
    690 HWY_MAYBE_UNUSED void LowestFrequenciesFromDC(const AcStrategy::Type strategy,
    691                                               const float* dc, size_t dc_stride,
    692                                               float* llf,
    693                                               float* JXL_RESTRICT scratch) {
    694   using Type = AcStrategy::Type;
    695   HWY_ALIGN float warm_block[4 * 4];
    696   HWY_ALIGN float warm_scratch_space[4 * 4 * 4];
    697   switch (strategy) {
    698     case Type::DCT16X8: {
    699       ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
    700                         /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
    701           dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
    702       break;
    703     }
    704     case Type::DCT8X16: {
    705       ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
    706                         /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
    707           dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
    708       break;
    709     }
    710     case Type::DCT16X16: {
    711       ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
    712                         /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
    713           dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
    714       break;
    715     }
    716     case Type::DCT32X8: {
    717       ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
    718                         /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
    719           dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
    720       break;
    721     }
    722     case Type::DCT8X32: {
    723       ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
    724                         /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
    725           dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
    726       break;
    727     }
    728     case Type::DCT32X16: {
    729       ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
    730                         /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
    731           dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
    732       break;
    733     }
    734     case Type::DCT16X32: {
    735       ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
    736                         /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
    737           dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
    738       break;
    739     }
    740     case Type::DCT32X32: {
    741       ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
    742                         /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
    743           dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
    744       break;
    745     }
    746     case Type::DCT64X32: {
    747       ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
    748                         /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
    749           dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 4);
    750       break;
    751     }
    752     case Type::DCT32X64: {
    753       ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
    754                         /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
    755           dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 4 * 8);
    756       break;
    757     }
    758     case Type::DCT64X64: {
    759       ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
    760                         /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
    761           dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 8);
    762       break;
    763     }
    764     case Type::DCT128X64: {
    765       ReinterpretingDCT</*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
    766                         /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
    767           dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 8);
    768       break;
    769     }
    770     case Type::DCT64X128: {
    771       ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
    772                         /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
    773           dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 8 * 16);
    774       break;
    775     }
    776     case Type::DCT128X128: {
    777       ReinterpretingDCT<
    778           /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
    779           /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
    780           dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 16);
    781       break;
    782     }
    783     case Type::DCT256X128: {
    784       ReinterpretingDCT<
    785           /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
    786           /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
    787           dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 16);
    788       break;
    789     }
    790     case Type::DCT128X256: {
    791       ReinterpretingDCT<
    792           /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
    793           /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
    794           dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 16 * 32);
    795       break;
    796     }
    797     case Type::DCT256X256: {
    798       ReinterpretingDCT<
    799           /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
    800           /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
    801           dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 32);
    802       break;
    803     }
    804     case Type::DCT:
    805     case Type::DCT2X2:
    806     case Type::DCT4X4:
    807     case Type::DCT4X8:
    808     case Type::DCT8X4:
    809     case Type::AFV0:
    810     case Type::AFV1:
    811     case Type::AFV2:
    812     case Type::AFV3:
    813     case Type::IDENTITY:
    814       llf[0] = dc[0];
    815       break;
    816     case Type::kNumValidStrategies:
    817       JXL_UNREACHABLE("Invalid strategy");
    818   };
    819 }
    820 
    821 }  // namespace
    822 // NOLINTNEXTLINE(google-readability-namespace-comments)
    823 }  // namespace HWY_NAMESPACE
    824 }  // namespace jxl
    825 HWY_AFTER_NAMESPACE();
    826 
    827 #endif  // LIB_JXL_DEC_TRANSFORMS_INL_H_