libjxl

FORK: libjxl patches used on blog
git clone https://git.neptards.moe/blog/libjxl.git
Log | Files | Refs | Submodules | README | LICENSE

enc_transforms-inl.h (26861B)


      1 // Copyright (c) the JPEG XL Project Authors. All rights reserved.
      2 //
      3 // Use of this source code is governed by a BSD-style
      4 // license that can be found in the LICENSE file.
      5 
      6 #if defined(LIB_JXL_ENC_TRANSFORMS_INL_H_) == defined(HWY_TARGET_TOGGLE)
      7 #ifdef LIB_JXL_ENC_TRANSFORMS_INL_H_
      8 #undef LIB_JXL_ENC_TRANSFORMS_INL_H_
      9 #else
     10 #define LIB_JXL_ENC_TRANSFORMS_INL_H_
     11 #endif
     12 
     13 #include <stddef.h>
     14 
     15 #include <hwy/highway.h>
     16 
     17 #include "lib/jxl/ac_strategy.h"
     18 #include "lib/jxl/coeff_order_fwd.h"
     19 #include "lib/jxl/dct-inl.h"
     20 #include "lib/jxl/dct_scales.h"
     21 HWY_BEFORE_NAMESPACE();
     22 namespace jxl {
     23 namespace HWY_NAMESPACE {
     24 namespace {
     25 
     26 // Inverse of ReinterpretingDCT.
     27 template <size_t DCT_ROWS, size_t DCT_COLS, size_t LF_ROWS, size_t LF_COLS,
     28           size_t ROWS, size_t COLS>
     29 HWY_INLINE void ReinterpretingIDCT(const float* input,
     30                                    const size_t input_stride, float* output,
     31                                    const size_t output_stride) {
     32   HWY_ALIGN float block[ROWS * COLS] = {};
     33   if (ROWS < COLS) {
     34     for (size_t y = 0; y < LF_ROWS; y++) {
     35       for (size_t x = 0; x < LF_COLS; x++) {
     36         block[y * COLS + x] = input[y * input_stride + x] *
     37                               DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
     38                               DCTTotalResampleScale<DCT_COLS, COLS>(x);
     39       }
     40     }
     41   } else {
     42     for (size_t y = 0; y < LF_COLS; y++) {
     43       for (size_t x = 0; x < LF_ROWS; x++) {
     44         block[y * ROWS + x] = input[y * input_stride + x] *
     45                               DCTTotalResampleScale<DCT_COLS, COLS>(y) *
     46                               DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
     47       }
     48     }
     49   }
     50 
     51   // ROWS, COLS <= 8, so we can put scratch space on the stack.
     52   HWY_ALIGN float scratch_space[ROWS * COLS * 3];
     53   ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
     54                                   scratch_space);
     55 }
     56 
     57 template <size_t S>
     58 void DCT2TopBlock(const float* block, size_t stride, float* out) {
     59   static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
     60   static_assert(S % 2 == 0, "S should be even");
     61   float temp[kDCTBlockSize];
     62   constexpr size_t num_2x2 = S / 2;
     63   for (size_t y = 0; y < num_2x2; y++) {
     64     for (size_t x = 0; x < num_2x2; x++) {
     65       float c00 = block[y * 2 * stride + x * 2];
     66       float c01 = block[y * 2 * stride + x * 2 + 1];
     67       float c10 = block[(y * 2 + 1) * stride + x * 2];
     68       float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
     69       float r00 = c00 + c01 + c10 + c11;
     70       float r01 = c00 + c01 - c10 - c11;
     71       float r10 = c00 - c01 + c10 - c11;
     72       float r11 = c00 - c01 - c10 + c11;
     73       r00 *= 0.25f;
     74       r01 *= 0.25f;
     75       r10 *= 0.25f;
     76       r11 *= 0.25f;
     77       temp[y * kBlockDim + x] = r00;
     78       temp[y * kBlockDim + num_2x2 + x] = r01;
     79       temp[(y + num_2x2) * kBlockDim + x] = r10;
     80       temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
     81     }
     82   }
     83   for (size_t y = 0; y < S; y++) {
     84     for (size_t x = 0; x < S; x++) {
     85       out[y * kBlockDim + x] = temp[y * kBlockDim + x];
     86     }
     87   }
     88 }
     89 
     90 void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
     91   HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
     92       {
     93           0.2500000000000000,
     94           0.8769029297991420f,
     95           0.0000000000000000,
     96           0.0000000000000000,
     97           0.0000000000000000,
     98           -0.4105377591765233f,
     99           0.0000000000000000,
    100           0.0000000000000000,
    101           0.0000000000000000,
    102           0.0000000000000000,
    103           0.0000000000000000,
    104           0.0000000000000000,
    105           0.0000000000000000,
    106           0.0000000000000000,
    107           0.0000000000000000,
    108           0.0000000000000000,
    109       },
    110       {
    111           0.2500000000000000,
    112           0.2206518106944235f,
    113           0.0000000000000000,
    114           0.0000000000000000,
    115           -0.7071067811865474f,
    116           0.6235485373547691f,
    117           0.0000000000000000,
    118           0.0000000000000000,
    119           0.0000000000000000,
    120           0.0000000000000000,
    121           0.0000000000000000,
    122           0.0000000000000000,
    123           0.0000000000000000,
    124           0.0000000000000000,
    125           0.0000000000000000,
    126           0.0000000000000000,
    127       },
    128       {
    129           0.2500000000000000,
    130           -0.1014005039375376f,
    131           0.4067007583026075f,
    132           -0.2125574805828875f,
    133           0.0000000000000000,
    134           -0.0643507165794627f,
    135           -0.4517556589999482f,
    136           -0.3046847507248690f,
    137           0.3017929516615495f,
    138           0.4082482904638627f,
    139           0.1747866975480809f,
    140           -0.2110560104933578f,
    141           -0.1426608480880726f,
    142           -0.1381354035075859f,
    143           -0.1743760259965107f,
    144           0.1135498731499434f,
    145       },
    146       {
    147           0.2500000000000000,
    148           -0.1014005039375375f,
    149           0.4444481661973445f,
    150           0.3085497062849767f,
    151           0.0000000000000000f,
    152           -0.0643507165794627f,
    153           0.1585450355184006f,
    154           0.5112616136591823f,
    155           0.2579236279634118f,
    156           0.0000000000000000,
    157           0.0812611176717539f,
    158           0.1856718091610980f,
    159           -0.3416446842253372f,
    160           0.3302282550303788f,
    161           0.0702790691196284f,
    162           -0.0741750459581035f,
    163       },
    164       {
    165           0.2500000000000000,
    166           0.2206518106944236f,
    167           0.0000000000000000,
    168           0.0000000000000000,
    169           0.7071067811865476f,
    170           0.6235485373547694f,
    171           0.0000000000000000,
    172           0.0000000000000000,
    173           0.0000000000000000,
    174           0.0000000000000000,
    175           0.0000000000000000,
    176           0.0000000000000000,
    177           0.0000000000000000,
    178           0.0000000000000000,
    179           0.0000000000000000,
    180           0.0000000000000000,
    181       },
    182       {
    183           0.2500000000000000,
    184           -0.1014005039375378f,
    185           0.0000000000000000,
    186           0.4706702258572536f,
    187           0.0000000000000000,
    188           -0.0643507165794628f,
    189           -0.0403851516082220f,
    190           0.0000000000000000,
    191           0.1627234014286620f,
    192           0.0000000000000000,
    193           0.0000000000000000,
    194           0.0000000000000000,
    195           0.7367497537172237f,
    196           0.0875511500058708f,
    197           -0.2921026642334881f,
    198           0.1940289303259434f,
    199       },
    200       {
    201           0.2500000000000000,
    202           -0.1014005039375377f,
    203           0.1957439937204294f,
    204           -0.1621205195722993f,
    205           0.0000000000000000,
    206           -0.0643507165794628f,
    207           0.0074182263792424f,
    208           -0.2904801297289980f,
    209           0.0952002265347504f,
    210           0.0000000000000000,
    211           -0.3675398009862027f,
    212           0.4921585901373873f,
    213           0.2462710772207515f,
    214           -0.0794670660590957f,
    215           0.3623817333531167f,
    216           -0.4351904965232280f,
    217       },
    218       {
    219           0.2500000000000000,
    220           -0.1014005039375376f,
    221           0.2929100136981264f,
    222           0.0000000000000000,
    223           0.0000000000000000,
    224           -0.0643507165794627f,
    225           0.3935103426921017f,
    226           -0.0657870154914280f,
    227           0.0000000000000000,
    228           -0.4082482904638628f,
    229           -0.3078822139579090f,
    230           -0.3852501370925192f,
    231           -0.0857401903551931f,
    232           -0.4613374887461511f,
    233           0.0000000000000000,
    234           0.2191868483885747f,
    235       },
    236       {
    237           0.2500000000000000,
    238           -0.1014005039375376f,
    239           -0.4067007583026072f,
    240           -0.2125574805828705f,
    241           0.0000000000000000,
    242           -0.0643507165794627f,
    243           -0.4517556589999464f,
    244           0.3046847507248840f,
    245           0.3017929516615503f,
    246           -0.4082482904638635f,
    247           -0.1747866975480813f,
    248           0.2110560104933581f,
    249           -0.1426608480880734f,
    250           -0.1381354035075829f,
    251           -0.1743760259965108f,
    252           0.1135498731499426f,
    253       },
    254       {
    255           0.2500000000000000,
    256           -0.1014005039375377f,
    257           -0.1957439937204287f,
    258           -0.1621205195722833f,
    259           0.0000000000000000,
    260           -0.0643507165794628f,
    261           0.0074182263792444f,
    262           0.2904801297290076f,
    263           0.0952002265347505f,
    264           0.0000000000000000,
    265           0.3675398009862011f,
    266           -0.4921585901373891f,
    267           0.2462710772207514f,
    268           -0.0794670660591026f,
    269           0.3623817333531165f,
    270           -0.4351904965232251f,
    271       },
    272       {
    273           0.2500000000000000,
    274           -0.1014005039375375f,
    275           0.0000000000000000,
    276           -0.4706702258572528f,
    277           0.0000000000000000,
    278           -0.0643507165794627f,
    279           0.1107416575309343f,
    280           0.0000000000000000,
    281           -0.1627234014286617f,
    282           0.0000000000000000,
    283           0.0000000000000000,
    284           0.0000000000000000,
    285           0.1488339922711357f,
    286           0.4972464710953509f,
    287           0.2921026642334879f,
    288           0.5550443808910661f,
    289       },
    290       {
    291           0.2500000000000000,
    292           -0.1014005039375377f,
    293           0.1137907446044809f,
    294           -0.1464291867126764f,
    295           0.0000000000000000,
    296           -0.0643507165794628f,
    297           0.0829816309488205f,
    298           -0.2388977352334460f,
    299           -0.3531238544981630f,
    300           -0.4082482904638630f,
    301           0.4826689115059883f,
    302           0.1741941265991622f,
    303           -0.0476868035022925f,
    304           0.1253805944856366f,
    305           -0.4326608024727445f,
    306           -0.2546827712406646f,
    307       },
    308       {
    309           0.2500000000000000,
    310           -0.1014005039375377f,
    311           -0.4444481661973438f,
    312           0.3085497062849487f,
    313           0.0000000000000000,
    314           -0.0643507165794628f,
    315           0.1585450355183970f,
    316           -0.5112616136592012f,
    317           0.2579236279634129f,
    318           0.0000000000000000,
    319           -0.0812611176717504f,
    320           -0.1856718091610990f,
    321           -0.3416446842253373f,
    322           0.3302282550303805f,
    323           0.0702790691196282f,
    324           -0.0741750459581023f,
    325       },
    326       {
    327           0.2500000000000000,
    328           -0.1014005039375376f,
    329           -0.2929100136981264f,
    330           0.0000000000000000,
    331           0.0000000000000000,
    332           -0.0643507165794627f,
    333           0.3935103426921022f,
    334           0.0657870154914254f,
    335           0.0000000000000000,
    336           0.4082482904638634f,
    337           0.3078822139579031f,
    338           0.3852501370925211f,
    339           -0.0857401903551927f,
    340           -0.4613374887461554f,
    341           0.0000000000000000,
    342           0.2191868483885728f,
    343       },
    344       {
    345           0.2500000000000000,
    346           -0.1014005039375376f,
    347           -0.1137907446044814f,
    348           -0.1464291867126654f,
    349           0.0000000000000000,
    350           -0.0643507165794627f,
    351           0.0829816309488214f,
    352           0.2388977352334547f,
    353           -0.3531238544981624f,
    354           0.4082482904638630f,
    355           -0.4826689115059858f,
    356           -0.1741941265991621f,
    357           -0.0476868035022928f,
    358           0.1253805944856431f,
    359           -0.4326608024727457f,
    360           -0.2546827712406641f,
    361       },
    362       {
    363           0.2500000000000000,
    364           -0.1014005039375374f,
    365           0.0000000000000000,
    366           0.4251149611657548f,
    367           0.0000000000000000,
    368           -0.0643507165794626f,
    369           -0.4517556589999480f,
    370           0.0000000000000000,
    371           -0.6035859033230976f,
    372           0.0000000000000000,
    373           0.0000000000000000,
    374           0.0000000000000000,
    375           -0.1426608480880724f,
    376           -0.1381354035075845f,
    377           0.3487520519930227f,
    378           0.1135498731499429f,
    379       },
    380   };
    381 
    382   const HWY_CAPPED(float, 16) d;
    383   for (size_t i = 0; i < 16; i += Lanes(d)) {
    384     auto scalar = Zero(d);
    385     for (size_t j = 0; j < 16; j++) {
    386       auto px = Set(d, pixels[j]);
    387       auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
    388       scalar = MulAdd(px, basis, scalar);
    389     }
    390     Store(scalar, d, coeffs + i);
    391   }
    392 }
    393 
    394 // Coefficient layout:
    395 //  - (even, even) positions hold AFV coefficients
    396 //  - (odd, even) positions hold DCT4x4 coefficients
    397 //  - (any, odd) positions hold DCT4x8 coefficients
    398 template <size_t afv_kind>
    399 void AFVTransformFromPixels(const float* JXL_RESTRICT pixels,
    400                             size_t pixels_stride,
    401                             float* JXL_RESTRICT coefficients) {
    402   HWY_ALIGN float scratch_space[4 * 8 * 5];
    403   size_t afv_x = afv_kind & 1;
    404   size_t afv_y = afv_kind / 2;
    405   HWY_ALIGN float block[4 * 8] = {};
    406   for (size_t iy = 0; iy < 4; iy++) {
    407     for (size_t ix = 0; ix < 4; ix++) {
    408       block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
    409           pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
    410     }
    411   }
    412   // AFV coefficients in (even, even) positions.
    413   HWY_ALIGN float coeff[4 * 4];
    414   AFVDCT4x4(block, coeff);
    415   for (size_t iy = 0; iy < 4; iy++) {
    416     for (size_t ix = 0; ix < 4; ix++) {
    417       coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
    418     }
    419   }
    420   // 4x4 DCT of the block with same y and different x.
    421   ComputeScaledDCT<4, 4>()(
    422       DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
    423               pixels_stride),
    424       block, scratch_space);
    425   // ... in (odd, even) positions.
    426   for (size_t iy = 0; iy < 4; iy++) {
    427     for (size_t ix = 0; ix < 8; ix++) {
    428       coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
    429     }
    430   }
    431   // 4x8 DCT of the other half of the block.
    432   ComputeScaledDCT<4, 8>()(
    433       DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
    434       block, scratch_space);
    435   for (size_t iy = 0; iy < 4; iy++) {
    436     for (size_t ix = 0; ix < 8; ix++) {
    437       coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
    438     }
    439   }
    440   float block00 = coefficients[0] * 0.25f;
    441   float block01 = coefficients[1];
    442   float block10 = coefficients[8];
    443   coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
    444   coefficients[1] = (block00 - block01) * 0.5f;
    445   coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
    446 }
    447 
    448 HWY_MAYBE_UNUSED void TransformFromPixels(const AcStrategy::Type strategy,
    449                                           const float* JXL_RESTRICT pixels,
    450                                           size_t pixels_stride,
    451                                           float* JXL_RESTRICT coefficients,
    452                                           float* JXL_RESTRICT scratch_space) {
    453   using Type = AcStrategy::Type;
    454   switch (strategy) {
    455     case Type::IDENTITY: {
    456       for (size_t y = 0; y < 2; y++) {
    457         for (size_t x = 0; x < 2; x++) {
    458           float block_dc = 0;
    459           for (size_t iy = 0; iy < 4; iy++) {
    460             for (size_t ix = 0; ix < 4; ix++) {
    461               block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
    462             }
    463           }
    464           block_dc *= 1.0f / 16;
    465           for (size_t iy = 0; iy < 4; iy++) {
    466             for (size_t ix = 0; ix < 4; ix++) {
    467               if (ix == 1 && iy == 1) continue;
    468               coefficients[(y + iy * 2) * 8 + x + ix * 2] =
    469                   pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
    470                   pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
    471             }
    472           }
    473           coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
    474           coefficients[y * 8 + x] = block_dc;
    475         }
    476       }
    477       float block00 = coefficients[0];
    478       float block01 = coefficients[1];
    479       float block10 = coefficients[8];
    480       float block11 = coefficients[9];
    481       coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
    482       coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
    483       coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
    484       coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
    485       break;
    486     }
    487     case Type::DCT8X4: {
    488       for (size_t x = 0; x < 2; x++) {
    489         HWY_ALIGN float block[4 * 8];
    490         ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
    491                                  scratch_space);
    492         for (size_t iy = 0; iy < 4; iy++) {
    493           for (size_t ix = 0; ix < 8; ix++) {
    494             // Store transposed.
    495             coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
    496           }
    497         }
    498       }
    499       float block0 = coefficients[0];
    500       float block1 = coefficients[8];
    501       coefficients[0] = (block0 + block1) * 0.5f;
    502       coefficients[8] = (block0 - block1) * 0.5f;
    503       break;
    504     }
    505     case Type::DCT4X8: {
    506       for (size_t y = 0; y < 2; y++) {
    507         HWY_ALIGN float block[4 * 8];
    508         ComputeScaledDCT<4, 8>()(
    509             DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
    510             scratch_space);
    511         for (size_t iy = 0; iy < 4; iy++) {
    512           for (size_t ix = 0; ix < 8; ix++) {
    513             coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
    514           }
    515         }
    516       }
    517       float block0 = coefficients[0];
    518       float block1 = coefficients[8];
    519       coefficients[0] = (block0 + block1) * 0.5f;
    520       coefficients[8] = (block0 - block1) * 0.5f;
    521       break;
    522     }
    523     case Type::DCT4X4: {
    524       for (size_t y = 0; y < 2; y++) {
    525         for (size_t x = 0; x < 2; x++) {
    526           HWY_ALIGN float block[4 * 4];
    527           ComputeScaledDCT<4, 4>()(
    528               DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
    529               block, scratch_space);
    530           for (size_t iy = 0; iy < 4; iy++) {
    531             for (size_t ix = 0; ix < 4; ix++) {
    532               coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
    533             }
    534           }
    535         }
    536       }
    537       float block00 = coefficients[0];
    538       float block01 = coefficients[1];
    539       float block10 = coefficients[8];
    540       float block11 = coefficients[9];
    541       coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
    542       coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
    543       coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
    544       coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
    545       break;
    546     }
    547     case Type::DCT2X2: {
    548       DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
    549       DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
    550       DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
    551       break;
    552     }
    553     case Type::DCT16X16: {
    554       ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
    555                                  scratch_space);
    556       break;
    557     }
    558     case Type::DCT16X8: {
    559       ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
    560                                 scratch_space);
    561       break;
    562     }
    563     case Type::DCT8X16: {
    564       ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
    565                                 scratch_space);
    566       break;
    567     }
    568     case Type::DCT32X8: {
    569       ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
    570                                 scratch_space);
    571       break;
    572     }
    573     case Type::DCT8X32: {
    574       ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
    575                                 scratch_space);
    576       break;
    577     }
    578     case Type::DCT32X16: {
    579       ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
    580                                  scratch_space);
    581       break;
    582     }
    583     case Type::DCT16X32: {
    584       ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
    585                                  scratch_space);
    586       break;
    587     }
    588     case Type::DCT32X32: {
    589       ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
    590                                  scratch_space);
    591       break;
    592     }
    593     case Type::DCT: {
    594       ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
    595                                scratch_space);
    596       break;
    597     }
    598     case Type::AFV0: {
    599       AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
    600       break;
    601     }
    602     case Type::AFV1: {
    603       AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
    604       break;
    605     }
    606     case Type::AFV2: {
    607       AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
    608       break;
    609     }
    610     case Type::AFV3: {
    611       AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
    612       break;
    613     }
    614     case Type::DCT64X64: {
    615       ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
    616                                  scratch_space);
    617       break;
    618     }
    619     case Type::DCT64X32: {
    620       ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
    621                                  scratch_space);
    622       break;
    623     }
    624     case Type::DCT32X64: {
    625       ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
    626                                  scratch_space);
    627       break;
    628     }
    629     case Type::DCT128X128: {
    630       ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
    631                                    scratch_space);
    632       break;
    633     }
    634     case Type::DCT128X64: {
    635       ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
    636                                   scratch_space);
    637       break;
    638     }
    639     case Type::DCT64X128: {
    640       ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
    641                                   scratch_space);
    642       break;
    643     }
    644     case Type::DCT256X256: {
    645       ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
    646                                    scratch_space);
    647       break;
    648     }
    649     case Type::DCT256X128: {
    650       ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
    651                                    scratch_space);
    652       break;
    653     }
    654     case Type::DCT128X256: {
    655       ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
    656                                    scratch_space);
    657       break;
    658     }
    659     case Type::kNumValidStrategies:
    660       JXL_UNREACHABLE("Invalid strategy");
    661   }
    662 }
    663 
    664 HWY_MAYBE_UNUSED void DCFromLowestFrequencies(const AcStrategy::Type strategy,
    665                                               const float* block, float* dc,
    666                                               size_t dc_stride) {
    667   using Type = AcStrategy::Type;
    668   switch (strategy) {
    669     case Type::DCT16X8: {
    670       ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
    671                          /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
    672           block, 2 * kBlockDim, dc, dc_stride);
    673       break;
    674     }
    675     case Type::DCT8X16: {
    676       ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
    677                          /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
    678           block, 2 * kBlockDim, dc, dc_stride);
    679       break;
    680     }
    681     case Type::DCT16X16: {
    682       ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
    683                          /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
    684           block, 2 * kBlockDim, dc, dc_stride);
    685       break;
    686     }
    687     case Type::DCT32X8: {
    688       ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
    689                          /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
    690           block, 4 * kBlockDim, dc, dc_stride);
    691       break;
    692     }
    693     case Type::DCT8X32: {
    694       ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
    695                          /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
    696           block, 4 * kBlockDim, dc, dc_stride);
    697       break;
    698     }
    699     case Type::DCT32X16: {
    700       ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
    701                          /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
    702           block, 4 * kBlockDim, dc, dc_stride);
    703       break;
    704     }
    705     case Type::DCT16X32: {
    706       ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
    707                          /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
    708           block, 4 * kBlockDim, dc, dc_stride);
    709       break;
    710     }
    711     case Type::DCT32X32: {
    712       ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
    713                          /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
    714           block, 4 * kBlockDim, dc, dc_stride);
    715       break;
    716     }
    717     case Type::DCT64X32: {
    718       ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
    719                          /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
    720           block, 8 * kBlockDim, dc, dc_stride);
    721       break;
    722     }
    723     case Type::DCT32X64: {
    724       ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
    725                          /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
    726           block, 8 * kBlockDim, dc, dc_stride);
    727       break;
    728     }
    729     case Type::DCT64X64: {
    730       ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
    731                          /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
    732           block, 8 * kBlockDim, dc, dc_stride);
    733       break;
    734     }
    735     case Type::DCT128X64: {
    736       ReinterpretingIDCT<
    737           /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
    738           /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
    739           block, 16 * kBlockDim, dc, dc_stride);
    740       break;
    741     }
    742     case Type::DCT64X128: {
    743       ReinterpretingIDCT<
    744           /*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
    745           /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
    746           block, 16 * kBlockDim, dc, dc_stride);
    747       break;
    748     }
    749     case Type::DCT128X128: {
    750       ReinterpretingIDCT<
    751           /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
    752           /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
    753           block, 16 * kBlockDim, dc, dc_stride);
    754       break;
    755     }
    756     case Type::DCT256X128: {
    757       ReinterpretingIDCT<
    758           /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
    759           /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
    760           block, 32 * kBlockDim, dc, dc_stride);
    761       break;
    762     }
    763     case Type::DCT128X256: {
    764       ReinterpretingIDCT<
    765           /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
    766           /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
    767           block, 32 * kBlockDim, dc, dc_stride);
    768       break;
    769     }
    770     case Type::DCT256X256: {
    771       ReinterpretingIDCT<
    772           /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
    773           /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
    774           block, 32 * kBlockDim, dc, dc_stride);
    775       break;
    776     }
    777     case Type::DCT:
    778     case Type::DCT2X2:
    779     case Type::DCT4X4:
    780     case Type::DCT4X8:
    781     case Type::DCT8X4:
    782     case Type::AFV0:
    783     case Type::AFV1:
    784     case Type::AFV2:
    785     case Type::AFV3:
    786     case Type::IDENTITY:
    787       dc[0] = block[0];
    788       break;
    789     case Type::kNumValidStrategies:
    790       JXL_UNREACHABLE("Invalid strategy");
    791   }
    792 }
    793 
    794 }  // namespace
    795 // NOLINTNEXTLINE(google-readability-namespace-comments)
    796 }  // namespace HWY_NAMESPACE
    797 }  // namespace jxl
    798 HWY_AFTER_NAMESPACE();
    799 
    800 #endif  // LIB_JXL_ENC_TRANSFORMS_INL_H_