enc_coeff_order.cc (11017B)
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include <stdint.h>

#include <algorithm>
#include <cmath>
#include <cstring>  // memcpy
#include <hwy/aligned_allocator.h>
#include <limits>   // std::numeric_limits
#include <utility>  // std::pair
#include <vector>

#include "lib/jxl/coeff_order.h"
#include "lib/jxl/coeff_order_fwd.h"
#include "lib/jxl/dct_util.h"
#include "lib/jxl/enc_ans.h"
#include "lib/jxl/enc_bit_writer.h"
#include "lib/jxl/lehmer_code.h"

namespace jxl {

struct AuxOut;

std::pair<uint32_t, uint32_t> ComputeUsedOrders(
    const SpeedTier speed, const AcStrategyImage& ac_strategy,
    const Rect& rect) {
  // No coefficient reordering in Falcon or faster.
  // Only DCT8 (order 0) is used, so the bitfield is 1.
  if (speed >= SpeedTier::kFalcon) return {1, 1};

  uint32_t ret = 0;
  uint32_t ret_customize = 0;
  size_t xsize_blocks = rect.xsize();
  size_t ysize_blocks = rect.ysize();
  // TODO(veluca): precompute when doing DCT.
  for (size_t by = 0; by < ysize_blocks; ++by) {
    AcStrategyRow acs_row = ac_strategy.ConstRow(rect, by);
    for (size_t bx = 0; bx < xsize_blocks; ++bx) {
      int ord = kStrategyOrder[acs_row[bx].RawStrategy()];
      ret |= 1u << ord;
      // Do not customize coefficient orders for blocks bigger than 32x32.
      if (ord > 6) {
        continue;
      }
      ret_customize |= 1u << ord;
    }
  }
  // Use default orders for small images.
  if (ac_strategy.xsize() < 5 && ac_strategy.ysize() < 5) return {ret, 0};
  return {ret, ret_customize};
}

void ComputeCoeffOrder(SpeedTier speed, const ACImage& acs,
                       const AcStrategyImage& ac_strategy,
                       const FrameDimensions& frame_dim,
                       uint32_t& all_used_orders, uint32_t prev_used_acs,
                       uint32_t current_used_acs, uint32_t current_used_orders,
                       coeff_order_t* JXL_RESTRICT order) {
  std::vector<int32_t> num_zeros(kCoeffOrderMaxSize);
  // If compressing at high speed and only using 8x8 DCTs, only consider a
  // subset of blocks.
  double block_fraction = 1.0f;
  // TODO(veluca): figure out why sampling blocks if non-8x8s are used makes
  // encoding significantly less dense.
  if (speed >= SpeedTier::kSquirrel && current_used_orders == 1) {
    block_fraction = 0.5f;
  }
  // No need to compute the number of zero coefficients if all orders are the
  // default.
  if (current_used_orders != 0) {
    uint64_t threshold =
        (std::numeric_limits<uint64_t>::max() >> 32) * block_fraction;
    uint64_t s[2] = {static_cast<uint64_t>(0x94D049BB133111EBull),
                     static_cast<uint64_t>(0xBF58476D1CE4E5B9ull)};
    // Xorshift128+ adapted from xorshift128+-inl.h
    auto use_sample = [&]() {
      auto s1 = s[0];
      const auto s0 = s[1];
      const auto bits = s1 + s0;  // b, c
      s[0] = s0;
      s1 ^= s1 << 23;
      s1 ^= s0 ^ (s1 >> 18) ^ (s0 >> 5);
      s[1] = s1;
      return (bits >> 32) <= threshold;
    };

    // Count the number of zero coefficients, separately for each DCT band.
    // TODO(veluca): precompute when doing DCT.
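    // Sampling note: use_sample() keeps a block iff the top 32 bits of the
    // xorshift output are <= threshold, i.e. with probability approximately
    // block_fraction. For every sampled block, the loop below records, per
    // strategy order, channel and coefficient position, how often that
    // position is zero; positions that are zero more often are pushed
    // towards the end of the custom order further down.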
    for (size_t group_index = 0; group_index < frame_dim.num_groups;
         group_index++) {
      const size_t gx = group_index % frame_dim.xsize_groups;
      const size_t gy = group_index / frame_dim.xsize_groups;
      const Rect rect(gx * kGroupDimInBlocks, gy * kGroupDimInBlocks,
                      kGroupDimInBlocks, kGroupDimInBlocks,
                      frame_dim.xsize_blocks, frame_dim.ysize_blocks);
      ConstACPtr rows[3];
      ACType type = acs.Type();
      for (size_t c = 0; c < 3; c++) {
        rows[c] = acs.PlaneRow(c, group_index, 0);
      }
      size_t ac_offset = 0;

      // TODO(veluca): SIMDfy.
      for (size_t by = 0; by < rect.ysize(); ++by) {
        AcStrategyRow acs_row = ac_strategy.ConstRow(rect, by);
        for (size_t bx = 0; bx < rect.xsize(); ++bx) {
          AcStrategy acs = acs_row[bx];
          if (!acs.IsFirstBlock()) continue;
          if (!use_sample()) continue;
          size_t size = kDCTBlockSize << acs.log2_covered_blocks();
          for (size_t c = 0; c < 3; ++c) {
            const size_t order_offset =
                CoeffOrderOffset(kStrategyOrder[acs.RawStrategy()], c);
            if (type == ACType::k16) {
              for (size_t k = 0; k < size; k++) {
                bool is_zero = rows[c].ptr16[ac_offset + k] == 0;
                num_zeros[order_offset + k] += is_zero ? 1 : 0;
              }
            } else {
              for (size_t k = 0; k < size; k++) {
                bool is_zero = rows[c].ptr32[ac_offset + k] == 0;
                num_zeros[order_offset + k] += is_zero ? 1 : 0;
              }
            }
            // Ensure LLFs are first in the order.
            size_t cx = acs.covered_blocks_x();
            size_t cy = acs.covered_blocks_y();
            CoefficientLayout(&cy, &cx);
            for (size_t iy = 0; iy < cy; iy++) {
              for (size_t ix = 0; ix < cx; ix++) {
                num_zeros[order_offset + iy * kBlockDim * cx + ix] = -1;
              }
            }
          }
          ac_offset += size;
        }
      }
    }
  }
  struct PosAndCount {
    uint32_t pos;
    uint32_t count;
  };
  auto mem = hwy::AllocateAligned<PosAndCount>(AcStrategy::kMaxCoeffArea);

  std::vector<coeff_order_t> natural_order_buffer;

  uint16_t computed = 0;
  for (uint8_t o = 0; o < AcStrategy::kNumValidStrategies; ++o) {
    uint8_t ord = kStrategyOrder[o];
    if (computed & (1 << ord)) continue;
    computed |= 1 << ord;
    AcStrategy acs = AcStrategy::FromRawStrategy(o);
    size_t sz = kDCTBlockSize * acs.covered_blocks_x() * acs.covered_blocks_y();

    // Do nothing for transforms that don't appear.
    if ((1 << ord) & ~current_used_acs) continue;

    // Do nothing if we already committed to this custom order previously.
    if ((1 << ord) & prev_used_acs) continue;
    if ((1 << ord) & all_used_orders) continue;

    if (natural_order_buffer.size() < sz) natural_order_buffer.resize(sz);
    acs.ComputeNaturalCoeffOrder(natural_order_buffer.data());

    // Ensure natural coefficient order is not permuted if the order is
    // not transmitted.
    if ((1 << ord) & ~current_used_orders) {
      for (size_t c = 0; c < 3; c++) {
        size_t offset = CoeffOrderOffset(ord, c);
        JXL_DASSERT(CoeffOrderOffset(ord, c + 1) - offset == sz);
        memcpy(&order[offset], natural_order_buffer.data(),
               sz * sizeof(*order));
      }
      continue;
    }

    bool is_nondefault = false;
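    // For each channel, build the custom order: positions are keyed by their
    // quantized zero count and stable-sorted, so frequently-zero positions
    // move towards the end while ties keep their natural (zig-zag) order.
    // LLF positions were forced to -1 above and therefore always come first.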
    for (uint8_t c = 0; c < 3; c++) {
      // Apply zig-zag order.
      PosAndCount* pos_and_val = mem.get();
      size_t offset = CoeffOrderOffset(ord, c);
      JXL_DASSERT(CoeffOrderOffset(ord, c + 1) - offset == sz);
      float inv_sqrt_sz = 1.0f / std::sqrt(sz);
      for (size_t i = 0; i < sz; ++i) {
        size_t pos = natural_order_buffer[i];
        pos_and_val[i].pos = pos;
        // The exact number does not matter -> quantize the number of zeros
        // to get a less permuted order.
        pos_and_val[i].count = num_zeros[offset + pos] * inv_sqrt_sz + 0.1f;
      }

      // Stable sort -> elements with the same number of zeros preserve their
      // order.
      auto comparator = [](const PosAndCount& a, const PosAndCount& b) -> bool {
        return a.count < b.count;
      };
      std::stable_sort(pos_and_val, pos_and_val + sz, comparator);

      // Grab indices.
      for (size_t i = 0; i < sz; ++i) {
        order[offset + i] = pos_and_val[i].pos;
        is_nondefault |= natural_order_buffer[i] != pos_and_val[i].pos;
      }
    }
    if (!is_nondefault) {
      current_used_orders &= ~(1 << ord);
    }
  }
  all_used_orders |= current_used_orders;
}

namespace {

void TokenizePermutation(const coeff_order_t* JXL_RESTRICT order, size_t skip,
                         size_t size, std::vector<Token>* tokens) {
  std::vector<LehmerT> lehmer(size);
  std::vector<uint32_t> temp(size + 1);
  ComputeLehmerCode(order, temp.data(), size, lehmer.data());
  // Trailing zeros of the Lehmer code are not transmitted; the count written
  // first tells the decoder how many values follow.
  size_t end = size;
  while (end > skip && lehmer[end - 1] == 0) {
    --end;
  }
  tokens->emplace_back(CoeffOrderContext(size), end - skip);
  uint32_t last = 0;
  for (size_t i = skip; i < end; ++i) {
    tokens->emplace_back(CoeffOrderContext(last), lehmer[i]);
    last = lehmer[i];
  }
}

}  // namespace

void EncodePermutation(const coeff_order_t* JXL_RESTRICT order, size_t skip,
                       size_t size, BitWriter* writer, int layer,
                       AuxOut* aux_out) {
  std::vector<std::vector<Token>> tokens(1);
  TokenizePermutation(order, skip, size, tokens.data());
  std::vector<uint8_t> context_map;
  EntropyEncodingData codes;
  BuildAndEncodeHistograms(HistogramParams(), kPermutationContexts, tokens,
                           &codes, &context_map, writer, layer, aux_out);
  WriteTokens(tokens[0], codes, context_map, 0, writer, layer, aux_out);
}

namespace {
void EncodeCoeffOrder(const coeff_order_t* JXL_RESTRICT order, AcStrategy acs,
                      std::vector<Token>* tokens, coeff_order_t* order_zigzag,
                      std::vector<coeff_order_t>& natural_order_lut) {
  const size_t llf = acs.covered_blocks_x() * acs.covered_blocks_y();
  const size_t size = kDCTBlockSize * llf;
  for (size_t i = 0; i < size; ++i) {
    order_zigzag[i] = natural_order_lut[order[i]];
  }
  TokenizePermutation(order_zigzag, llf, size, tokens);
}
}  // namespace

void EncodeCoeffOrders(uint16_t used_orders,
                       const coeff_order_t* JXL_RESTRICT order,
                       BitWriter* writer, size_t layer,
                       AuxOut* JXL_RESTRICT aux_out) {
  auto mem = hwy::AllocateAligned<coeff_order_t>(AcStrategy::kMaxCoeffArea);
  uint16_t computed = 0;
  std::vector<std::vector<Token>> tokens(1);
  std::vector<coeff_order_t> natural_order_lut;
  for (uint8_t o = 0; o < AcStrategy::kNumValidStrategies; ++o) {
    uint8_t ord = kStrategyOrder[o];
    if (computed & (1 << ord)) continue;
    computed |= 1 << ord;
    if ((used_orders & (1 << ord)) == 0) continue;
    AcStrategy acs = AcStrategy::FromRawStrategy(o);
    const size_t llf = acs.covered_blocks_x() * acs.covered_blocks_y();
    const size_t size = kDCTBlockSize * llf;
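    // The stored order lists which coefficient goes in each position;
    // EncodeCoeffOrder() below maps it through the natural-order LUT so the
    // permutation is expressed relative to the default (zig-zag) order, then
    // tokenizes its Lehmer code, skipping the llf slots that are fixed.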
    if (natural_order_lut.size() < size) natural_order_lut.resize(size);
    acs.ComputeNaturalCoeffOrderLut(natural_order_lut.data());
    for (size_t c = 0; c < 3; c++) {
      EncodeCoeffOrder(&order[CoeffOrderOffset(ord, c)], acs, tokens.data(),
                       mem.get(), natural_order_lut);
    }
  }
  // Do not write anything if no order is used.
  if (used_orders != 0) {
    std::vector<uint8_t> context_map;
    EntropyEncodingData codes;
    BuildAndEncodeHistograms(HistogramParams(), kPermutationContexts, tokens,
                             &codes, &context_map, writer, layer, aux_out);
    WriteTokens(tokens[0], codes, context_map, 0, writer, layer, aux_out);
  }
}

}  // namespace jxl
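// Overall flow (a sketch based on this file alone; the actual call sites live
// elsewhere in the encoder):
//   1. ComputeUsedOrders() scans the AC strategy of a rect and returns two
//      bitfields: the orders that occur at all and the orders worth
//      customizing.
//   2. ComputeCoeffOrder() gathers zero-count statistics from the quantized
//      AC coefficients and builds a custom coefficient order for each
//      customized (order, channel) pair, falling back to the natural order
//      otherwise.
//   3. EncodeCoeffOrders() / EncodePermutation() tokenize the resulting
//      permutations as Lehmer codes and write them to the bitstream via
//      BuildAndEncodeHistograms() and WriteTokens().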