enc_coeff_order.cc (11017B)
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include <stdint.h>

#include <algorithm>
#include <cmath>
#include <cstring>  // memcpy
#include <hwy/aligned_allocator.h>
#include <limits>   // std::numeric_limits
#include <utility>  // std::pair
#include <vector>

#include "lib/jxl/coeff_order.h"
#include "lib/jxl/coeff_order_fwd.h"
#include "lib/jxl/dct_util.h"
#include "lib/jxl/enc_ans.h"
#include "lib/jxl/enc_bit_writer.h"
#include "lib/jxl/lehmer_code.h"

namespace jxl {

struct AuxOut;

std::pair<uint32_t, uint32_t> ComputeUsedOrders(
    const SpeedTier speed, const AcStrategyImage& ac_strategy,
    const Rect& rect) {
  // No coefficient reordering in Falcon or faster.
  // Only DCT8 (order 0) is used, so the bitfield is 1.
  if (speed >= SpeedTier::kFalcon) return {1, 1};

  uint32_t ret = 0;
  uint32_t ret_customize = 0;
  size_t xsize_blocks = rect.xsize();
  size_t ysize_blocks = rect.ysize();
  // TODO(veluca): precompute when doing DCT.
  for (size_t by = 0; by < ysize_blocks; ++by) {
    AcStrategyRow acs_row = ac_strategy.ConstRow(rect, by);
    for (size_t bx = 0; bx < xsize_blocks; ++bx) {
      int ord = kStrategyOrder[acs_row[bx].RawStrategy()];
      ret |= 1u << ord;
      // Do not customize coefficient orders for blocks bigger than 32x32.
      if (ord > 6) {
        continue;
      }
      ret_customize |= 1u << ord;
    }
  }
  // Use default orders for small images.
  if (ac_strategy.xsize() < 5 && ac_strategy.ysize() < 5) return {ret, 0};
  return {ret, ret_customize};
}

void ComputeCoeffOrder(SpeedTier speed, const ACImage& acs,
                       const AcStrategyImage& ac_strategy,
                       const FrameDimensions& frame_dim,
                       uint32_t& all_used_orders, uint32_t prev_used_acs,
                       uint32_t current_used_acs, uint32_t current_used_orders,
                       coeff_order_t* JXL_RESTRICT order) {
  std::vector<int32_t> num_zeros(kCoeffOrderMaxSize);
  // If compressing at high speed and only using 8x8 DCTs, only consider a
  // subset of blocks.
  double block_fraction = 1.0f;
  // TODO(veluca): figure out why sampling blocks if non-8x8s are used makes
  // encoding significantly less dense.
  if (speed >= SpeedTier::kSquirrel && current_used_orders == 1) {
    block_fraction = 0.5f;
  }
  // No need to compute the number of zero coefficients if all orders are the
  // default.
  if (current_used_orders != 0) {
    uint64_t threshold =
        (std::numeric_limits<uint64_t>::max() >> 32) * block_fraction;
    uint64_t s[2] = {static_cast<uint64_t>(0x94D049BB133111EBull),
                     static_cast<uint64_t>(0xBF58476D1CE4E5B9ull)};
    // Xorshift128+ adapted from xorshift128+-inl.h
    auto use_sample = [&]() {
      auto s1 = s[0];
      const auto s0 = s[1];
      const auto bits = s1 + s0;  // b, c
      s[0] = s0;
      s1 ^= s1 << 23;
      s1 ^= s0 ^ (s1 >> 18) ^ (s0 >> 5);
      s[1] = s1;
      return (bits >> 32) <= threshold;
    };

    // Count the number of zero coefficients, separately for each DCT band.
    // TODO(veluca): precompute when doing DCT.
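    // Sampling note: use_sample() keeps a block iff the top 32 bits of the
    // xorshift output are <= threshold, i.e. with probability approximately
    // block_fraction. For every sampled block, the loop below records, per
    // strategy order, channel and coefficient position, how often that
    // position is zero; positions that are zero more often are pushed
    // towards the end of the custom order further down.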
    for (size_t group_index = 0; group_index < frame_dim.num_groups;
         group_index++) {
      const size_t gx = group_index % frame_dim.xsize_groups;
      const size_t gy = group_index / frame_dim.xsize_groups;
      const Rect rect(gx * kGroupDimInBlocks, gy * kGroupDimInBlocks,
                      kGroupDimInBlocks, kGroupDimInBlocks,
                      frame_dim.xsize_blocks, frame_dim.ysize_blocks);
      ConstACPtr rows[3];
      ACType type = acs.Type();
      for (size_t c = 0; c < 3; c++) {
        rows[c] = acs.PlaneRow(c, group_index, 0);
      }
      size_t ac_offset = 0;

      // TODO(veluca): SIMDfy.
      for (size_t by = 0; by < rect.ysize(); ++by) {
        AcStrategyRow acs_row = ac_strategy.ConstRow(rect, by);
        for (size_t bx = 0; bx < rect.xsize(); ++bx) {
          AcStrategy acs = acs_row[bx];
          if (!acs.IsFirstBlock()) continue;
          if (!use_sample()) continue;
          size_t size = kDCTBlockSize << acs.log2_covered_blocks();
          for (size_t c = 0; c < 3; ++c) {
            const size_t order_offset =
                CoeffOrderOffset(kStrategyOrder[acs.RawStrategy()], c);
            if (type == ACType::k16) {
              for (size_t k = 0; k < size; k++) {
                bool is_zero = rows[c].ptr16[ac_offset + k] == 0;
                num_zeros[order_offset + k] += is_zero ? 1 : 0;
              }
            } else {
              for (size_t k = 0; k < size; k++) {
                bool is_zero = rows[c].ptr32[ac_offset + k] == 0;
                num_zeros[order_offset + k] += is_zero ? 1 : 0;
              }
            }
            // Ensure LLFs are first in the order.
            size_t cx = acs.covered_blocks_x();
            size_t cy = acs.covered_blocks_y();
            CoefficientLayout(&cy, &cx);
            for (size_t iy = 0; iy < cy; iy++) {
              for (size_t ix = 0; ix < cx; ix++) {
                num_zeros[order_offset + iy * kBlockDim * cx + ix] = -1;
              }
            }
          }
          ac_offset += size;
        }
      }
    }
  }
  struct PosAndCount {
    uint32_t pos;
    uint32_t count;
  };
  auto mem = hwy::AllocateAligned<PosAndCount>(AcStrategy::kMaxCoeffArea);

  std::vector<coeff_order_t> natural_order_buffer;

  uint16_t computed = 0;
  for (uint8_t o = 0; o < AcStrategy::kNumValidStrategies; ++o) {
    uint8_t ord = kStrategyOrder[o];
    if (computed & (1 << ord)) continue;
    computed |= 1 << ord;
    AcStrategy acs = AcStrategy::FromRawStrategy(o);
    size_t sz = kDCTBlockSize * acs.covered_blocks_x() * acs.covered_blocks_y();

    // Do nothing for transforms that don't appear.
    if ((1 << ord) & ~current_used_acs) continue;

    // Do nothing if we already committed to this custom order previously.
    if ((1 << ord) & prev_used_acs) continue;
    if ((1 << ord) & all_used_orders) continue;

    if (natural_order_buffer.size() < sz) natural_order_buffer.resize(sz);
    acs.ComputeNaturalCoeffOrder(natural_order_buffer.data());

    // Ensure natural coefficient order is not permuted if the order is
    // not transmitted.
    if ((1 << ord) & ~current_used_orders) {
      for (size_t c = 0; c < 3; c++) {
        size_t offset = CoeffOrderOffset(ord, c);
        JXL_DASSERT(CoeffOrderOffset(ord, c + 1) - offset == sz);
        memcpy(&order[offset], natural_order_buffer.data(),
               sz * sizeof(*order));
      }
      continue;
    }

    bool is_nondefault = false;
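    // For each channel, build the custom order: positions are keyed by their
    // quantized zero count and stable-sorted, so frequently-zero positions
    // move towards the end while ties keep their natural (zig-zag) order.
    // LLF positions were forced to -1 above and therefore always come first.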
    for (uint8_t c = 0; c < 3; c++) {
      // Apply zig-zag order.
      PosAndCount* pos_and_val = mem.get();
      size_t offset = CoeffOrderOffset(ord, c);
      JXL_DASSERT(CoeffOrderOffset(ord, c + 1) - offset == sz);
      float inv_sqrt_sz = 1.0f / std::sqrt(sz);
      for (size_t i = 0; i < sz; ++i) {
        size_t pos = natural_order_buffer[i];
        pos_and_val[i].pos = pos;
        // The exact number does not matter -> quantize the number of zeros
        // to get a less permuted order.
        pos_and_val[i].count = num_zeros[offset + pos] * inv_sqrt_sz + 0.1f;
      }

      // Stable sort -> elements with the same number of zeros preserve their
      // order.
      auto comparator = [](const PosAndCount& a, const PosAndCount& b) -> bool {
        return a.count < b.count;
      };
      std::stable_sort(pos_and_val, pos_and_val + sz, comparator);

      // Grab indices.
      for (size_t i = 0; i < sz; ++i) {
        order[offset + i] = pos_and_val[i].pos;
        is_nondefault |= natural_order_buffer[i] != pos_and_val[i].pos;
      }
    }
    if (!is_nondefault) {
      current_used_orders &= ~(1 << ord);
    }
  }
  all_used_orders |= current_used_orders;
}

namespace {

void TokenizePermutation(const coeff_order_t* JXL_RESTRICT order, size_t skip,
                         size_t size, std::vector<Token>* tokens) {
  std::vector<LehmerT> lehmer(size);
  std::vector<uint32_t> temp(size + 1);
  ComputeLehmerCode(order, temp.data(), size, lehmer.data());
  // Trailing zeros of the Lehmer code are not transmitted; the count written
  // first tells the decoder how many values follow.
  size_t end = size;
  while (end > skip && lehmer[end - 1] == 0) {
    --end;
  }
  tokens->emplace_back(CoeffOrderContext(size), end - skip);
  uint32_t last = 0;
  for (size_t i = skip; i < end; ++i) {
    tokens->emplace_back(CoeffOrderContext(last), lehmer[i]);
    last = lehmer[i];
  }
}

}  // namespace

void EncodePermutation(const coeff_order_t* JXL_RESTRICT order, size_t skip,
                       size_t size, BitWriter* writer, int layer,
                       AuxOut* aux_out) {
  std::vector<std::vector<Token>> tokens(1);
  TokenizePermutation(order, skip, size, tokens.data());
  std::vector<uint8_t> context_map;
  EntropyEncodingData codes;
  BuildAndEncodeHistograms(HistogramParams(), kPermutationContexts, tokens,
                           &codes, &context_map, writer, layer, aux_out);
  WriteTokens(tokens[0], codes, context_map, 0, writer, layer, aux_out);
}

namespace {
void EncodeCoeffOrder(const coeff_order_t* JXL_RESTRICT order, AcStrategy acs,
                      std::vector<Token>* tokens, coeff_order_t* order_zigzag,
                      std::vector<coeff_order_t>& natural_order_lut) {
  const size_t llf = acs.covered_blocks_x() * acs.covered_blocks_y();
  const size_t size = kDCTBlockSize * llf;
  for (size_t i = 0; i < size; ++i) {
    order_zigzag[i] = natural_order_lut[order[i]];
  }
  TokenizePermutation(order_zigzag, llf, size, tokens);
}
}  // namespace

void EncodeCoeffOrders(uint16_t used_orders,
                       const coeff_order_t* JXL_RESTRICT order,
                       BitWriter* writer, size_t layer,
                       AuxOut* JXL_RESTRICT aux_out) {
  auto mem = hwy::AllocateAligned<coeff_order_t>(AcStrategy::kMaxCoeffArea);
  uint16_t computed = 0;
  std::vector<std::vector<Token>> tokens(1);
  std::vector<coeff_order_t> natural_order_lut;
  for (uint8_t o = 0; o < AcStrategy::kNumValidStrategies; ++o) {
    uint8_t ord = kStrategyOrder[o];
    if (computed & (1 << ord)) continue;
    computed |= 1 << ord;
    if ((used_orders & (1 << ord)) == 0) continue;
    AcStrategy acs = AcStrategy::FromRawStrategy(o);
    const size_t llf = acs.covered_blocks_x() * acs.covered_blocks_y();
    const size_t size = kDCTBlockSize * llf;
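    // The stored order lists which coefficient goes in each position;
    // EncodeCoeffOrder() below maps it through the natural-order LUT so the
    // permutation is expressed relative to the default (zig-zag) order, then
    // tokenizes its Lehmer code, skipping the llf slots that are fixed.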
    if (natural_order_lut.size() < size) natural_order_lut.resize(size);
    acs.ComputeNaturalCoeffOrderLut(natural_order_lut.data());
    for (size_t c = 0; c < 3; c++) {
      EncodeCoeffOrder(&order[CoeffOrderOffset(ord, c)], acs, tokens.data(),
                       mem.get(), natural_order_lut);
    }
  }
  // Do not write anything if no order is used.
  if (used_orders != 0) {
    std::vector<uint8_t> context_map;
    EntropyEncodingData codes;
    BuildAndEncodeHistograms(HistogramParams(), kPermutationContexts, tokens,
                             &codes, &context_map, writer, layer, aux_out);
    WriteTokens(tokens[0], codes, context_map, 0, writer, layer, aux_out);
  }
}

}  // namespace jxl
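// Overall flow (a sketch based on this file alone; the actual call sites live
// elsewhere in the encoder):
//   1. ComputeUsedOrders() scans the AC strategy of a rect and returns two
//      bitfields: the orders that occur at all and the orders worth
//      customizing.
//   2. ComputeCoeffOrder() gathers zero-count statistics from the quantized
//      AC coefficients and builds a custom coefficient order for each
//      customized (order, channel) pair, falling back to the natural order
//      otherwise.
//   3. EncodeCoeffOrders() / EncodePermutation() tokenize the resulting
//      permutations as Lehmer codes and write them to the bitstream via
//      BuildAndEncodeHistograms() and WriteTokens().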