enc_modular.cc
1 // Copyright (c) the JPEG XL Project Authors. All rights reserved. 2 // 3 // Use of this source code is governed by a BSD-style 4 // license that can be found in the LICENSE file. 5 6 #include "lib/jxl/enc_modular.h" 7 8 #include <stddef.h> 9 #include <stdint.h> 10 11 #include <array> 12 #include <atomic> 13 #include <cstdint> 14 #include <limits> 15 #include <utility> 16 #include <vector> 17 18 #include "lib/jxl/base/compiler_specific.h" 19 #include "lib/jxl/base/printf_macros.h" 20 #include "lib/jxl/base/status.h" 21 #include "lib/jxl/compressed_dc.h" 22 #include "lib/jxl/dec_ans.h" 23 #include "lib/jxl/enc_aux_out.h" 24 #include "lib/jxl/enc_bit_writer.h" 25 #include "lib/jxl/enc_cluster.h" 26 #include "lib/jxl/enc_fields.h" 27 #include "lib/jxl/enc_gaborish.h" 28 #include "lib/jxl/enc_params.h" 29 #include "lib/jxl/enc_patch_dictionary.h" 30 #include "lib/jxl/enc_quant_weights.h" 31 #include "lib/jxl/frame_dimensions.h" 32 #include "lib/jxl/frame_header.h" 33 #include "lib/jxl/modular/encoding/context_predict.h" 34 #include "lib/jxl/modular/encoding/enc_encoding.h" 35 #include "lib/jxl/modular/encoding/encoding.h" 36 #include "lib/jxl/modular/encoding/ma_common.h" 37 #include "lib/jxl/modular/modular_image.h" 38 #include "lib/jxl/modular/options.h" 39 #include "lib/jxl/modular/transform/enc_transform.h" 40 #include "lib/jxl/pack_signed.h" 41 42 43 namespace jxl { 44 45 namespace { 46 // constexpr bool kPrintTree = false; 47 48 // Squeeze default quantization factors 49 // these quantization factors are for -Q 50 (other qualities simply scale the 50 // factors; things are rounded down and obviously cannot get below 1) 51 const float squeeze_quality_factor = 52 0.35; // for easy tweaking of the quality range (decrease this number for 53 // higher quality) 54 const float squeeze_luma_factor = 55 1.1; // for easy tweaking of the balance between luma (or anything 56 // non-chroma) and chroma (decrease this number for higher quality 57 // luma) 58 const float squeeze_quality_factor_xyb = 2.4f; 59 const float squeeze_xyb_qtable[3][16] = { 60 {163.84, 81.92, 40.96, 20.48, 10.24, 5.12, 2.56, 1.28, 0.64, 0.32, 0.16, 61 0.08, 0.04, 0.02, 0.01, 0.005}, // Y 62 {1024, 512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 0.5, 0.5, 0.5, 0.5, 63 0.5}, // X 64 {2048, 1024, 512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 0.5, 0.5, 0.5, 65 0.5}, // B-Y 66 }; 67 68 const float squeeze_luma_qtable[16] = {163.84, 81.92, 40.96, 20.48, 10.24, 5.12, 69 2.56, 1.28, 0.64, 0.32, 0.16, 0.08, 70 0.04, 0.02, 0.01, 0.005}; 71 // for 8-bit input, the range of YCoCg chroma is -255..255 so basically this 72 // does 4:2:0 subsampling (two most fine grained layers get quantized away) 73 const float squeeze_chroma_qtable[16] = { 74 1024, 512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 0.5, 0.5, 0.5, 0.5, 0.5}; 75 76 // Merges the trees in `trees` using nodes that decide on stream_id, as defined 77 // by `tree_splits`. 78 void MergeTrees(const std::vector<Tree>& trees, 79 const std::vector<size_t>& tree_splits, size_t begin, 80 size_t end, Tree* tree) { 81 JXL_ASSERT(trees.size() + 1 == tree_splits.size()); 82 JXL_ASSERT(end > begin); 83 JXL_ASSERT(end <= trees.size()); 84 if (end == begin + 1) { 85 // Insert the tree, adding the opportune offset to all child nodes. 86 // This will make the leaf IDs wrong, but subsequent roundtripping will fix 87 // them.
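    // (The child indices stored in trees[begin] are relative to that tree's
    // own root; after appending it at offset `sz` below, each lchild/rchild
    // is shifted by `sz` so the links stay valid inside the merged tree.)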
88 size_t sz = tree->size(); 89 tree->insert(tree->end(), trees[begin].begin(), trees[begin].end()); 90 for (size_t i = sz; i < tree->size(); i++) { 91 (*tree)[i].lchild += sz; 92 (*tree)[i].rchild += sz; 93 } 94 return; 95 } 96 size_t mid = (begin + end) / 2; 97 size_t splitval = tree_splits[mid] - 1; 98 size_t cur = tree->size(); 99 tree->emplace_back(1 /*stream_id*/, splitval, 0, 0, Predictor::Zero, 0, 1); 100 (*tree)[cur].lchild = tree->size(); 101 MergeTrees(trees, tree_splits, mid, end, tree); 102 (*tree)[cur].rchild = tree->size(); 103 MergeTrees(trees, tree_splits, begin, mid, tree); 104 } 105 106 void QuantizeChannel(Channel& ch, const int q) { 107 if (q == 1) return; 108 for (size_t y = 0; y < ch.plane.ysize(); y++) { 109 pixel_type* row = ch.plane.Row(y); 110 for (size_t x = 0; x < ch.plane.xsize(); x++) { 111 if (row[x] < 0) { 112 row[x] = -((-row[x] + q / 2) / q) * q; 113 } else { 114 row[x] = ((row[x] + q / 2) / q) * q; 115 } 116 } 117 } 118 } 119 120 // convert binary32 float that corresponds to custom [bits]-bit float (with 121 // [exp_bits] exponent bits) to a [bits]-bit integer representation that should 122 // fit in pixel_type 123 Status float_to_int(const float* const row_in, pixel_type* const row_out, 124 size_t xsize, unsigned int bits, unsigned int exp_bits, 125 bool fp, double dfactor) { 126 JXL_ASSERT(sizeof(pixel_type) * 8 >= bits); 127 if (!fp) { 128 if (bits > 22) { 129 for (size_t x = 0; x < xsize; ++x) { 130 row_out[x] = row_in[x] * dfactor + (row_in[x] < 0 ? -0.5 : 0.5); 131 } 132 } else { 133 float factor = dfactor; 134 for (size_t x = 0; x < xsize; ++x) { 135 row_out[x] = row_in[x] * factor + (row_in[x] < 0 ? -0.5f : 0.5f); 136 } 137 } 138 return true; 139 } 140 if (bits == 32 && fp) { 141 JXL_ASSERT(exp_bits == 8); 142 memcpy(static_cast<void*>(row_out), static_cast<const void*>(row_in), 143 4 * xsize); 144 return true; 145 } 146 147 JXL_ASSERT(bits > 0); 148 int exp_bias = (1 << (exp_bits - 1)) - 1; 149 int max_exp = (1 << exp_bits) - 1; 150 uint32_t sign = (1u << (bits - 1)); 151 int mant_bits = bits - exp_bits - 1; 152 int mant_shift = 23 - mant_bits; 153 for (size_t x = 0; x < xsize; ++x) { 154 uint32_t f; 155 memcpy(&f, &row_in[x], 4); 156 int signbit = (f >> 31); 157 f &= 0x7fffffff; 158 if (f == 0) { 159 row_out[x] = (signbit ? sign : 0); 160 continue; 161 } 162 int exp = (f >> 23) - 127; 163 if (exp == 128) return JXL_FAILURE("Inf/NaN not allowed"); 164 int mantissa = (f & 0x007fffff); 165 // broke up the binary32 into its parts, now reassemble into 166 // arbitrary float 167 exp += exp_bias; 168 if (exp < 0) { // will become a subnormal number 169 // add implicit leading 1 to mantissa 170 mantissa |= 0x00800000; 171 if (exp < -mant_bits) { 172 return JXL_FAILURE( 173 "Invalid float number: %g cannot be represented with %i " 174 "exp_bits and %i mant_bits (exp %i)", 175 row_in[x], exp_bits, mant_bits, exp); 176 } 177 mantissa >>= 1 - exp; 178 exp = 0; 179 } 180 // exp should be representable in exp_bits, otherwise input was 181 // invalid 182 if (exp > max_exp) return JXL_FAILURE("Invalid float exponent"); 183 if (mantissa & ((1 << mant_shift) - 1)) { 184 return JXL_FAILURE("%g is losing precision (mant: %x)", row_in[x], 185 mantissa); 186 } 187 mantissa >>= mant_shift; 188 f = (signbit ? 
sign : 0); 189 f |= (exp << mant_bits); 190 f |= mantissa; 191 row_out[x] = static_cast<pixel_type>(f); 192 } 193 return true; 194 } 195 196 float EstimateWPCost(const Image& img, size_t i) { 197 size_t extra_bits = 0; 198 float histo_cost = 0; 199 HybridUintConfig config; 200 int32_t cutoffs[] = {-500, -392, -255, -191, -127, -95, -63, -47, -31, 201 -23, -15, -11, -7, -4, -3, -1, 0, 1, 202 3, 5, 7, 11, 15, 23, 31, 47, 63, 203 95, 127, 191, 255, 392, 500}; 204 constexpr size_t nc = sizeof(cutoffs) / sizeof(*cutoffs) + 1; 205 Histogram histo[nc] = {}; 206 weighted::Header wp_header; 207 PredictorMode(i, &wp_header); 208 for (const Channel& ch : img.channel) { 209 const intptr_t onerow = ch.plane.PixelsPerRow(); 210 weighted::State wp_state(wp_header, ch.w, ch.h); 211 Properties properties(1); 212 for (size_t y = 0; y < ch.h; y++) { 213 const pixel_type* JXL_RESTRICT r = ch.Row(y); 214 for (size_t x = 0; x < ch.w; x++) { 215 size_t offset = 0; 216 pixel_type_w left = (x ? r[x - 1] : y ? *(r + x - onerow) : 0); 217 pixel_type_w top = (y ? *(r + x - onerow) : left); 218 pixel_type_w topleft = (x && y ? *(r + x - 1 - onerow) : left); 219 pixel_type_w topright = 220 (x + 1 < ch.w && y ? *(r + x + 1 - onerow) : top); 221 pixel_type_w toptop = (y > 1 ? *(r + x - onerow - onerow) : top); 222 pixel_type guess = wp_state.Predict</*compute_properties=*/true>( 223 x, y, ch.w, top, left, topright, topleft, toptop, &properties, 224 offset); 225 size_t ctx = 0; 226 for (int c : cutoffs) { 227 ctx += (c >= properties[0]) ? 1 : 0; 228 } 229 pixel_type res = r[x] - guess; 230 uint32_t token; 231 uint32_t nbits; 232 uint32_t bits; 233 config.Encode(PackSigned(res), &token, &nbits, &bits); 234 histo[ctx].Add(token); 235 extra_bits += nbits; 236 wp_state.UpdateErrors(r[x], x, y, ch.w); 237 } 238 } 239 for (auto& h : histo) { 240 histo_cost += h.ShannonEntropy(); 241 h.Clear(); 242 } 243 } 244 return histo_cost + extra_bits; 245 } 246 247 float EstimateCost(const Image& img) { 248 // TODO(veluca): consider SIMDfication of this code. 249 size_t extra_bits = 0; 250 float histo_cost = 0; 251 HybridUintConfig config; 252 uint32_t cutoffs[] = {0, 1, 3, 5, 7, 11, 15, 23, 31, 253 47, 63, 95, 127, 191, 255, 392, 500}; 254 constexpr size_t nc = sizeof(cutoffs) / sizeof(*cutoffs) + 1; 255 Histogram histo[nc] = {}; 256 for (const Channel& ch : img.channel) { 257 const intptr_t onerow = ch.plane.PixelsPerRow(); 258 for (size_t y = 0; y < ch.h; y++) { 259 const pixel_type* JXL_RESTRICT r = ch.Row(y); 260 for (size_t x = 0; x < ch.w; x++) { 261 pixel_type_w left = (x ? r[x - 1] : y ? *(r + x - onerow) : 0); 262 pixel_type_w top = (y ? *(r + x - onerow) : left); 263 pixel_type_w topleft = (x && y ? *(r + x - 1 - onerow) : left); 264 size_t maxdiff = std::max(std::max(left, top), topleft) - 265 std::min(std::min(left, top), topleft); 266 size_t ctx = 0; 267 for (uint32_t c : cutoffs) { 268 ctx += (c > maxdiff) ? 
1 : 0; 269 } 270 pixel_type res = r[x] - ClampedGradient(top, left, topleft); 271 uint32_t token; 272 uint32_t nbits; 273 uint32_t bits; 274 config.Encode(PackSigned(res), &token, &nbits, &bits); 275 histo[ctx].Add(token); 276 extra_bits += nbits; 277 } 278 } 279 for (auto& h : histo) { 280 histo_cost += h.ShannonEntropy(); 281 h.Clear(); 282 } 283 } 284 return histo_cost + extra_bits; 285 } 286 287 bool do_transform(Image& image, const Transform& tr, 288 const weighted::Header& wp_header, 289 jxl::ThreadPool* pool = nullptr, bool force_jxlart = false) { 290 Transform t = tr; 291 bool did_it = true; 292 if (force_jxlart) { 293 if (!t.MetaApply(image)) return false; 294 } else { 295 did_it = TransformForward(t, image, wp_header, pool); 296 } 297 if (did_it) image.transform.push_back(t); 298 return did_it; 299 } 300 301 bool maybe_do_transform(Image& image, const Transform& tr, 302 const CompressParams& cparams, 303 const weighted::Header& wp_header, 304 jxl::ThreadPool* pool = nullptr, 305 bool force_jxlart = false) { 306 if (force_jxlart || cparams.speed_tier >= SpeedTier::kSquirrel) { 307 return do_transform(image, tr, wp_header, pool, force_jxlart); 308 } 309 float cost_before = EstimateCost(image); 310 bool did_it = do_transform(image, tr, wp_header, pool); 311 if (did_it) { 312 float cost_after = EstimateCost(image); 313 JXL_DEBUG_V(7, "Cost before: %f cost after: %f", cost_before, cost_after); 314 if (cost_after > cost_before) { 315 Transform t = image.transform.back(); 316 JXL_RETURN_IF_ERROR(t.Inverse(image, wp_header, pool)); 317 image.transform.pop_back(); 318 did_it = false; 319 } 320 } 321 return did_it; 322 } 323 324 } // namespace 325 326 ModularFrameEncoder::ModularFrameEncoder(const FrameHeader& frame_header, 327 const CompressParams& cparams_orig, 328 bool streaming_mode) 329 : frame_dim_(frame_header.ToFrameDimensions()), cparams_(cparams_orig) { 330 size_t num_streams = 331 ModularStreamId::Num(frame_dim_, frame_header.passes.num_passes); 332 if (cparams_.ModularPartIsLossless()) { 333 switch (cparams_.decoding_speed_tier) { 334 case 0: 335 break; 336 case 1: 337 cparams_.options.wp_tree_mode = ModularOptions::TreeMode::kWPOnly; 338 break; 339 case 2: { 340 cparams_.options.wp_tree_mode = ModularOptions::TreeMode::kGradientOnly; 341 cparams_.options.predictor = Predictor::Gradient; 342 break; 343 } 344 case 3: { // LZ77, no Gradient. 345 cparams_.options.nb_repeats = 0; 346 cparams_.options.predictor = Predictor::Gradient; 347 break; 348 } 349 default: { // LZ77, no predictor. 350 cparams_.options.nb_repeats = 0; 351 cparams_.options.predictor = Predictor::Zero; 352 break; 353 } 354 } 355 } 356 if (cparams_.decoding_speed_tier >= 1 && cparams_.responsive && 357 cparams_.ModularPartIsLossless()) { 358 cparams_.options.tree_kind = 359 ModularOptions::TreeKind::kTrivialTreeNoPredictor; 360 cparams_.options.nb_repeats = 0; 361 } 362 stream_images_.resize(num_streams); 363 364 // use a sensible default if nothing explicit is specified: 365 // Squeeze for lossy, no squeeze for lossless 366 if (cparams_.responsive < 0) { 367 if (cparams_.ModularPartIsLossless()) { 368 cparams_.responsive = 0; 369 } else { 370 cparams_.responsive = 1; 371 } 372 } 373 374 cparams_.options.splitting_heuristics_node_threshold = 375 82 + 14 * static_cast<int>(cparams_.speed_tier); 376 377 { 378 // Set properties. 379 std::vector<uint32_t> prop_order; 380 if (cparams_.responsive) { 381 // Properties in order of their likelihood of being useful for Squeeze 382 // residuals. 
383 prop_order = {0, 1, 4, 5, 6, 7, 8, 15, 9, 10, 11, 12, 13, 14, 2, 3}; 384 } else { 385 // Same, but for the non-Squeeze case. 386 prop_order = {0, 1, 15, 9, 10, 11, 12, 13, 14, 2, 3, 4, 5, 6, 7, 8}; 387 // if few groups, don't use group as a property 388 if (num_streams < 30 && cparams_.speed_tier > SpeedTier::kTortoise && 389 cparams_orig.ModularPartIsLossless()) { 390 prop_order.erase(prop_order.begin() + 1); 391 } 392 } 393 int max_properties = std::min<int>( 394 cparams_.options.max_properties, 395 static_cast<int>( 396 frame_header.nonserialized_metadata->m.num_extra_channels) + 397 (frame_header.encoding == FrameEncoding::kModular ? 2 : -1)); 398 switch (cparams_.speed_tier) { 399 case SpeedTier::kHare: 400 cparams_.options.splitting_heuristics_properties.assign( 401 prop_order.begin(), prop_order.begin() + 4); 402 cparams_.options.max_property_values = 24; 403 break; 404 case SpeedTier::kWombat: 405 cparams_.options.splitting_heuristics_properties.assign( 406 prop_order.begin(), prop_order.begin() + 5); 407 cparams_.options.max_property_values = 32; 408 break; 409 case SpeedTier::kSquirrel: 410 cparams_.options.splitting_heuristics_properties.assign( 411 prop_order.begin(), prop_order.begin() + 7); 412 cparams_.options.max_property_values = 48; 413 break; 414 case SpeedTier::kKitten: 415 cparams_.options.splitting_heuristics_properties.assign( 416 prop_order.begin(), prop_order.begin() + 10); 417 cparams_.options.max_property_values = 96; 418 break; 419 case SpeedTier::kGlacier: 420 case SpeedTier::kTortoise: 421 cparams_.options.splitting_heuristics_properties = prop_order; 422 cparams_.options.max_property_values = 256; 423 break; 424 default: 425 cparams_.options.splitting_heuristics_properties.assign( 426 prop_order.begin(), prop_order.begin() + 3); 427 cparams_.options.max_property_values = 16; 428 break; 429 } 430 if (cparams_.speed_tier > SpeedTier::kTortoise) { 431 // Gradient in previous channels. 432 for (int i = 0; i < max_properties; i++) { 433 cparams_.options.splitting_heuristics_properties.push_back( 434 kNumNonrefProperties + i * 4 + 3); 435 } 436 } else { 437 // All the extra properties in Tortoise mode. 438 for (int i = 0; i < max_properties * 4; i++) { 439 cparams_.options.splitting_heuristics_properties.push_back( 440 kNumNonrefProperties + i); 441 } 442 } 443 } 444 445 if ((cparams_.options.predictor == Predictor::Average0 || 446 cparams_.options.predictor == Predictor::Average1 || 447 cparams_.options.predictor == Predictor::Average2 || 448 cparams_.options.predictor == Predictor::Average3 || 449 cparams_.options.predictor == Predictor::Average4 || 450 cparams_.options.predictor == Predictor::Weighted) && 451 !cparams_.ModularPartIsLossless()) { 452 // Lossy + Average/Weighted predictors does not work, so switch to default 453 // predictors. 454 cparams_.options.predictor = kUndefinedPredictor; 455 } 456 457 if (cparams_.options.predictor == kUndefinedPredictor) { 458 // no explicit predictor(s) given, set a good default 459 if ((cparams_.speed_tier <= SpeedTier::kGlacier || 460 cparams_.modular_mode == false) && 461 cparams_.IsLossless() && cparams_.responsive == JXL_FALSE) { 462 // TODO(veluca): allow all predictors that don't break residual 463 // multipliers in lossy mode. 
464 cparams_.options.predictor = Predictor::Variable; 465 } else if (cparams_.responsive || cparams_.lossy_palette) { 466 // zero predictor for Squeeze residues and lossy palette 467 cparams_.options.predictor = Predictor::Zero; 468 } else if (!cparams_.IsLossless()) { 469 // If not responsive and lossy. TODO(veluca): use near_lossless instead? 470 cparams_.options.predictor = Predictor::Gradient; 471 } else if (cparams_.speed_tier < SpeedTier::kFalcon) { 472 // try median and weighted predictor for anything else 473 cparams_.options.predictor = Predictor::Best; 474 } else if (cparams_.speed_tier == SpeedTier::kFalcon) { 475 // just weighted predictor in falcon mode 476 cparams_.options.predictor = Predictor::Weighted; 477 } else if (cparams_.speed_tier > SpeedTier::kFalcon) { 478 // just gradient predictor in thunder mode 479 cparams_.options.predictor = Predictor::Gradient; 480 } 481 } else { 482 delta_pred_ = cparams_.options.predictor; 483 if (cparams_.lossy_palette) cparams_.options.predictor = Predictor::Zero; 484 } 485 if (!cparams_.ModularPartIsLossless()) { 486 if (cparams_.options.predictor == Predictor::Weighted || 487 cparams_.options.predictor == Predictor::Variable || 488 cparams_.options.predictor == Predictor::Best) 489 cparams_.options.predictor = Predictor::Zero; 490 } 491 tree_splits_.push_back(0); 492 if (cparams_.modular_mode == false) { 493 cparams_.options.fast_decode_multiplier = 1.0f; 494 tree_splits_.push_back(ModularStreamId::VarDCTDC(0).ID(frame_dim_)); 495 tree_splits_.push_back(ModularStreamId::ModularDC(0).ID(frame_dim_)); 496 tree_splits_.push_back(ModularStreamId::ACMetadata(0).ID(frame_dim_)); 497 tree_splits_.push_back(ModularStreamId::QuantTable(0).ID(frame_dim_)); 498 tree_splits_.push_back(ModularStreamId::ModularAC(0, 0).ID(frame_dim_)); 499 ac_metadata_size.resize(frame_dim_.num_dc_groups); 500 extra_dc_precision.resize(frame_dim_.num_dc_groups); 501 } 502 tree_splits_.push_back(num_streams); 503 cparams_.options.max_chan_size = frame_dim_.group_dim; 504 cparams_.options.group_dim = frame_dim_.group_dim; 505 506 // TODO(veluca): figure out how to use different predictor sets per channel. 
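  // Note: tree_splits_ partitions the stream-id range into chunks (in VarDCT
  // mode: VarDCT DC, modular DC, AC metadata, quant tables, modular AC).
  // ComputeTree() learns or picks one tree per non-empty chunk, and
  // MergeTrees() then joins them with stream_id decision nodes, so each class
  // of streams can get its own subtree.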
507 stream_options_.resize(num_streams, cparams_.options); 508 509 stream_options_[0] = cparams_.options; 510 if (cparams_.speed_tier == SpeedTier::kFalcon) { 511 stream_options_[0].tree_kind = ModularOptions::TreeKind::kWPFixedDC; 512 } else if (cparams_.speed_tier == SpeedTier::kThunder) { 513 stream_options_[0].tree_kind = ModularOptions::TreeKind::kGradientFixedDC; 514 } 515 stream_options_[0].histogram_params = 516 HistogramParams::ForModular(cparams_, {}, streaming_mode); 517 } 518 519 Status ModularFrameEncoder::ComputeEncodingData( 520 const FrameHeader& frame_header, const ImageMetadata& metadata, 521 Image3F* JXL_RESTRICT color, const std::vector<ImageF>& extra_channels, 522 const Rect& group_rect, const FrameDimensions& patch_dim, 523 const Rect& frame_area_rect, PassesEncoderState* JXL_RESTRICT enc_state, 524 const JxlCmsInterface& cms, ThreadPool* pool, AuxOut* aux_out, 525 bool do_color) { 526 JXL_DEBUG_V(6, "Computing modular encoding data for frame %s", 527 frame_header.DebugString().c_str()); 528 529 bool groupwise = enc_state->streaming_mode; 530 531 if (do_color && frame_header.loop_filter.gab && !groupwise) { 532 float w = 0.9908511000000001f; 533 float weights[3] = {w, w, w}; 534 JXL_RETURN_IF_ERROR(GaborishInverse(color, Rect(*color), weights, pool)); 535 } 536 537 if (do_color && metadata.bit_depth.bits_per_sample <= 16 && 538 cparams_.speed_tier < SpeedTier::kCheetah && 539 cparams_.decoding_speed_tier < 2 && !groupwise) { 540 JXL_RETURN_IF_ERROR(FindBestPatchDictionary( 541 *color, enc_state, cms, nullptr, aux_out, 542 cparams_.color_transform == ColorTransform::kXYB)); 543 PatchDictionaryEncoder::SubtractFrom( 544 enc_state->shared.image_features.patches, color); 545 } 546 547 if (cparams_.custom_splines.HasAny()) { 548 PassesSharedState& shared = enc_state->shared; 549 ImageFeatures& image_features = shared.image_features; 550 image_features.splines = cparams_.custom_splines; 551 } 552 553 // Convert ImageBundle to modular Image object 554 const size_t xsize = patch_dim.xsize; 555 const size_t ysize = patch_dim.ysize; 556 557 int nb_chans = 3; 558 if (metadata.color_encoding.IsGray() && 559 cparams_.color_transform == ColorTransform::kNone) { 560 nb_chans = 1; 561 } 562 if (!do_color) nb_chans = 0; 563 564 nb_chans += extra_channels.size(); 565 566 bool fp = metadata.bit_depth.floating_point_sample && 567 cparams_.color_transform != ColorTransform::kXYB; 568 569 // bits_per_sample is just metadata for XYB images. 570 if (metadata.bit_depth.bits_per_sample >= 32 && do_color && 571 cparams_.color_transform != ColorTransform::kXYB) { 572 if (metadata.bit_depth.bits_per_sample == 32 && fp == false) { 573 return JXL_FAILURE("uint32_t not supported in enc_modular"); 574 } else if (metadata.bit_depth.bits_per_sample > 32) { 575 return JXL_FAILURE("bits_per_sample > 32 not supported"); 576 } 577 } 578 579 // in the non-float case, there is an implicit 0 sign bit 580 int max_bitdepth = 581 do_color ? metadata.bit_depth.bits_per_sample + (fp ? 
0 : 1) : 0; 582 Image& gi = stream_images_[0]; 583 JXL_ASSIGN_OR_RETURN( 584 gi, Image::Create(xsize, ysize, metadata.bit_depth.bits_per_sample, 585 nb_chans)); 586 int c = 0; 587 if (cparams_.color_transform == ColorTransform::kXYB && 588 cparams_.modular_mode == true) { 589 float enc_factors[3] = {32768.0f, 2048.0f, 2048.0f}; 590 if (cparams_.butteraugli_distance > 0 && !cparams_.responsive) { 591 // quantize XYB here and then treat it as a lossless image 592 enc_factors[0] *= 1.f / (1.f + 23.f * cparams_.butteraugli_distance); 593 enc_factors[1] *= 1.f / (1.f + 14.f * cparams_.butteraugli_distance); 594 enc_factors[2] *= 1.f / (1.f + 14.f * cparams_.butteraugli_distance); 595 cparams_.butteraugli_distance = 0; 596 } 597 if (cparams_.manual_xyb_factors.size() == 3) { 598 DequantMatricesSetCustomDC(&enc_state->shared.matrices, 599 cparams_.manual_xyb_factors.data()); 600 // TODO(jon): update max_bitdepth in this case 601 } else { 602 DequantMatricesSetCustomDC(&enc_state->shared.matrices, enc_factors); 603 max_bitdepth = 12; 604 } 605 } 606 pixel_type maxval = gi.bitdepth < 32 ? (1u << gi.bitdepth) - 1 : 0; 607 if (do_color) { 608 for (; c < 3; c++) { 609 if (metadata.color_encoding.IsGray() && 610 cparams_.color_transform == ColorTransform::kNone && 611 c != (cparams_.color_transform == ColorTransform::kXYB ? 1 : 0)) 612 continue; 613 int c_out = c; 614 // XYB is encoded as YX(B-Y) 615 if (cparams_.color_transform == ColorTransform::kXYB && c < 2) 616 c_out = 1 - c_out; 617 double factor = maxval; 618 if (cparams_.color_transform == ColorTransform::kXYB) 619 factor = enc_state->shared.matrices.InvDCQuant(c); 620 if (c == 2 && cparams_.color_transform == ColorTransform::kXYB) { 621 JXL_ASSERT(!fp); 622 for (size_t y = 0; y < ysize; ++y) { 623 const float* const JXL_RESTRICT row_in = color->PlaneRow(c, y); 624 pixel_type* const JXL_RESTRICT row_out = gi.channel[c_out].Row(y); 625 pixel_type* const JXL_RESTRICT row_Y = gi.channel[0].Row(y); 626 for (size_t x = 0; x < xsize; ++x) { 627 row_out[x] = row_in[x] * factor + 0.5f; 628 row_out[x] -= row_Y[x]; 629 // zero the lsb of B 630 row_out[x] = row_out[x] / 2 * 2; 631 } 632 } 633 } else { 634 int bits = metadata.bit_depth.bits_per_sample; 635 int exp_bits = metadata.bit_depth.exponent_bits_per_sample; 636 gi.channel[c_out].hshift = frame_header.chroma_subsampling.HShift(c); 637 gi.channel[c_out].vshift = frame_header.chroma_subsampling.VShift(c); 638 size_t xsize_shifted = DivCeil(xsize, 1 << gi.channel[c_out].hshift); 639 size_t ysize_shifted = DivCeil(ysize, 1 << gi.channel[c_out].vshift); 640 JXL_RETURN_IF_ERROR( 641 gi.channel[c_out].shrink(xsize_shifted, ysize_shifted)); 642 std::atomic<bool> has_error{false}; 643 JXL_RETURN_IF_ERROR(RunOnPool( 644 pool, 0, ysize_shifted, ThreadPool::NoInit, 645 [&](const int task, const int thread) { 646 if (has_error) return; 647 const size_t y = task; 648 const float* const JXL_RESTRICT row_in = 649 color->PlaneRow(c, y + group_rect.y0()) + group_rect.x0(); 650 pixel_type* const JXL_RESTRICT row_out = gi.channel[c_out].Row(y); 651 if (!float_to_int(row_in, row_out, xsize_shifted, bits, exp_bits, 652 fp, factor)) { 653 has_error = true; 654 return; 655 }; 656 }, 657 "float2int")); 658 if (has_error) { 659 return JXL_FAILURE("Error in float to integer conversion"); 660 } 661 } 662 } 663 if (metadata.color_encoding.IsGray() && 664 cparams_.color_transform == ColorTransform::kNone) 665 c = 1; 666 } 667 668 for (size_t ec = 0; ec < extra_channels.size(); ec++, c++) { 669 const ExtraChannelInfo& eci = 
metadata.extra_channel_info[ec]; 670 size_t ecups = frame_header.extra_channel_upsampling[ec]; 671 JXL_RETURN_IF_ERROR( 672 gi.channel[c].shrink(DivCeil(patch_dim.xsize_upsampled, ecups), 673 DivCeil(patch_dim.ysize_upsampled, ecups))); 674 gi.channel[c].hshift = gi.channel[c].vshift = 675 CeilLog2Nonzero(ecups) - CeilLog2Nonzero(frame_header.upsampling); 676 677 int bits = eci.bit_depth.bits_per_sample; 678 int exp_bits = eci.bit_depth.exponent_bits_per_sample; 679 bool fp = eci.bit_depth.floating_point_sample; 680 double factor = (fp ? 1 : ((1u << eci.bit_depth.bits_per_sample) - 1)); 681 if (bits + (fp ? 0 : 1) > max_bitdepth) max_bitdepth = bits + (fp ? 0 : 1); 682 std::atomic<bool> has_error{false}; 683 JXL_RETURN_IF_ERROR(RunOnPool( 684 pool, 0, gi.channel[c].plane.ysize(), ThreadPool::NoInit, 685 [&](const int task, const int thread) { 686 if (has_error) return; 687 const size_t y = task; 688 const float* const JXL_RESTRICT row_in = 689 extra_channels[ec].Row(y + group_rect.y0()) + group_rect.x0(); 690 pixel_type* const JXL_RESTRICT row_out = gi.channel[c].Row(y); 691 if (!float_to_int(row_in, row_out, gi.channel[c].plane.xsize(), bits, 692 exp_bits, fp, factor)) { 693 has_error = true; 694 return; 695 }; 696 }, 697 "float2int")); 698 if (has_error) return JXL_FAILURE("Error in float to integer conversion"); 699 } 700 JXL_ASSERT(c == nb_chans); 701 702 int level_max_bitdepth = (cparams_.level == 5 ? 16 : 32); 703 if (max_bitdepth > level_max_bitdepth) { 704 return JXL_FAILURE( 705 "Bitdepth too high for level %i (need %i bits, have only %i in this " 706 "level)", 707 cparams_.level, max_bitdepth, level_max_bitdepth); 708 } 709 710 // Set options and apply transformations 711 if (!cparams_.ModularPartIsLossless()) { 712 if (cparams_.palette_colors != 0) { 713 JXL_DEBUG_V(3, "Lossy encode, not doing palette transforms"); 714 } 715 if (cparams_.color_transform == ColorTransform::kXYB) { 716 cparams_.channel_colors_pre_transform_percent = 0; 717 } 718 cparams_.channel_colors_percent = 0; 719 cparams_.palette_colors = 0; 720 cparams_.lossy_palette = false; 721 } 722 723 // Global palette 724 if ((cparams_.palette_colors != 0 || cparams_.lossy_palette) && !groupwise) { 725 // all-channel palette (e.g. RGBA) 726 if (gi.channel.size() - gi.nb_meta_channels > 1) { 727 Transform maybe_palette(TransformId::kPalette); 728 maybe_palette.begin_c = gi.nb_meta_channels; 729 maybe_palette.num_c = gi.channel.size() - gi.nb_meta_channels; 730 maybe_palette.nb_colors = std::min(static_cast<int>(xsize * ysize / 2), 731 std::abs(cparams_.palette_colors)); 732 maybe_palette.ordered_palette = cparams_.palette_colors >= 0; 733 maybe_palette.lossy_palette = 734 (cparams_.lossy_palette && maybe_palette.num_c == 3); 735 if (maybe_palette.lossy_palette) { 736 maybe_palette.predictor = delta_pred_; 737 } 738 // TODO(veluca): use a custom weighted header if using the weighted 739 // predictor. 
740 maybe_do_transform(gi, maybe_palette, cparams_, weighted::Header(), pool, 741 cparams_.options.zero_tokens); 742 } 743 // all-minus-one-channel palette (RGB with separate alpha, or CMY with 744 // separate K) 745 if (gi.channel.size() - gi.nb_meta_channels > 3) { 746 Transform maybe_palette_3(TransformId::kPalette); 747 maybe_palette_3.begin_c = gi.nb_meta_channels; 748 maybe_palette_3.num_c = gi.channel.size() - gi.nb_meta_channels - 1; 749 maybe_palette_3.nb_colors = std::min(static_cast<int>(xsize * ysize / 3), 750 std::abs(cparams_.palette_colors)); 751 maybe_palette_3.ordered_palette = cparams_.palette_colors >= 0; 752 maybe_palette_3.lossy_palette = cparams_.lossy_palette; 753 if (maybe_palette_3.lossy_palette) { 754 maybe_palette_3.predictor = delta_pred_; 755 } 756 maybe_do_transform(gi, maybe_palette_3, cparams_, weighted::Header(), 757 pool, cparams_.options.zero_tokens); 758 } 759 } 760 761 // Global channel palette 762 if (!groupwise && cparams_.channel_colors_pre_transform_percent > 0 && 763 !cparams_.lossy_palette && 764 (cparams_.speed_tier <= SpeedTier::kThunder || 765 (do_color && metadata.bit_depth.bits_per_sample > 8))) { 766 // single channel palette (like FLIF's ChannelCompact) 767 size_t nb_channels = gi.channel.size() - gi.nb_meta_channels; 768 int orig_bitdepth = max_bitdepth; 769 max_bitdepth = 0; 770 for (size_t i = 0; i < nb_channels; i++) { 771 int32_t min; 772 int32_t max; 773 compute_minmax(gi.channel[gi.nb_meta_channels + i], &min, &max); 774 int64_t colors = static_cast<int64_t>(max) - min + 1; 775 JXL_DEBUG_V(10, "Channel %" PRIuS ": range=%i..%i", i, min, max); 776 Transform maybe_palette_1(TransformId::kPalette); 777 maybe_palette_1.begin_c = i + gi.nb_meta_channels; 778 maybe_palette_1.num_c = 1; 779 // simple heuristic: if less than X percent of the values in the range 780 // actually occur, it is probably worth it to do a compaction 781 // (but only if the channel palette is less than 6% the size of the 782 // image itself) 783 maybe_palette_1.nb_colors = std::min( 784 static_cast<int>(xsize * ysize / 16), 785 static_cast<int>(cparams_.channel_colors_pre_transform_percent / 786 100. * colors)); 787 if (maybe_do_transform(gi, maybe_palette_1, cparams_, weighted::Header(), 788 pool)) { 789 // effective bit depth is lower, adjust quantization accordingly 790 compute_minmax(gi.channel[gi.nb_meta_channels + i], &min, &max); 791 if (max < maxval) maxval = max; 792 int ch_bitdepth = 793 (max > 0 ? 
CeilLog2Nonzero(static_cast<uint32_t>(max)) : 0); 794 if (ch_bitdepth > max_bitdepth) max_bitdepth = ch_bitdepth; 795 } else 796 max_bitdepth = orig_bitdepth; 797 } 798 } 799 800 // don't do an RCT if we're short on bits 801 if (cparams_.color_transform == ColorTransform::kNone && do_color && 802 gi.channel.size() - gi.nb_meta_channels >= 3 && 803 max_bitdepth + 1 < level_max_bitdepth) { 804 if (cparams_.colorspace < 0 && (!cparams_.ModularPartIsLossless() || 805 cparams_.speed_tier > SpeedTier::kHare)) { 806 Transform ycocg{TransformId::kRCT}; 807 ycocg.rct_type = 6; 808 ycocg.begin_c = gi.nb_meta_channels; 809 do_transform(gi, ycocg, weighted::Header(), pool); 810 max_bitdepth++; 811 } else if (cparams_.colorspace > 0) { 812 Transform sg(TransformId::kRCT); 813 sg.begin_c = gi.nb_meta_channels; 814 sg.rct_type = cparams_.colorspace; 815 do_transform(gi, sg, weighted::Header(), pool); 816 max_bitdepth++; 817 } 818 } 819 820 if (cparams_.move_to_front_from_channel > 0) { 821 for (size_t tgt = 0; 822 tgt + cparams_.move_to_front_from_channel < gi.channel.size(); tgt++) { 823 size_t pos = cparams_.move_to_front_from_channel; 824 while (pos > 0) { 825 Transform move(TransformId::kRCT); 826 if (pos == 1) { 827 move.begin_c = tgt; 828 move.rct_type = 28; // RGB -> GRB 829 pos -= 1; 830 } else { 831 move.begin_c = tgt + pos - 2; 832 move.rct_type = 14; // RGB -> BRG 833 pos -= 2; 834 } 835 do_transform(gi, move, weighted::Header(), pool); 836 } 837 } 838 } 839 840 // don't do squeeze if we don't have some spare bits 841 if (!groupwise && cparams_.responsive && !gi.channel.empty() && 842 max_bitdepth + 2 < level_max_bitdepth) { 843 Transform t(TransformId::kSqueeze); 844 do_transform(gi, t, weighted::Header(), pool); 845 max_bitdepth += 2; 846 } 847 848 if (max_bitdepth + 1 > level_max_bitdepth) { 849 // force no group RCTs if we don't have a spare bit 850 cparams_.colorspace = 0; 851 } 852 JXL_ASSERT(max_bitdepth <= level_max_bitdepth); 853 854 if (!cparams_.ModularPartIsLossless()) { 855 quants_.resize(gi.channel.size(), 1); 856 float quantizer = 0.25f; 857 if (!cparams_.responsive) { 858 JXL_DEBUG_V(1, 859 "Warning: lossy compression without Squeeze " 860 "transform is just color quantization."); 861 quantizer *= 0.1f; 862 } 863 float bitdepth_correction = 1.f; 864 if (cparams_.color_transform != ColorTransform::kXYB) { 865 bitdepth_correction = maxval / 255.f; 866 } 867 std::vector<float> quantizers; 868 for (size_t i = 0; i < 3; i++) { 869 float dist = cparams_.butteraugli_distance; 870 quantizers.push_back(quantizer * dist * bitdepth_correction); 871 } 872 for (size_t i = 0; i < extra_channels.size(); i++) { 873 int ec_bitdepth = 874 metadata.extra_channel_info[i].bit_depth.bits_per_sample; 875 pixel_type ec_maxval = ec_bitdepth < 32 ? (1u << ec_bitdepth) - 1 : 0; 876 bitdepth_correction = ec_maxval / 255.f; 877 float dist = 0; 878 if (i < cparams_.ec_distance.size()) dist = cparams_.ec_distance[i]; 879 if (dist < 0) dist = cparams_.butteraugli_distance; 880 quantizers.push_back(quantizer * dist * bitdepth_correction); 881 } 882 if (cparams_.options.nb_repeats == 0) { 883 return JXL_FAILURE("nb_repeats = 0 not supported with modular lossy!"); 884 } 885 for (uint32_t i = gi.nb_meta_channels; i < gi.channel.size(); i++) { 886 Channel& ch = gi.channel[i]; 887 int shift = ch.hshift + ch.vshift; // number of pixel halvings 888 if (shift > 16) shift = 16; 889 if (shift > 0) shift--; 890 int q; 891 // assuming default Squeeze here 892 int component = 893 (do_color ? 
0 : 3) + ((i - gi.nb_meta_channels) % nb_chans); 894 // last 4 channels are final chroma residuals 895 if (nb_chans > 2 && i >= gi.channel.size() - 4 && cparams_.responsive) { 896 component = 1; 897 } 898 if (cparams_.color_transform == ColorTransform::kXYB && component < 3) { 899 q = quantizers[component] * squeeze_quality_factor_xyb * 900 squeeze_xyb_qtable[component][shift]; 901 } else { 902 if (cparams_.colorspace != 0 && component > 0 && component < 3) { 903 q = quantizers[component] * squeeze_quality_factor * 904 squeeze_chroma_qtable[shift]; 905 } else { 906 q = quantizers[component] * squeeze_quality_factor * 907 squeeze_luma_factor * squeeze_luma_qtable[shift]; 908 } 909 } 910 if (q < 1) q = 1; 911 QuantizeChannel(gi.channel[i], q); 912 quants_[i] = q; 913 } 914 } 915 916 // Fill other groups. 917 // DC 918 for (size_t group_id = 0; group_id < patch_dim.num_dc_groups; group_id++) { 919 const size_t rgx = group_id % patch_dim.xsize_dc_groups; 920 const size_t rgy = group_id / patch_dim.xsize_dc_groups; 921 const Rect rect(rgx * patch_dim.dc_group_dim, rgy * patch_dim.dc_group_dim, 922 patch_dim.dc_group_dim, patch_dim.dc_group_dim); 923 size_t gx = rgx + frame_area_rect.x0() / 2048; 924 size_t gy = rgy + frame_area_rect.y0() / 2048; 925 size_t real_group_id = gy * frame_dim_.xsize_dc_groups + gx; 926 // minShift==3 because (frame_dim.dc_group_dim >> 3) == frame_dim.group_dim 927 // maxShift==1000 is infinity 928 stream_params_.push_back( 929 GroupParams{rect, 3, 1000, ModularStreamId::ModularDC(real_group_id)}); 930 } 931 // AC global -> nothing. 932 // AC 933 for (size_t group_id = 0; group_id < patch_dim.num_groups; group_id++) { 934 const size_t rgx = group_id % patch_dim.xsize_groups; 935 const size_t rgy = group_id / patch_dim.xsize_groups; 936 const Rect mrect(rgx * patch_dim.group_dim, rgy * patch_dim.group_dim, 937 patch_dim.group_dim, patch_dim.group_dim); 938 size_t gx = rgx + frame_area_rect.x0() / (frame_dim_.group_dim); 939 size_t gy = rgy + frame_area_rect.y0() / (frame_dim_.group_dim); 940 size_t real_group_id = gy * frame_dim_.xsize_groups + gx; 941 for (size_t i = 0; i < enc_state->progressive_splitter.GetNumPasses(); 942 i++) { 943 int maxShift; 944 int minShift; 945 frame_header.passes.GetDownsamplingBracket(i, minShift, maxShift); 946 stream_params_.push_back( 947 GroupParams{mrect, minShift, maxShift, 948 ModularStreamId::ModularAC(real_group_id, i)}); 949 } 950 } 951 // if there's only one group, everything ends up in GlobalModular 952 // in that case, also try RCTs/WP params for the one group 953 if (stream_params_.size() == 2) { 954 stream_params_.push_back(GroupParams{Rect(0, 0, xsize, ysize), 0, 1000, 955 ModularStreamId::Global()}); 956 } 957 gi_channel_.resize(stream_images_.size()); 958 959 JXL_RETURN_IF_ERROR(RunOnPool( 960 pool, 0, stream_params_.size(), ThreadPool::NoInit, 961 [&](const uint32_t i, size_t /* thread */) { 962 size_t stream = stream_params_[i].id.ID(frame_dim_); 963 stream_options_[stream] = stream_options_[0]; 964 JXL_CHECK(PrepareStreamParams( 965 stream_params_[i].rect, cparams_, stream_params_[i].minShift, 966 stream_params_[i].maxShift, stream_params_[i].id, do_color, 967 groupwise)); 968 }, 969 "ChooseParams")); 970 { 971 // Clear out channels that have been copied to groups. 
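    // (PrepareStreamParams copies every channel wider or taller than
    // group_dim into the per-group stream images, so only the channels small
    // enough to stay in the GlobalModular stream still need their planes
    // here.)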
972 Image& full_image = stream_images_[0]; 973 size_t c = full_image.nb_meta_channels; 974 for (; c < full_image.channel.size(); c++) { 975 Channel& fc = full_image.channel[c]; 976 if (fc.w > frame_dim_.group_dim || fc.h > frame_dim_.group_dim) break; 977 } 978 for (; c < full_image.channel.size(); c++) { 979 full_image.channel[c].plane = ImageI(); 980 } 981 } 982 983 JXL_RETURN_IF_ERROR(ValidateChannelDimensions(gi, stream_options_[0])); 984 return true; 985 } 986 987 Status ModularFrameEncoder::ComputeTree(ThreadPool* pool) { 988 std::vector<ModularMultiplierInfo> multiplier_info; 989 if (!quants_.empty()) { 990 for (uint32_t stream_id = 0; stream_id < stream_images_.size(); 991 stream_id++) { 992 // skip non-modular stream_ids 993 if (stream_id > 0 && gi_channel_[stream_id].empty()) continue; 994 const Image& image = stream_images_[stream_id]; 995 const ModularOptions& options = stream_options_[stream_id]; 996 for (uint32_t i = image.nb_meta_channels; i < image.channel.size(); i++) { 997 if (i >= image.nb_meta_channels && 998 (image.channel[i].w > options.max_chan_size || 999 image.channel[i].h > options.max_chan_size)) { 1000 continue; 1001 } 1002 if (stream_id > 0 && gi_channel_[stream_id].empty()) continue; 1003 size_t ch_id = stream_id == 0 1004 ? i 1005 : gi_channel_[stream_id][i - image.nb_meta_channels]; 1006 uint32_t q = quants_[ch_id]; 1007 // Inform the tree splitting heuristics that each channel in each group 1008 // used this quantization factor. This will produce a tree with the 1009 // given multipliers. 1010 if (multiplier_info.empty() || 1011 multiplier_info.back().range[1][0] != stream_id || 1012 multiplier_info.back().multiplier != q) { 1013 StaticPropRange range; 1014 range[0] = {{i, i + 1}}; 1015 range[1] = {{stream_id, stream_id + 1}}; 1016 multiplier_info.push_back({range, static_cast<uint32_t>(q)}); 1017 } else { 1018 // Previous channel in the same group had the same quantization 1019 // factor. Don't provide two different ranges, as that creates 1020 // unnecessary nodes. 1021 multiplier_info.back().range[0][1] = i + 1; 1022 } 1023 } 1024 } 1025 // Merge group+channel settings that have the same channels and quantization 1026 // factors, to avoid unnecessary nodes. 1027 std::sort(multiplier_info.begin(), multiplier_info.end(), 1028 [](ModularMultiplierInfo a, ModularMultiplierInfo b) { 1029 return std::make_tuple(a.range, a.multiplier) < 1030 std::make_tuple(b.range, b.multiplier); 1031 }); 1032 size_t new_num = 1; 1033 for (size_t i = 1; i < multiplier_info.size(); i++) { 1034 ModularMultiplierInfo& prev = multiplier_info[new_num - 1]; 1035 ModularMultiplierInfo& cur = multiplier_info[i]; 1036 if (prev.range[0] == cur.range[0] && prev.multiplier == cur.multiplier && 1037 prev.range[1][1] == cur.range[1][0]) { 1038 prev.range[1][1] = cur.range[1][1]; 1039 } else { 1040 multiplier_info[new_num++] = multiplier_info[i]; 1041 } 1042 } 1043 multiplier_info.resize(new_num); 1044 } 1045 1046 if (!cparams_.custom_fixed_tree.empty()) { 1047 tree_ = cparams_.custom_fixed_tree; 1048 } else if (cparams_.speed_tier < SpeedTier::kFalcon || 1049 !cparams_.modular_mode) { 1050 // Avoid creating a tree with leaves that don't correspond to any pixels. 
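    // Chunks of tree_splits_ whose streams contain no pixels are dropped, so
    // the merged tree does not reserve a subtree that is never used.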
1051 std::vector<size_t> useful_splits; 1052 useful_splits.reserve(tree_splits_.size()); 1053 for (size_t chunk = 0; chunk < tree_splits_.size() - 1; chunk++) { 1054 bool has_pixels = false; 1055 size_t start = tree_splits_[chunk]; 1056 size_t stop = tree_splits_[chunk + 1]; 1057 for (size_t i = start; i < stop; i++) { 1058 if (!stream_images_[i].empty()) has_pixels = true; 1059 } 1060 if (has_pixels) { 1061 useful_splits.push_back(tree_splits_[chunk]); 1062 } 1063 } 1064 // Don't do anything if modular mode does not have any pixels in this image 1065 if (useful_splits.empty()) return true; 1066 useful_splits.push_back(tree_splits_.back()); 1067 1068 std::atomic_flag invalid_force_wp = ATOMIC_FLAG_INIT; 1069 1070 std::vector<Tree> trees(useful_splits.size() - 1); 1071 JXL_RETURN_IF_ERROR(RunOnPool( 1072 pool, 0, useful_splits.size() - 1, ThreadPool::NoInit, 1073 [&](const uint32_t chunk, size_t /* thread */) { 1074 // TODO(veluca): parallelize more. 1075 size_t total_pixels = 0; 1076 uint32_t start = useful_splits[chunk]; 1077 uint32_t stop = useful_splits[chunk + 1]; 1078 while (start < stop && stream_images_[start].empty()) ++start; 1079 while (start < stop && stream_images_[stop - 1].empty()) --stop; 1080 if (stream_options_[start].tree_kind != 1081 ModularOptions::TreeKind::kLearn) { 1082 for (size_t i = start; i < stop; i++) { 1083 for (const Channel& ch : stream_images_[i].channel) { 1084 total_pixels += ch.w * ch.h; 1085 } 1086 } 1087 trees[chunk] = 1088 PredefinedTree(stream_options_[start].tree_kind, total_pixels); 1089 return; 1090 } 1091 TreeSamples tree_samples; 1092 if (!tree_samples.SetPredictor(stream_options_[start].predictor, 1093 stream_options_[start].wp_tree_mode)) { 1094 invalid_force_wp.test_and_set(std::memory_order_acq_rel); 1095 return; 1096 } 1097 if (!tree_samples.SetProperties( 1098 stream_options_[start].splitting_heuristics_properties, 1099 stream_options_[start].wp_tree_mode)) { 1100 invalid_force_wp.test_and_set(std::memory_order_acq_rel); 1101 return; 1102 } 1103 uint32_t max_c = 0; 1104 std::vector<pixel_type> pixel_samples; 1105 std::vector<pixel_type> diff_samples; 1106 std::vector<uint32_t> group_pixel_count; 1107 std::vector<uint32_t> channel_pixel_count; 1108 for (size_t i = start; i < stop; i++) { 1109 max_c = std::max<uint32_t>(stream_images_[i].channel.size(), max_c); 1110 CollectPixelSamples(stream_images_[i], stream_options_[i], i, 1111 group_pixel_count, channel_pixel_count, 1112 pixel_samples, diff_samples); 1113 } 1114 StaticPropRange range; 1115 range[0] = {{0, max_c}}; 1116 range[1] = {{start, stop}}; 1117 1118 tree_samples.PreQuantizeProperties( 1119 range, multiplier_info, group_pixel_count, channel_pixel_count, 1120 pixel_samples, diff_samples, 1121 stream_options_[start].max_property_values); 1122 for (size_t i = start; i < stop; i++) { 1123 JXL_CHECK(ModularGenericCompress( 1124 stream_images_[i], stream_options_[i], /*writer=*/nullptr, 1125 /*aux_out=*/nullptr, 0, i, &tree_samples, &total_pixels)); 1126 } 1127 1128 // TODO(veluca): parallelize more. 1129 trees[chunk] = 1130 LearnTree(std::move(tree_samples), total_pixels, 1131 stream_options_[start], multiplier_info, range); 1132 }, 1133 "LearnTrees")); 1134 if (invalid_force_wp.test_and_set(std::memory_order_acq_rel)) { 1135 return JXL_FAILURE("PrepareEncoding: force_no_wp with {Weighted}"); 1136 } 1137 tree_.clear(); 1138 MergeTrees(trees, useful_splits, 0, useful_splits.size() - 1, &tree_); 1139 } else { 1140 // Fixed tree. 
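      // Reached at Falcon speed or faster in modular mode: no tree is
      // learned, and a predefined tree (parameterized by the total pixel
      // count) is used instead.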
1141 size_t total_pixels = 0; 1142 for (const Image& img : stream_images_) { 1143 for (const Channel& ch : img.channel) { 1144 total_pixels += ch.w * ch.h; 1145 } 1146 } 1147 if (cparams_.speed_tier <= SpeedTier::kFalcon) { 1148 tree_ = 1149 PredefinedTree(ModularOptions::TreeKind::kWPFixedDC, total_pixels); 1150 } else if (cparams_.speed_tier <= SpeedTier::kThunder) { 1151 tree_ = PredefinedTree(ModularOptions::TreeKind::kGradientFixedDC, 1152 total_pixels); 1153 } else { 1154 tree_ = {PropertyDecisionNode::Leaf(Predictor::Gradient)}; 1155 } 1156 } 1157 tree_tokens_.resize(1); 1158 tree_tokens_[0].clear(); 1159 Tree decoded_tree; 1160 TokenizeTree(tree_, tree_tokens_.data(), &decoded_tree); 1161 JXL_ASSERT(tree_.size() == decoded_tree.size()); 1162 tree_ = std::move(decoded_tree); 1163 1164 /* TODO(szabadka) Add text output callback to cparams 1165 if (kPrintTree && WantDebugOutput(aux_out)) { 1166 if (frame_header.dc_level > 0) { 1167 PrintTree(tree_, aux_out->debug_prefix + "/dc_frame_level" + 1168 std::to_string(frame_header.dc_level) + "_tree"); 1169 } else { 1170 PrintTree(tree_, aux_out->debug_prefix + "/global_tree"); 1171 } 1172 } */ 1173 return true; 1174 } 1175 1176 Status ModularFrameEncoder::ComputeTokens(ThreadPool* pool) { 1177 size_t num_streams = stream_images_.size(); 1178 stream_headers_.resize(num_streams); 1179 tokens_.resize(num_streams); 1180 image_widths_.resize(num_streams); 1181 JXL_RETURN_IF_ERROR(RunOnPool( 1182 pool, 0, num_streams, ThreadPool::NoInit, 1183 [&](const uint32_t stream_id, size_t /* thread */) { 1184 AuxOut my_aux_out; 1185 tokens_[stream_id].clear(); 1186 JXL_CHECK(ModularGenericCompress( 1187 stream_images_[stream_id], stream_options_[stream_id], 1188 /*writer=*/nullptr, &my_aux_out, 0, stream_id, 1189 /*tree_samples=*/nullptr, 1190 /*total_pixels=*/nullptr, 1191 /*tree=*/&tree_, /*header=*/&stream_headers_[stream_id], 1192 /*tokens=*/&tokens_[stream_id], 1193 /*widths=*/&image_widths_[stream_id])); 1194 }, 1195 "ComputeTokens")); 1196 return true; 1197 } 1198 1199 Status ModularFrameEncoder::EncodeGlobalInfo(bool streaming_mode, 1200 BitWriter* writer, 1201 AuxOut* aux_out) { 1202 BitWriter::Allotment allotment(writer, 1); 1203 // If we are using brotli, or not using modular mode. 1204 if (tree_tokens_.empty() || tree_tokens_[0].empty()) { 1205 writer->Write(1, 0); 1206 allotment.ReclaimAndCharge(writer, kLayerModularTree, aux_out); 1207 return true; 1208 } 1209 writer->Write(1, 1); 1210 allotment.ReclaimAndCharge(writer, kLayerModularTree, aux_out); 1211 1212 // Write tree 1213 HistogramParams params = 1214 HistogramParams::ForModular(cparams_, extra_dc_precision, streaming_mode); 1215 { 1216 EntropyEncodingData tree_code; 1217 std::vector<uint8_t> tree_context_map; 1218 BuildAndEncodeHistograms(params, kNumTreeContexts, tree_tokens_, &tree_code, 1219 &tree_context_map, writer, kLayerModularTree, 1220 aux_out); 1221 WriteTokens(tree_tokens_[0], tree_code, tree_context_map, 0, writer, 1222 kLayerModularTree, aux_out); 1223 } 1224 params.streaming_mode = streaming_mode; 1225 params.add_missing_symbols = streaming_mode; 1226 params.image_widths = image_widths_; 1227 // Write histograms. 
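  // A binary tree with N nodes has (N + 1) / 2 leaves, and each leaf of the
  // MA tree is one entropy-coding context, hence the context count below.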
1228 BuildAndEncodeHistograms(params, (tree_.size() + 1) / 2, tokens_, &code_, 1229 &context_map_, writer, kLayerModularGlobal, aux_out); 1230 return true; 1231 } 1232 1233 Status ModularFrameEncoder::EncodeStream(BitWriter* writer, AuxOut* aux_out, 1234 size_t layer, 1235 const ModularStreamId& stream) { 1236 size_t stream_id = stream.ID(frame_dim_); 1237 if (stream_images_[stream_id].channel.empty()) { 1238 JXL_DEBUG_V(10, "Modular stream %" PRIuS " is empty.", stream_id); 1239 return true; // Image with no channels, header never gets decoded. 1240 } 1241 if (tokens_.empty()) { 1242 JXL_RETURN_IF_ERROR(ModularGenericCompress( 1243 stream_images_[stream_id], stream_options_[stream_id], writer, aux_out, 1244 layer, stream_id)); 1245 } else { 1246 JXL_RETURN_IF_ERROR( 1247 Bundle::Write(stream_headers_[stream_id], writer, layer, aux_out)); 1248 WriteTokens(tokens_[stream_id], code_, context_map_, 0, writer, layer, 1249 aux_out); 1250 } 1251 return true; 1252 } 1253 1254 void ModularFrameEncoder::ClearStreamData(const ModularStreamId& stream) { 1255 size_t stream_id = stream.ID(frame_dim_); 1256 Image empty_image; 1257 std::swap(stream_images_[stream_id], empty_image); 1258 } 1259 1260 void ModularFrameEncoder::ClearModularStreamData() { 1261 for (const auto& group : stream_params_) { 1262 ClearStreamData(group.id); 1263 } 1264 stream_params_.clear(); 1265 } 1266 1267 size_t ModularFrameEncoder::ComputeStreamingAbsoluteAcGroupId( 1268 size_t dc_group_id, size_t ac_group_id, 1269 const FrameDimensions& patch_dim) const { 1270 size_t dc_group_x = dc_group_id % frame_dim_.xsize_dc_groups; 1271 size_t dc_group_y = dc_group_id / frame_dim_.xsize_dc_groups; 1272 size_t ac_group_x = ac_group_id % patch_dim.xsize_groups; 1273 size_t ac_group_y = ac_group_id / patch_dim.xsize_groups; 1274 return (dc_group_x * 8 + ac_group_x) + 1275 (dc_group_y * 8 + ac_group_y) * frame_dim_.xsize_groups; 1276 } 1277 1278 Status ModularFrameEncoder::PrepareStreamParams(const Rect& rect, 1279 const CompressParams& cparams_, 1280 int minShift, int maxShift, 1281 const ModularStreamId& stream, 1282 bool do_color, bool groupwise) { 1283 size_t stream_id = stream.ID(frame_dim_); 1284 Image& full_image = stream_images_[0]; 1285 const size_t xsize = rect.xsize(); 1286 const size_t ysize = rect.ysize(); 1287 Image& gi = stream_images_[stream_id]; 1288 if (stream_id > 0) { 1289 JXL_ASSIGN_OR_RETURN(gi, 1290 Image::Create(xsize, ysize, full_image.bitdepth, 0)); 1291 // start at the first bigger-than-frame_dim.group_dim non-metachannel 1292 size_t c = full_image.nb_meta_channels; 1293 if (!groupwise) { 1294 for (; c < full_image.channel.size(); c++) { 1295 Channel& fc = full_image.channel[c]; 1296 if (fc.w > frame_dim_.group_dim || fc.h > frame_dim_.group_dim) break; 1297 } 1298 } 1299 for (; c < full_image.channel.size(); c++) { 1300 Channel& fc = full_image.channel[c]; 1301 int shift = std::min(fc.hshift, fc.vshift); 1302 if (shift > maxShift) continue; 1303 if (shift < minShift) continue; 1304 Rect r(rect.x0() >> fc.hshift, rect.y0() >> fc.vshift, 1305 rect.xsize() >> fc.hshift, rect.ysize() >> fc.vshift, fc.w, fc.h); 1306 if (r.xsize() == 0 || r.ysize() == 0) continue; 1307 gi_channel_[stream_id].push_back(c); 1308 JXL_ASSIGN_OR_RETURN(Channel gc, Channel::Create(r.xsize(), r.ysize())); 1309 gc.hshift = fc.hshift; 1310 gc.vshift = fc.vshift; 1311 for (size_t y = 0; y < r.ysize(); ++y) { 1312 memcpy(gc.Row(y), r.ConstRow(fc.plane, y), 1313 r.xsize() * sizeof(pixel_type)); 1314 } 1315 gi.channel.emplace_back(std::move(gc)); 
1316 } 1317 1318 if (gi.channel.empty()) return true; 1319 // Do some per-group transforms 1320 1321 // Local palette 1322 // TODO(veluca): make this work with quantize-after-prediction in lossy 1323 // mode. 1324 if (cparams_.butteraugli_distance == 0.f && cparams_.palette_colors != 0 && 1325 cparams_.speed_tier < SpeedTier::kCheetah) { 1326 // all-channel palette (e.g. RGBA) 1327 if (gi.channel.size() - gi.nb_meta_channels > 1) { 1328 Transform maybe_palette(TransformId::kPalette); 1329 maybe_palette.begin_c = gi.nb_meta_channels; 1330 maybe_palette.num_c = gi.channel.size() - gi.nb_meta_channels; 1331 maybe_palette.nb_colors = std::abs(cparams_.palette_colors); 1332 maybe_palette.ordered_palette = cparams_.palette_colors >= 0; 1333 maybe_do_transform(gi, maybe_palette, cparams_, weighted::Header()); 1334 } 1335 // all-minus-one-channel palette (RGB with separate alpha, or CMY with 1336 // separate K) 1337 if (gi.channel.size() - gi.nb_meta_channels > 3) { 1338 Transform maybe_palette_3(TransformId::kPalette); 1339 maybe_palette_3.begin_c = gi.nb_meta_channels; 1340 maybe_palette_3.num_c = gi.channel.size() - gi.nb_meta_channels - 1; 1341 maybe_palette_3.nb_colors = std::abs(cparams_.palette_colors); 1342 maybe_palette_3.ordered_palette = cparams_.palette_colors >= 0; 1343 maybe_palette_3.lossy_palette = cparams_.lossy_palette; 1344 if (maybe_palette_3.lossy_palette) { 1345 maybe_palette_3.predictor = Predictor::Weighted; 1346 } 1347 maybe_do_transform(gi, maybe_palette_3, cparams_, weighted::Header()); 1348 } 1349 } 1350 1351 // Local channel palette 1352 if (cparams_.channel_colors_percent > 0 && 1353 cparams_.butteraugli_distance == 0.f && !cparams_.lossy_palette && 1354 cparams_.speed_tier < SpeedTier::kCheetah && 1355 !(cparams_.responsive && cparams_.decoding_speed_tier >= 1)) { 1356 // single channel palette (like FLIF's ChannelCompact) 1357 size_t nb_channels = gi.channel.size() - gi.nb_meta_channels; 1358 for (size_t i = 0; i < nb_channels; i++) { 1359 int32_t min; 1360 int32_t max; 1361 compute_minmax(gi.channel[gi.nb_meta_channels + i], &min, &max); 1362 int64_t colors = static_cast<int64_t>(max) - min + 1; 1363 JXL_DEBUG_V(10, "Channel %" PRIuS ": range=%i..%i", i, min, max); 1364 Transform maybe_palette_1(TransformId::kPalette); 1365 maybe_palette_1.begin_c = i + gi.nb_meta_channels; 1366 maybe_palette_1.num_c = 1; 1367 // simple heuristic: if less than X percent of the values in the range 1368 // actually occur, it is probably worth it to do a compaction 1369 // (but only if the channel palette is less than 80% the size of the 1370 // image itself) 1371 maybe_palette_1.nb_colors = std::min( 1372 static_cast<int>(xsize * ysize * 0.8), 1373 static_cast<int>(cparams_.channel_colors_percent / 100. 
* colors)); 1374 maybe_do_transform(gi, maybe_palette_1, cparams_, weighted::Header()); 1375 } 1376 } 1377 } 1378 1379 // lossless and no specific color transform specified: try Nothing, YCoCg, 1380 // and 17 RCTs 1381 if (cparams_.color_transform == ColorTransform::kNone && 1382 cparams_.IsLossless() && cparams_.colorspace < 0 && 1383 gi.channel.size() - gi.nb_meta_channels >= 3 && 1384 cparams_.responsive == JXL_FALSE && do_color && 1385 cparams_.speed_tier <= SpeedTier::kHare) { 1386 Transform sg(TransformId::kRCT); 1387 sg.begin_c = gi.nb_meta_channels; 1388 size_t nb_rcts_to_try = 0; 1389 switch (cparams_.speed_tier) { 1390 case SpeedTier::kLightning: 1391 case SpeedTier::kThunder: 1392 case SpeedTier::kFalcon: 1393 case SpeedTier::kCheetah: 1394 nb_rcts_to_try = 0; // Just do global YCoCg 1395 break; 1396 case SpeedTier::kHare: 1397 nb_rcts_to_try = 4; 1398 break; 1399 case SpeedTier::kWombat: 1400 nb_rcts_to_try = 5; 1401 break; 1402 case SpeedTier::kSquirrel: 1403 nb_rcts_to_try = 7; 1404 break; 1405 case SpeedTier::kKitten: 1406 nb_rcts_to_try = 9; 1407 break; 1408 case SpeedTier::kTectonicPlate: 1409 case SpeedTier::kGlacier: 1410 case SpeedTier::kTortoise: 1411 nb_rcts_to_try = 19; 1412 break; 1413 } 1414 float best_cost = std::numeric_limits<float>::max(); 1415 size_t best_rct = 0; 1416 // These should be 19 actually different transforms; the remaining ones 1417 // are equivalent to one of these (note that the first two are do-nothing 1418 // and YCoCg) modulo channel reordering (which only matters in the case of 1419 // MA-with-prev-channels-properties) and/or sign (e.g. RmG vs GmR) 1420 for (int i : {0 * 7 + 0, 0 * 7 + 6, 0 * 7 + 5, 1 * 7 + 3, 3 * 7 + 5, 1421 5 * 7 + 5, 1 * 7 + 5, 2 * 7 + 5, 1 * 7 + 1, 0 * 7 + 4, 1422 1 * 7 + 2, 2 * 7 + 1, 2 * 7 + 2, 2 * 7 + 3, 4 * 7 + 4, 1423 4 * 7 + 5, 0 * 7 + 2, 0 * 7 + 1, 0 * 7 + 3}) { 1424 if (nb_rcts_to_try == 0) break; 1425 sg.rct_type = i; 1426 nb_rcts_to_try--; 1427 if (do_transform(gi, sg, weighted::Header())) { 1428 float cost = EstimateCost(gi); 1429 if (cost < best_cost) { 1430 best_rct = i; 1431 best_cost = cost; 1432 } 1433 Transform t = gi.transform.back(); 1434 JXL_RETURN_IF_ERROR(t.Inverse(gi, weighted::Header(), nullptr)); 1435 gi.transform.pop_back(); 1436 } 1437 } 1438 // Apply the best RCT to the image for future encoding. 1439 sg.rct_type = best_rct; 1440 do_transform(gi, sg, weighted::Header()); 1441 } else { 1442 // No need to try anything, just use the default options. 
1443 } 1444 size_t nb_wp_modes = 1; 1445 if (cparams_.speed_tier <= SpeedTier::kTortoise) { 1446 nb_wp_modes = 5; 1447 } else if (cparams_.speed_tier <= SpeedTier::kKitten) { 1448 nb_wp_modes = 2; 1449 } 1450 if (nb_wp_modes > 1 && 1451 (stream_options_[stream_id].predictor == Predictor::Weighted || 1452 stream_options_[stream_id].predictor == Predictor::Best || 1453 stream_options_[stream_id].predictor == Predictor::Variable)) { 1454 float best_cost = std::numeric_limits<float>::max(); 1455 stream_options_[stream_id].wp_mode = 0; 1456 for (size_t i = 0; i < nb_wp_modes; i++) { 1457 float cost = EstimateWPCost(gi, i); 1458 if (cost < best_cost) { 1459 best_cost = cost; 1460 stream_options_[stream_id].wp_mode = i; 1461 } 1462 } 1463 } 1464 return true; 1465 } 1466 1467 constexpr float q_deadzone = 0.62f; 1468 int QuantizeWP(const int32_t* qrow, size_t onerow, size_t c, size_t x, size_t y, 1469 size_t w, weighted::State* wp_state, float value, 1470 float inv_factor) { 1471 float svalue = value * inv_factor; 1472 PredictionResult pred = 1473 PredictNoTreeWP(w, qrow + x, onerow, x, y, Predictor::Weighted, wp_state); 1474 svalue -= pred.guess; 1475 if (svalue > -q_deadzone && svalue < q_deadzone) svalue = 0; 1476 int residual = roundf(svalue); 1477 if (residual > 2 || residual < -2) residual = roundf(svalue * 0.5) * 2; 1478 return residual + pred.guess; 1479 } 1480 1481 int QuantizeGradient(const int32_t* qrow, size_t onerow, size_t c, size_t x, 1482 size_t y, size_t w, float value, float inv_factor) { 1483 float svalue = value * inv_factor; 1484 PredictionResult pred = 1485 PredictNoTreeNoWP(w, qrow + x, onerow, x, y, Predictor::Gradient); 1486 svalue -= pred.guess; 1487 if (svalue > -q_deadzone && svalue < q_deadzone) svalue = 0; 1488 int residual = roundf(svalue); 1489 if (residual > 2 || residual < -2) residual = roundf(svalue * 0.5) * 2; 1490 return residual + pred.guess; 1491 } 1492 1493 Status ModularFrameEncoder::AddVarDCTDC(const FrameHeader& frame_header, 1494 const Image3F& dc, const Rect& r, 1495 size_t group_index, bool nl_dc, 1496 PassesEncoderState* enc_state, 1497 bool jpeg_transcode) { 1498 extra_dc_precision[group_index] = nl_dc ? 1 : 0; 1499 float mul = 1 << extra_dc_precision[group_index]; 1500 1501 size_t stream_id = ModularStreamId::VarDCTDC(group_index).ID(frame_dim_); 1502 stream_options_[stream_id].max_chan_size = 0xFFFFFF; 1503 stream_options_[stream_id].predictor = Predictor::Weighted; 1504 stream_options_[stream_id].wp_tree_mode = ModularOptions::TreeMode::kWPOnly; 1505 if (cparams_.speed_tier >= SpeedTier::kSquirrel) { 1506 stream_options_[stream_id].tree_kind = ModularOptions::TreeKind::kWPFixedDC; 1507 } 1508 if (cparams_.speed_tier < SpeedTier::kSquirrel && !nl_dc) { 1509 stream_options_[stream_id].predictor = 1510 (cparams_.speed_tier < SpeedTier::kKitten ? 
Predictor::Variable 1511 : Predictor::Best); 1512 stream_options_[stream_id].wp_tree_mode = 1513 ModularOptions::TreeMode::kDefault; 1514 stream_options_[stream_id].tree_kind = ModularOptions::TreeKind::kLearn; 1515 } 1516 if (cparams_.decoding_speed_tier >= 1) { 1517 stream_options_[stream_id].tree_kind = 1518 ModularOptions::TreeKind::kGradientFixedDC; 1519 } 1520 stream_options_[stream_id].histogram_params = 1521 stream_options_[0].histogram_params; 1522 1523 JXL_ASSIGN_OR_RETURN(stream_images_[stream_id], 1524 Image::Create(r.xsize(), r.ysize(), 8, 3)); 1525 if (nl_dc && stream_options_[stream_id].tree_kind == 1526 ModularOptions::TreeKind::kGradientFixedDC) { 1527 JXL_ASSERT(frame_header.chroma_subsampling.Is444()); 1528 for (size_t c : {1, 0, 2}) { 1529 float inv_factor = enc_state->shared.quantizer.GetInvDcStep(c) * mul; 1530 float y_factor = enc_state->shared.quantizer.GetDcStep(1) / mul; 1531 float cfl_factor = enc_state->shared.cmap.DCFactors()[c]; 1532 for (size_t y = 0; y < r.ysize(); y++) { 1533 int32_t* quant_row = 1534 stream_images_[stream_id].channel[c < 2 ? c ^ 1 : c].plane.Row(y); 1535 size_t stride = stream_images_[stream_id] 1536 .channel[c < 2 ? c ^ 1 : c] 1537 .plane.PixelsPerRow(); 1538 const float* row = r.ConstPlaneRow(dc, c, y); 1539 if (c == 1) { 1540 for (size_t x = 0; x < r.xsize(); x++) { 1541 quant_row[x] = QuantizeGradient(quant_row, stride, c, x, y, 1542 r.xsize(), row[x], inv_factor); 1543 } 1544 } else { 1545 int32_t* quant_row_y = 1546 stream_images_[stream_id].channel[0].plane.Row(y); 1547 for (size_t x = 0; x < r.xsize(); x++) { 1548 quant_row[x] = QuantizeGradient( 1549 quant_row, stride, c, x, y, r.xsize(), 1550 row[x] - quant_row_y[x] * (y_factor * cfl_factor), inv_factor); 1551 } 1552 } 1553 } 1554 } 1555 } else if (nl_dc) { 1556 JXL_ASSERT(frame_header.chroma_subsampling.Is444()); 1557 for (size_t c : {1, 0, 2}) { 1558 float inv_factor = enc_state->shared.quantizer.GetInvDcStep(c) * mul; 1559 float y_factor = enc_state->shared.quantizer.GetDcStep(1) / mul; 1560 float cfl_factor = enc_state->shared.cmap.DCFactors()[c]; 1561 weighted::Header header; 1562 weighted::State wp_state(header, r.xsize(), r.ysize()); 1563 for (size_t y = 0; y < r.ysize(); y++) { 1564 int32_t* quant_row = 1565 stream_images_[stream_id].channel[c < 2 ? c ^ 1 : c].plane.Row(y); 1566 size_t stride = stream_images_[stream_id] 1567 .channel[c < 2 ? c ^ 1 : c] 1568 .plane.PixelsPerRow(); 1569 const float* row = r.ConstPlaneRow(dc, c, y); 1570 if (c == 1) { 1571 for (size_t x = 0; x < r.xsize(); x++) { 1572 quant_row[x] = QuantizeWP(quant_row, stride, c, x, y, r.xsize(), 1573 &wp_state, row[x], inv_factor); 1574 wp_state.UpdateErrors(quant_row[x], x, y, r.xsize()); 1575 } 1576 } else { 1577 int32_t* quant_row_y = 1578 stream_images_[stream_id].channel[0].plane.Row(y); 1579 for (size_t x = 0; x < r.xsize(); x++) { 1580 quant_row[x] = QuantizeWP( 1581 quant_row, stride, c, x, y, r.xsize(), &wp_state, 1582 row[x] - quant_row_y[x] * (y_factor * cfl_factor), inv_factor); 1583 wp_state.UpdateErrors(quant_row[x], x, y, r.xsize()); 1584 } 1585 } 1586 } 1587 } 1588 } else if (frame_header.chroma_subsampling.Is444()) { 1589 for (size_t c : {1, 0, 2}) { 1590 float inv_factor = enc_state->shared.quantizer.GetInvDcStep(c) * mul; 1591 float y_factor = enc_state->shared.quantizer.GetDcStep(1) / mul; 1592 float cfl_factor = enc_state->shared.cmap.DCFactors()[c]; 1593 for (size_t y = 0; y < r.ysize(); y++) { 1594 int32_t* quant_row = 1595 stream_images_[stream_id].channel[c < 2 ? 
c ^ 1 : c].plane.Row(y); 1596 const float* row = r.ConstPlaneRow(dc, c, y); 1597 if (c == 1) { 1598 for (size_t x = 0; x < r.xsize(); x++) { 1599 quant_row[x] = roundf(row[x] * inv_factor); 1600 } 1601 } else { 1602 int32_t* quant_row_y = 1603 stream_images_[stream_id].channel[0].plane.Row(y); 1604 for (size_t x = 0; x < r.xsize(); x++) { 1605 quant_row[x] = 1606 roundf((row[x] - quant_row_y[x] * (y_factor * cfl_factor)) * 1607 inv_factor); 1608 } 1609 } 1610 } 1611 } 1612 } else { 1613 for (size_t c : {1, 0, 2}) { 1614 Rect rect(r.x0() >> frame_header.chroma_subsampling.HShift(c), 1615 r.y0() >> frame_header.chroma_subsampling.VShift(c), 1616 r.xsize() >> frame_header.chroma_subsampling.HShift(c), 1617 r.ysize() >> frame_header.chroma_subsampling.VShift(c)); 1618 float inv_factor = enc_state->shared.quantizer.GetInvDcStep(c) * mul; 1619 size_t ys = rect.ysize(); 1620 size_t xs = rect.xsize(); 1621 Channel& ch = stream_images_[stream_id].channel[c < 2 ? c ^ 1 : c]; 1622 ch.w = xs; 1623 ch.h = ys; 1624 JXL_RETURN_IF_ERROR(ch.shrink()); 1625 for (size_t y = 0; y < ys; y++) { 1626 int32_t* quant_row = ch.plane.Row(y); 1627 const float* row = rect.ConstPlaneRow(dc, c, y); 1628 for (size_t x = 0; x < xs; x++) { 1629 quant_row[x] = roundf(row[x] * inv_factor); 1630 } 1631 } 1632 } 1633 } 1634 1635 DequantDC(r, &enc_state->shared.dc_storage, &enc_state->shared.quant_dc, 1636 stream_images_[stream_id], enc_state->shared.quantizer.MulDC(), 1637 1.0 / mul, enc_state->shared.cmap.DCFactors(), 1638 frame_header.chroma_subsampling, enc_state->shared.block_ctx_map); 1639 return true; 1640 } 1641 1642 Status ModularFrameEncoder::AddACMetadata(const Rect& r, size_t group_index, 1643 bool jpeg_transcode, 1644 PassesEncoderState* enc_state) { 1645 size_t stream_id = ModularStreamId::ACMetadata(group_index).ID(frame_dim_); 1646 stream_options_[stream_id].max_chan_size = 0xFFFFFF; 1647 if (stream_options_[stream_id].predictor != Predictor::Weighted) { 1648 stream_options_[stream_id].wp_tree_mode = ModularOptions::TreeMode::kNoWP; 1649 } 1650 if (jpeg_transcode) { 1651 stream_options_[stream_id].tree_kind = 1652 ModularOptions::TreeKind::kJpegTranscodeACMeta; 1653 } else if (cparams_.speed_tier >= SpeedTier::kFalcon) { 1654 stream_options_[stream_id].tree_kind = 1655 ModularOptions::TreeKind::kFalconACMeta; 1656 } else if (cparams_.speed_tier > SpeedTier::kKitten) { 1657 stream_options_[stream_id].tree_kind = ModularOptions::TreeKind::kACMeta; 1658 } 1659 // If we are using a non-constant CfL field, and are in a slow enough mode, 1660 // re-enable tree computation for it. 
1661 if (cparams_.speed_tier < SpeedTier::kSquirrel && 1662 cparams_.force_cfl_jpeg_recompression) { 1663 stream_options_[stream_id].tree_kind = ModularOptions::TreeKind::kLearn; 1664 } 1665 stream_options_[stream_id].histogram_params = 1666 stream_options_[0].histogram_params; 1667 // YToX, YToB, ACS + QF, EPF 1668 Image& image = stream_images_[stream_id]; 1669 JXL_ASSIGN_OR_RETURN(image, Image::Create(r.xsize(), r.ysize(), 8, 4)); 1670 static_assert(kColorTileDimInBlocks == 8, "Color tile size changed"); 1671 Rect cr(r.x0() >> 3, r.y0() >> 3, (r.xsize() + 7) >> 3, (r.ysize() + 7) >> 3); 1672 JXL_ASSIGN_OR_RETURN(image.channel[0], 1673 Channel::Create(cr.xsize(), cr.ysize(), 3, 3)); 1674 JXL_ASSIGN_OR_RETURN(image.channel[1], 1675 Channel::Create(cr.xsize(), cr.ysize(), 3, 3)); 1676 JXL_ASSIGN_OR_RETURN(image.channel[2], 1677 Channel::Create(r.xsize() * r.ysize(), 2, 0, 0)); 1678 ConvertPlaneAndClamp(cr, enc_state->shared.cmap.ytox_map, 1679 Rect(image.channel[0].plane), &image.channel[0].plane); 1680 ConvertPlaneAndClamp(cr, enc_state->shared.cmap.ytob_map, 1681 Rect(image.channel[1].plane), &image.channel[1].plane); 1682 size_t num = 0; 1683 for (size_t y = 0; y < r.ysize(); y++) { 1684 AcStrategyRow row_acs = enc_state->shared.ac_strategy.ConstRow(r, y); 1685 const int32_t* row_qf = r.ConstRow(enc_state->shared.raw_quant_field, y); 1686 const uint8_t* row_epf = r.ConstRow(enc_state->shared.epf_sharpness, y); 1687 int32_t* out_acs = image.channel[2].plane.Row(0); 1688 int32_t* out_qf = image.channel[2].plane.Row(1); 1689 int32_t* row_out_epf = image.channel[3].plane.Row(y); 1690 for (size_t x = 0; x < r.xsize(); x++) { 1691 row_out_epf[x] = row_epf[x]; 1692 if (!row_acs[x].IsFirstBlock()) continue; 1693 out_acs[num] = row_acs[x].RawStrategy(); 1694 out_qf[num] = row_qf[x] - 1; 1695 num++; 1696 } 1697 } 1698 image.channel[2].w = num; 1699 ac_metadata_size[group_index] = num; 1700 return true; 1701 } 1702 1703 Status ModularFrameEncoder::EncodeQuantTable( 1704 size_t size_x, size_t size_y, BitWriter* writer, 1705 const QuantEncoding& encoding, size_t idx, 1706 ModularFrameEncoder* modular_frame_encoder) { 1707 JXL_ASSERT(encoding.qraw.qtable != nullptr); 1708 JXL_ASSERT(size_x * size_y * 3 == encoding.qraw.qtable->size()); 1709 JXL_CHECK(F16Coder::Write(encoding.qraw.qtable_den, writer)); 1710 if (modular_frame_encoder) { 1711 JXL_CHECK(modular_frame_encoder->EncodeStream( 1712 writer, nullptr, 0, ModularStreamId::QuantTable(idx))); 1713 return true; 1714 } 1715 JXL_ASSIGN_OR_RETURN(Image image, Image::Create(size_x, size_y, 8, 3)); 1716 for (size_t c = 0; c < 3; c++) { 1717 for (size_t y = 0; y < size_y; y++) { 1718 int32_t* JXL_RESTRICT row = image.channel[c].Row(y); 1719 for (size_t x = 0; x < size_x; x++) { 1720 row[x] = (*encoding.qraw.qtable)[c * size_x * size_y + y * size_x + x]; 1721 } 1722 } 1723 } 1724 ModularOptions cfopts; 1725 JXL_CHECK(ModularGenericCompress(image, cfopts, writer)); 1726 return true; 1727 } 1728 1729 Status ModularFrameEncoder::AddQuantTable(size_t size_x, size_t size_y, 1730 const QuantEncoding& encoding, 1731 size_t idx) { 1732 size_t stream_id = ModularStreamId::QuantTable(idx).ID(frame_dim_); 1733 JXL_ASSERT(encoding.qraw.qtable != nullptr); 1734 JXL_ASSERT(size_x * size_y * 3 == encoding.qraw.qtable->size()); 1735 Image& image = stream_images_[stream_id]; 1736 JXL_ASSIGN_OR_RETURN(image, Image::Create(size_x, size_y, 8, 3)); 1737 for (size_t c = 0; c < 3; c++) { 1738 for (size_t y = 0; y < size_y; y++) { 1739 int32_t* JXL_RESTRICT row = 
image.channel[c].Row(y); 1740 for (size_t x = 0; x < size_x; x++) { 1741 row[x] = (*encoding.qraw.qtable)[c * size_x * size_y + y * size_x + x]; 1742 } 1743 } 1744 } 1745 return true; 1746 } 1747 } // namespace jxl