dec_cache.h (8933B)
1 // Copyright (c) the JPEG XL Project Authors. All rights reserved. 2 // 3 // Use of this source code is governed by a BSD-style 4 // license that can be found in the LICENSE file. 5 6 #ifndef LIB_JXL_DEC_CACHE_H_ 7 #define LIB_JXL_DEC_CACHE_H_ 8 9 #include <jxl/decode.h> 10 #include <jxl/types.h> 11 #include <stdint.h> 12 13 #include <algorithm> 14 #include <atomic> 15 #include <cmath> 16 #include <hwy/base.h> // HWY_ALIGN_MAX 17 #include <memory> 18 #include <vector> 19 20 #include "hwy/aligned_allocator.h" 21 #include "lib/jxl/ac_strategy.h" 22 #include "lib/jxl/base/common.h" // kMaxNumPasses 23 #include "lib/jxl/base/compiler_specific.h" 24 #include "lib/jxl/base/data_parallel.h" 25 #include "lib/jxl/base/status.h" 26 #include "lib/jxl/coeff_order.h" 27 #include "lib/jxl/common.h" 28 #include "lib/jxl/dct_util.h" 29 #include "lib/jxl/dec_ans.h" 30 #include "lib/jxl/dec_xyb.h" 31 #include "lib/jxl/frame_dimensions.h" 32 #include "lib/jxl/frame_header.h" 33 #include "lib/jxl/image.h" 34 #include "lib/jxl/image_bundle.h" 35 #include "lib/jxl/image_metadata.h" 36 #include "lib/jxl/passes_state.h" 37 #include "lib/jxl/render_pipeline/render_pipeline.h" 38 #include "lib/jxl/render_pipeline/render_pipeline_stage.h" 39 #include "lib/jxl/render_pipeline/stage_upsampling.h" 40 41 namespace jxl { 42 43 constexpr size_t kSigmaBorder = 1; 44 constexpr size_t kSigmaPadding = 2; 45 46 struct PixelCallback { 47 PixelCallback() = default; 48 PixelCallback(JxlImageOutInitCallback init, JxlImageOutRunCallback run, 49 JxlImageOutDestroyCallback destroy, void* init_opaque) 50 : init(init), run(run), destroy(destroy), init_opaque(init_opaque) { 51 #if JXL_ENABLE_ASSERT 52 const bool has_init = init != nullptr; 53 const bool has_run = run != nullptr; 54 const bool has_destroy = destroy != nullptr; 55 const bool healthy = (has_init == has_run) && (has_run == has_destroy); 56 JXL_ASSERT(healthy); 57 #endif 58 } 59 60 bool IsPresent() const { return run != nullptr; } 61 62 void* Init(size_t num_threads, size_t num_pixels) const { 63 return init(init_opaque, num_threads, num_pixels); 64 } 65 66 JxlImageOutInitCallback init = nullptr; 67 JxlImageOutRunCallback run = nullptr; 68 JxlImageOutDestroyCallback destroy = nullptr; 69 void* init_opaque = nullptr; 70 }; 71 72 struct ImageOutput { 73 // Pixel format of the output pixels, used for buffer and callback output. 74 JxlPixelFormat format; 75 // Output bit depth for unsigned data types, used for float to int conversion. 76 size_t bits_per_sample; 77 // Callback for line-by-line output. 78 PixelCallback callback; 79 // Pixel buffer for image output. 80 void* buffer; 81 size_t buffer_size; 82 // Length of a row of image_buffer in bytes (based on oriented width). 83 size_t stride; 84 }; 85 86 // Per-frame decoder state. All the images here should be accessed through a 87 // group rect (either with block units or pixel units). 88 struct PassesDecoderState { 89 PassesSharedState shared_storage; 90 // Allows avoiding copies for encoder loop. 91 const PassesSharedState* JXL_RESTRICT shared = &shared_storage; 92 93 // 8x upsampling stage for DC. 94 std::unique_ptr<RenderPipelineStage> upsampler8x; 95 96 // For ANS decoding. 97 std::vector<ANSCode> code; 98 std::vector<std::vector<uint8_t>> context_map; 99 100 // Multiplier to be applied to the quant matrices of the x channel. 101 float x_dm_multiplier; 102 float b_dm_multiplier; 103 104 // Sigma values for EPF. 105 ImageF sigma; 106 107 // Image dimensions before applying undo_orientation. 108 size_t width; 109 size_t height; 110 ImageOutput main_output; 111 std::vector<ImageOutput> extra_output; 112 113 // Whether to use int16 float-XYB-to-uint8-srgb conversion. 114 bool fast_xyb_srgb8_conversion; 115 116 // If true, the RGBA output will be unpremultiplied before writing to the 117 // output. 118 bool unpremul_alpha; 119 120 // The render pipeline will apply this orientation to bring the image to the 121 // intended display orientation. 122 Orientation undo_orientation; 123 124 // Used for seeding noise. 125 size_t visible_frame_index = 0; 126 size_t nonvisible_frame_index = 0; 127 128 // Keep track of the transform types used. 129 std::atomic<uint32_t> used_acs{0}; 130 131 // Storage for coefficients if in "accumulate" mode. 132 std::unique_ptr<ACImage> coefficients = make_unique<ACImageT<int32_t>>(); 133 134 // Rendering pipeline. 135 std::unique_ptr<RenderPipeline> render_pipeline; 136 137 // Storage for the current frame if it can be referenced by future frames. 138 ImageBundle frame_storage_for_referencing; 139 140 struct PipelineOptions { 141 bool use_slow_render_pipeline; 142 bool coalescing; 143 bool render_spotcolors; 144 bool render_noise; 145 }; 146 147 Status PreparePipeline(const FrameHeader& frame_header, ImageBundle* decoded, 148 PipelineOptions options); 149 150 // Information for colour conversions. 151 OutputEncodingInfo output_encoding_info; 152 153 // Initializes decoder-specific structures using information from *shared. 154 Status Init(const FrameHeader& frame_header) { 155 x_dm_multiplier = std::pow(1 / (1.25f), frame_header.x_qm_scale - 2.0f); 156 b_dm_multiplier = std::pow(1 / (1.25f), frame_header.b_qm_scale - 2.0f); 157 158 main_output.callback = PixelCallback(); 159 main_output.buffer = nullptr; 160 extra_output.clear(); 161 162 fast_xyb_srgb8_conversion = false; 163 unpremul_alpha = false; 164 undo_orientation = Orientation::kIdentity; 165 166 used_acs = 0; 167 168 upsampler8x = GetUpsamplingStage(shared->metadata->transform_data, 0, 3); 169 if (frame_header.loop_filter.epf_iters > 0) { 170 JXL_ASSIGN_OR_RETURN( 171 sigma, 172 ImageF::Create(shared->frame_dim.xsize_blocks + 2 * kSigmaPadding, 173 shared->frame_dim.ysize_blocks + 2 * kSigmaPadding)); 174 } 175 return true; 176 } 177 178 // Initialize the decoder state after all of DC is decoded. 179 Status InitForAC(size_t num_passes, ThreadPool* pool) { 180 shared_storage.coeff_order_size = 0; 181 for (uint8_t o = 0; o < AcStrategy::kNumValidStrategies; ++o) { 182 if (((1 << o) & used_acs) == 0) continue; 183 uint8_t ord = kStrategyOrder[o]; 184 shared_storage.coeff_order_size = 185 std::max(kCoeffOrderOffset[3 * (ord + 1)] * kDCTBlockSize, 186 shared_storage.coeff_order_size); 187 } 188 size_t sz = num_passes * shared_storage.coeff_order_size; 189 if (sz > shared_storage.coeff_orders.size()) { 190 shared_storage.coeff_orders.resize(sz); 191 } 192 return true; 193 } 194 }; 195 196 // Temp images required for decoding a single group. Reduces memory allocations 197 // for large images because we only initialize min(#threads, #groups) instances. 198 struct GroupDecCache { 199 Status InitOnce(size_t num_passes, size_t used_acs) { 200 for (size_t i = 0; i < num_passes; i++) { 201 if (num_nzeroes[i].xsize() == 0) { 202 // Allocate enough for a whole group - partial groups on the 203 // right/bottom border just use a subset. The valid size is passed via 204 // Rect. 205 206 JXL_ASSIGN_OR_RETURN( 207 num_nzeroes[i], 208 Image3I::Create(kGroupDimInBlocks, kGroupDimInBlocks)); 209 } 210 } 211 size_t max_block_area = 0; 212 213 for (uint8_t o = 0; o < AcStrategy::kNumValidStrategies; ++o) { 214 AcStrategy acs = AcStrategy::FromRawStrategy(o); 215 if ((used_acs & (1 << o)) == 0) continue; 216 size_t area = 217 acs.covered_blocks_x() * acs.covered_blocks_y() * kDCTBlockSize; 218 max_block_area = std::max(area, max_block_area); 219 } 220 221 if (max_block_area > max_block_area_) { 222 max_block_area_ = max_block_area; 223 // We need 3x float blocks for dequantized coefficients and 1x for scratch 224 // space for transforms. 225 float_memory_ = hwy::AllocateAligned<float>(max_block_area_ * 7); 226 // We need 3x int32 or int16 blocks for quantized coefficients. 227 int32_memory_ = hwy::AllocateAligned<int32_t>(max_block_area_ * 3); 228 int16_memory_ = hwy::AllocateAligned<int16_t>(max_block_area_ * 3); 229 } 230 231 dec_group_block = float_memory_.get(); 232 scratch_space = dec_group_block + max_block_area_ * 3; 233 dec_group_qblock = int32_memory_.get(); 234 dec_group_qblock16 = int16_memory_.get(); 235 return true; 236 } 237 238 Status InitDCBufferOnce() { 239 if (dc_buffer.xsize() == 0) { 240 JXL_ASSIGN_OR_RETURN( 241 dc_buffer, 242 ImageF::Create(kGroupDimInBlocks + kRenderPipelineXOffset * 2, 243 kGroupDimInBlocks + 4)); 244 } 245 return true; 246 } 247 248 // Scratch space used by DecGroupImpl(). 249 float* dec_group_block; 250 int32_t* dec_group_qblock; 251 int16_t* dec_group_qblock16; 252 253 // For TransformToPixels. 254 float* scratch_space; 255 // Note that scratch_space is never used at the same time as dec_group_qblock. 256 // Moreover, only one of dec_group_qblock16 is ever used. 257 // TODO(veluca): figure out if we can save allocations. 258 259 // AC decoding 260 Image3I num_nzeroes[kMaxNumPasses]; 261 262 // Buffer for DC upsampling. 263 ImageF dc_buffer; 264 265 private: 266 hwy::AlignedFreeUniquePtr<float[]> float_memory_; 267 hwy::AlignedFreeUniquePtr<int32_t[]> int32_memory_; 268 hwy::AlignedFreeUniquePtr<int16_t[]> int16_memory_; 269 size_t max_block_area_ = 0; 270 }; 271 272 } // namespace jxl 273 274 #endif // LIB_JXL_DEC_CACHE_H_