encode_streaming.cc (8643B)
1 // Copyright (c) the JPEG XL Project Authors. All rights reserved. 2 // 3 // Use of this source code is governed by a BSD-style 4 // license that can be found in the LICENSE file. 5 6 #include "lib/jpegli/encode_streaming.h" 7 8 #include <cmath> 9 10 #include "lib/jpegli/bit_writer.h" 11 #include "lib/jpegli/bitstream.h" 12 #include "lib/jpegli/entropy_coding.h" 13 #include "lib/jpegli/error.h" 14 #include "lib/jpegli/memory_manager.h" 15 #include "lib/jxl/base/bits.h" 16 17 #undef HWY_TARGET_INCLUDE 18 #define HWY_TARGET_INCLUDE "lib/jpegli/encode_streaming.cc" 19 #include <hwy/foreach_target.h> 20 #include <hwy/highway.h> 21 22 #include "lib/jpegli/dct-inl.h" 23 #include "lib/jpegli/entropy_coding-inl.h" 24 25 HWY_BEFORE_NAMESPACE(); 26 namespace jpegli { 27 namespace HWY_NAMESPACE { 28 29 static const int kStreamingModeCoefficients = 0; 30 static const int kStreamingModeTokens = 1; 31 static const int kStreamingModeBits = 2; 32 33 namespace { 34 void ZigZagShuffle(int32_t* JXL_RESTRICT block) { 35 // TODO(szabadka) SIMDify this. 36 int32_t tmp[DCTSIZE2]; 37 tmp[0] = block[0]; 38 tmp[1] = block[1]; 39 tmp[2] = block[8]; 40 tmp[3] = block[16]; 41 tmp[4] = block[9]; 42 tmp[5] = block[2]; 43 tmp[6] = block[3]; 44 tmp[7] = block[10]; 45 tmp[8] = block[17]; 46 tmp[9] = block[24]; 47 tmp[10] = block[32]; 48 tmp[11] = block[25]; 49 tmp[12] = block[18]; 50 tmp[13] = block[11]; 51 tmp[14] = block[4]; 52 tmp[15] = block[5]; 53 tmp[16] = block[12]; 54 tmp[17] = block[19]; 55 tmp[18] = block[26]; 56 tmp[19] = block[33]; 57 tmp[20] = block[40]; 58 tmp[21] = block[48]; 59 tmp[22] = block[41]; 60 tmp[23] = block[34]; 61 tmp[24] = block[27]; 62 tmp[25] = block[20]; 63 tmp[26] = block[13]; 64 tmp[27] = block[6]; 65 tmp[28] = block[7]; 66 tmp[29] = block[14]; 67 tmp[30] = block[21]; 68 tmp[31] = block[28]; 69 tmp[32] = block[35]; 70 tmp[33] = block[42]; 71 tmp[34] = block[49]; 72 tmp[35] = block[56]; 73 tmp[36] = block[57]; 74 tmp[37] = block[50]; 75 tmp[38] = block[43]; 76 tmp[39] = block[36]; 77 tmp[40] = block[29]; 78 tmp[41] = block[22]; 79 tmp[42] = block[15]; 80 tmp[43] = block[23]; 81 tmp[44] = block[30]; 82 tmp[45] = block[37]; 83 tmp[46] = block[44]; 84 tmp[47] = block[51]; 85 tmp[48] = block[58]; 86 tmp[49] = block[59]; 87 tmp[50] = block[52]; 88 tmp[51] = block[45]; 89 tmp[52] = block[38]; 90 tmp[53] = block[31]; 91 tmp[54] = block[39]; 92 tmp[55] = block[46]; 93 tmp[56] = block[53]; 94 tmp[57] = block[60]; 95 tmp[58] = block[61]; 96 tmp[59] = block[54]; 97 tmp[60] = block[47]; 98 tmp[61] = block[55]; 99 tmp[62] = block[62]; 100 tmp[63] = block[63]; 101 memcpy(block, tmp, DCTSIZE2 * sizeof(tmp[0])); 102 } 103 } // namespace 104 105 template <int kMode> 106 void ProcessiMCURow(j_compress_ptr cinfo) { 107 jpeg_comp_master* m = cinfo->master; 108 JpegBitWriter* bw = &m->bw; 109 int xsize_mcus = DivCeil(cinfo->image_width, 8 * cinfo->max_h_samp_factor); 110 int ysize_mcus = DivCeil(cinfo->image_height, 8 * cinfo->max_v_samp_factor); 111 int mcu_y = m->next_iMCU_row; 112 int32_t* block = m->block_tmp; 113 int32_t* symbols = m->block_tmp + DCTSIZE2; 114 int32_t* nonzero_idx = m->block_tmp + 3 * DCTSIZE2; 115 coeff_t* JXL_RESTRICT last_dc_coeff = m->last_dc_coeff; 116 bool adaptive_quant = m->use_adaptive_quantization && m->psnr_target == 0; 117 JBLOCKARRAY ba[kMaxComponents]; 118 if (kMode == kStreamingModeCoefficients) { 119 for (int c = 0; c < cinfo->num_components; ++c) { 120 jpeg_component_info* comp = &cinfo->comp_info[c]; 121 int by0 = mcu_y * comp->v_samp_factor; 122 int block_rows_left = comp->height_in_blocks - by0; 123 int max_block_rows = std::min(comp->v_samp_factor, block_rows_left); 124 ba[c] = (*cinfo->mem->access_virt_barray)( 125 reinterpret_cast<j_common_ptr>(cinfo), m->coeff_buffers[c], by0, 126 max_block_rows, true); 127 } 128 } 129 if (kMode == kStreamingModeTokens) { 130 TokenArray* ta = &m->token_arrays[m->cur_token_array]; 131 int max_tokens_per_mcu_row = MaxNumTokensPerMCURow(cinfo); 132 if (ta->num_tokens + max_tokens_per_mcu_row > m->num_tokens) { 133 if (ta->tokens) { 134 m->total_num_tokens += ta->num_tokens; 135 ++m->cur_token_array; 136 ta = &m->token_arrays[m->cur_token_array]; 137 } 138 m->num_tokens = 139 EstimateNumTokens(cinfo, mcu_y, ysize_mcus, m->total_num_tokens, 140 max_tokens_per_mcu_row); 141 ta->tokens = Allocate<Token>(cinfo, m->num_tokens, JPOOL_IMAGE); 142 m->next_token = ta->tokens; 143 } 144 } 145 const float* imcu_start[kMaxComponents]; 146 for (int c = 0; c < cinfo->num_components; ++c) { 147 jpeg_component_info* comp = &cinfo->comp_info[c]; 148 imcu_start[c] = m->raw_data[c]->Row(mcu_y * comp->v_samp_factor * DCTSIZE); 149 } 150 const float* qf = nullptr; 151 if (adaptive_quant) { 152 qf = m->quant_field.Row(0); 153 } 154 HuffmanCodeTable* dc_code = nullptr; 155 HuffmanCodeTable* ac_code = nullptr; 156 const size_t qf_stride = m->quant_field.stride(); 157 for (int mcu_x = 0; mcu_x < xsize_mcus; ++mcu_x) { 158 for (int c = 0; c < cinfo->num_components; ++c) { 159 jpeg_component_info* comp = &cinfo->comp_info[c]; 160 if (kMode == kStreamingModeBits) { 161 dc_code = &m->coding_tables[m->context_map[c]]; 162 ac_code = &m->coding_tables[m->context_map[c + 4]]; 163 } 164 float* JXL_RESTRICT qmc = m->quant_mul[c]; 165 const size_t stride = m->raw_data[c]->stride(); 166 const int h_factor = m->h_factor[c]; 167 const float* zero_bias_offset = m->zero_bias_offset[c]; 168 const float* zero_bias_mul = m->zero_bias_mul[c]; 169 float aq_strength = 0.0f; 170 for (int iy = 0; iy < comp->v_samp_factor; ++iy) { 171 for (int ix = 0; ix < comp->h_samp_factor; ++ix) { 172 size_t by = mcu_y * comp->v_samp_factor + iy; 173 size_t bx = mcu_x * comp->h_samp_factor + ix; 174 if (bx >= comp->width_in_blocks || by >= comp->height_in_blocks) { 175 if (kMode == kStreamingModeTokens) { 176 *m->next_token++ = Token(c, 0, 0); 177 *m->next_token++ = Token(c + 4, 0, 0); 178 } else if (kMode == kStreamingModeBits) { 179 WriteBits(bw, dc_code->depth[0], dc_code->code[0]); 180 WriteBits(bw, ac_code->depth[0], ac_code->code[0]); 181 } 182 continue; 183 } 184 if (adaptive_quant) { 185 aq_strength = qf[iy * qf_stride + bx * h_factor]; 186 } 187 const float* pixels = imcu_start[c] + (iy * stride + bx) * DCTSIZE; 188 ComputeCoefficientBlock(pixels, stride, qmc, last_dc_coeff[c], 189 aq_strength, zero_bias_offset, zero_bias_mul, 190 m->dct_buffer, block); 191 if (kMode == kStreamingModeCoefficients) { 192 JCOEF* cblock = &ba[c][iy][bx][0]; 193 for (int k = 0; k < DCTSIZE2; ++k) { 194 cblock[k] = block[kJPEGNaturalOrder[k]]; 195 } 196 } 197 block[0] -= last_dc_coeff[c]; 198 last_dc_coeff[c] += block[0]; 199 if (kMode == kStreamingModeTokens) { 200 ComputeTokensForBlock<int32_t, false>(block, 0, c, c + 4, 201 &m->next_token); 202 } else if (kMode == kStreamingModeBits) { 203 ZigZagShuffle(block); 204 const int num_nonzeros = CompactBlock(block, nonzero_idx); 205 const bool emit_eob = nonzero_idx[num_nonzeros - 1] < 1008; 206 ComputeSymbols(num_nonzeros, nonzero_idx, block, symbols); 207 WriteBlock(symbols, block, num_nonzeros, emit_eob, dc_code, ac_code, 208 bw); 209 } 210 } 211 } 212 } 213 } 214 if (kMode == kStreamingModeTokens) { 215 TokenArray* ta = &m->token_arrays[m->cur_token_array]; 216 ta->num_tokens = m->next_token - ta->tokens; 217 ScanTokenInfo* sti = &m->scan_token_info[0]; 218 sti->num_tokens = m->total_num_tokens + ta->num_tokens; 219 sti->restarts[0] = sti->num_tokens; 220 } 221 } 222 223 void ComputeCoefficientsForiMCURow(j_compress_ptr cinfo) { 224 ProcessiMCURow<kStreamingModeCoefficients>(cinfo); 225 } 226 227 void ComputeTokensForiMCURow(j_compress_ptr cinfo) { 228 ProcessiMCURow<kStreamingModeTokens>(cinfo); 229 } 230 231 void WriteiMCURow(j_compress_ptr cinfo) { 232 ProcessiMCURow<kStreamingModeBits>(cinfo); 233 } 234 235 // NOLINTNEXTLINE(google-readability-namespace-comments) 236 } // namespace HWY_NAMESPACE 237 } // namespace jpegli 238 HWY_AFTER_NAMESPACE(); 239 240 #if HWY_ONCE 241 namespace jpegli { 242 HWY_EXPORT(ComputeCoefficientsForiMCURow); 243 HWY_EXPORT(ComputeTokensForiMCURow); 244 HWY_EXPORT(WriteiMCURow); 245 246 void ComputeCoefficientsForiMCURow(j_compress_ptr cinfo) { 247 HWY_DYNAMIC_DISPATCH(ComputeCoefficientsForiMCURow)(cinfo); 248 } 249 250 void ComputeTokensForiMCURow(j_compress_ptr cinfo) { 251 HWY_DYNAMIC_DISPATCH(ComputeTokensForiMCURow)(cinfo); 252 } 253 254 void WriteiMCURow(j_compress_ptr cinfo) { 255 HWY_DYNAMIC_DISPATCH(WriteiMCURow)(cinfo); 256 } 257 258 } // namespace jpegli 259 #endif // HWY_ONCE