libjxl

FORK: libjxl patches used on blog
git clone https://git.neptards.moe/blog/libjxl.git
Log | Files | Refs | Submodules | README | LICENSE

encode_streaming.cc (8643B)


      1 // Copyright (c) the JPEG XL Project Authors. All rights reserved.
      2 //
      3 // Use of this source code is governed by a BSD-style
      4 // license that can be found in the LICENSE file.
      5 
      6 #include "lib/jpegli/encode_streaming.h"
      7 
      8 #include <cmath>
      9 
     10 #include "lib/jpegli/bit_writer.h"
     11 #include "lib/jpegli/bitstream.h"
     12 #include "lib/jpegli/entropy_coding.h"
     13 #include "lib/jpegli/error.h"
     14 #include "lib/jpegli/memory_manager.h"
     15 #include "lib/jxl/base/bits.h"
     16 
     17 #undef HWY_TARGET_INCLUDE
     18 #define HWY_TARGET_INCLUDE "lib/jpegli/encode_streaming.cc"
     19 #include <hwy/foreach_target.h>
     20 #include <hwy/highway.h>
     21 
     22 #include "lib/jpegli/dct-inl.h"
     23 #include "lib/jpegli/entropy_coding-inl.h"
     24 
     25 HWY_BEFORE_NAMESPACE();
     26 namespace jpegli {
     27 namespace HWY_NAMESPACE {
     28 
     // Values for the kMode template argument of ProcessiMCURow:
     //  - Coefficients: quantized blocks are copied into the virtual coefficient
     //    arrays,
     //  - Tokens: blocks are turned into tokens appended to the token array,
     //  - Bits: blocks are entropy coded directly into the bit writer.
     static constexpr int kStreamingModeCoefficients = 0;
     static constexpr int kStreamingModeTokens = 1;
     static constexpr int kStreamingModeBits = 2;
     32 
     33 namespace {
     34 void ZigZagShuffle(int32_t* JXL_RESTRICT block) {
     35   // TODO(szabadka) SIMDify this.
     36   int32_t tmp[DCTSIZE2];
     37   tmp[0] = block[0];
     38   tmp[1] = block[1];
     39   tmp[2] = block[8];
     40   tmp[3] = block[16];
     41   tmp[4] = block[9];
     42   tmp[5] = block[2];
     43   tmp[6] = block[3];
     44   tmp[7] = block[10];
     45   tmp[8] = block[17];
     46   tmp[9] = block[24];
     47   tmp[10] = block[32];
     48   tmp[11] = block[25];
     49   tmp[12] = block[18];
     50   tmp[13] = block[11];
     51   tmp[14] = block[4];
     52   tmp[15] = block[5];
     53   tmp[16] = block[12];
     54   tmp[17] = block[19];
     55   tmp[18] = block[26];
     56   tmp[19] = block[33];
     57   tmp[20] = block[40];
     58   tmp[21] = block[48];
     59   tmp[22] = block[41];
     60   tmp[23] = block[34];
     61   tmp[24] = block[27];
     62   tmp[25] = block[20];
     63   tmp[26] = block[13];
     64   tmp[27] = block[6];
     65   tmp[28] = block[7];
     66   tmp[29] = block[14];
     67   tmp[30] = block[21];
     68   tmp[31] = block[28];
     69   tmp[32] = block[35];
     70   tmp[33] = block[42];
     71   tmp[34] = block[49];
     72   tmp[35] = block[56];
     73   tmp[36] = block[57];
     74   tmp[37] = block[50];
     75   tmp[38] = block[43];
     76   tmp[39] = block[36];
     77   tmp[40] = block[29];
     78   tmp[41] = block[22];
     79   tmp[42] = block[15];
     80   tmp[43] = block[23];
     81   tmp[44] = block[30];
     82   tmp[45] = block[37];
     83   tmp[46] = block[44];
     84   tmp[47] = block[51];
     85   tmp[48] = block[58];
     86   tmp[49] = block[59];
     87   tmp[50] = block[52];
     88   tmp[51] = block[45];
     89   tmp[52] = block[38];
     90   tmp[53] = block[31];
     91   tmp[54] = block[39];
     92   tmp[55] = block[46];
     93   tmp[56] = block[53];
     94   tmp[57] = block[60];
     95   tmp[58] = block[61];
     96   tmp[59] = block[54];
     97   tmp[60] = block[47];
     98   tmp[61] = block[55];
     99   tmp[62] = block[62];
    100   tmp[63] = block[63];
    101   memcpy(block, tmp, DCTSIZE2 * sizeof(tmp[0]));
    102 }
    103 }  // namespace
    104 
    105 template <int kMode>
    106 void ProcessiMCURow(j_compress_ptr cinfo) {
    107   jpeg_comp_master* m = cinfo->master;
    108   JpegBitWriter* bw = &m->bw;
    109   int xsize_mcus = DivCeil(cinfo->image_width, 8 * cinfo->max_h_samp_factor);
    110   int ysize_mcus = DivCeil(cinfo->image_height, 8 * cinfo->max_v_samp_factor);
    111   int mcu_y = m->next_iMCU_row;
    112   int32_t* block = m->block_tmp;
    113   int32_t* symbols = m->block_tmp + DCTSIZE2;
    114   int32_t* nonzero_idx = m->block_tmp + 3 * DCTSIZE2;
    115   coeff_t* JXL_RESTRICT last_dc_coeff = m->last_dc_coeff;
    116   bool adaptive_quant = m->use_adaptive_quantization && m->psnr_target == 0;
    117   JBLOCKARRAY ba[kMaxComponents];
    118   if (kMode == kStreamingModeCoefficients) {
    119     for (int c = 0; c < cinfo->num_components; ++c) {
    120       jpeg_component_info* comp = &cinfo->comp_info[c];
    121       int by0 = mcu_y * comp->v_samp_factor;
    122       int block_rows_left = comp->height_in_blocks - by0;
    123       int max_block_rows = std::min(comp->v_samp_factor, block_rows_left);
    124       ba[c] = (*cinfo->mem->access_virt_barray)(
    125           reinterpret_cast<j_common_ptr>(cinfo), m->coeff_buffers[c], by0,
    126           max_block_rows, true);
    127     }
    128   }
    129   if (kMode == kStreamingModeTokens) {
    130     TokenArray* ta = &m->token_arrays[m->cur_token_array];
    131     int max_tokens_per_mcu_row = MaxNumTokensPerMCURow(cinfo);
    132     if (ta->num_tokens + max_tokens_per_mcu_row > m->num_tokens) {
    133       if (ta->tokens) {
    134         m->total_num_tokens += ta->num_tokens;
    135         ++m->cur_token_array;
    136         ta = &m->token_arrays[m->cur_token_array];
    137       }
    138       m->num_tokens =
    139           EstimateNumTokens(cinfo, mcu_y, ysize_mcus, m->total_num_tokens,
    140                             max_tokens_per_mcu_row);
    141       ta->tokens = Allocate<Token>(cinfo, m->num_tokens, JPOOL_IMAGE);
    142       m->next_token = ta->tokens;
    143     }
    144   }
    145   const float* imcu_start[kMaxComponents];
    146   for (int c = 0; c < cinfo->num_components; ++c) {
    147     jpeg_component_info* comp = &cinfo->comp_info[c];
    148     imcu_start[c] = m->raw_data[c]->Row(mcu_y * comp->v_samp_factor * DCTSIZE);
    149   }
    150   const float* qf = nullptr;
    151   if (adaptive_quant) {
    152     qf = m->quant_field.Row(0);
    153   }
    154   HuffmanCodeTable* dc_code = nullptr;
    155   HuffmanCodeTable* ac_code = nullptr;
    156   const size_t qf_stride = m->quant_field.stride();
    157   for (int mcu_x = 0; mcu_x < xsize_mcus; ++mcu_x) {
    158     for (int c = 0; c < cinfo->num_components; ++c) {
    159       jpeg_component_info* comp = &cinfo->comp_info[c];
    160       if (kMode == kStreamingModeBits) {
    161         dc_code = &m->coding_tables[m->context_map[c]];
    162         ac_code = &m->coding_tables[m->context_map[c + 4]];
    163       }
    164       float* JXL_RESTRICT qmc = m->quant_mul[c];
    165       const size_t stride = m->raw_data[c]->stride();
    166       const int h_factor = m->h_factor[c];
    167       const float* zero_bias_offset = m->zero_bias_offset[c];
    168       const float* zero_bias_mul = m->zero_bias_mul[c];
    169       float aq_strength = 0.0f;
    170       for (int iy = 0; iy < comp->v_samp_factor; ++iy) {
    171         for (int ix = 0; ix < comp->h_samp_factor; ++ix) {
    172           size_t by = mcu_y * comp->v_samp_factor + iy;
    173           size_t bx = mcu_x * comp->h_samp_factor + ix;
    174           if (bx >= comp->width_in_blocks || by >= comp->height_in_blocks) {
    175             if (kMode == kStreamingModeTokens) {
    176               *m->next_token++ = Token(c, 0, 0);
    177               *m->next_token++ = Token(c + 4, 0, 0);
    178             } else if (kMode == kStreamingModeBits) {
    179               WriteBits(bw, dc_code->depth[0], dc_code->code[0]);
    180               WriteBits(bw, ac_code->depth[0], ac_code->code[0]);
    181             }
    182             continue;
    183           }
    184           if (adaptive_quant) {
    185             aq_strength = qf[iy * qf_stride + bx * h_factor];
    186           }
    187           const float* pixels = imcu_start[c] + (iy * stride + bx) * DCTSIZE;
    188           ComputeCoefficientBlock(pixels, stride, qmc, last_dc_coeff[c],
    189                                   aq_strength, zero_bias_offset, zero_bias_mul,
    190                                   m->dct_buffer, block);
    191           if (kMode == kStreamingModeCoefficients) {
    192             JCOEF* cblock = &ba[c][iy][bx][0];
    193             for (int k = 0; k < DCTSIZE2; ++k) {
    194               cblock[k] = block[kJPEGNaturalOrder[k]];
    195             }
    196           }
    197           block[0] -= last_dc_coeff[c];
    198           last_dc_coeff[c] += block[0];
    199           if (kMode == kStreamingModeTokens) {
    200             ComputeTokensForBlock<int32_t, false>(block, 0, c, c + 4,
    201                                                   &m->next_token);
    202           } else if (kMode == kStreamingModeBits) {
    203             ZigZagShuffle(block);
    204             const int num_nonzeros = CompactBlock(block, nonzero_idx);
    205             const bool emit_eob = nonzero_idx[num_nonzeros - 1] < 1008;
    206             ComputeSymbols(num_nonzeros, nonzero_idx, block, symbols);
    207             WriteBlock(symbols, block, num_nonzeros, emit_eob, dc_code, ac_code,
    208                        bw);
    209           }
    210         }
    211       }
    212     }
    213   }
    214   if (kMode == kStreamingModeTokens) {
    215     TokenArray* ta = &m->token_arrays[m->cur_token_array];
    216     ta->num_tokens = m->next_token - ta->tokens;
    217     ScanTokenInfo* sti = &m->scan_token_info[0];
    218     sti->num_tokens = m->total_num_tokens + ta->num_tokens;
    219     sti->restarts[0] = sti->num_tokens;
    220   }
    221 }
    222 
    223 void ComputeCoefficientsForiMCURow(j_compress_ptr cinfo) {
    224   ProcessiMCURow<kStreamingModeCoefficients>(cinfo);
    225 }
    226 
    227 void ComputeTokensForiMCURow(j_compress_ptr cinfo) {
    228   ProcessiMCURow<kStreamingModeTokens>(cinfo);
    229 }
    230 
    231 void WriteiMCURow(j_compress_ptr cinfo) {
    232   ProcessiMCURow<kStreamingModeBits>(cinfo);
    233 }
    234 
    235 // NOLINTNEXTLINE(google-readability-namespace-comments)
    236 }  // namespace HWY_NAMESPACE
    237 }  // namespace jpegli
    238 HWY_AFTER_NAMESPACE();
    239 
    240 #if HWY_ONCE
    241 namespace jpegli {
    242 HWY_EXPORT(ComputeCoefficientsForiMCURow);
    243 HWY_EXPORT(ComputeTokensForiMCURow);
    244 HWY_EXPORT(WriteiMCURow);
    245 
    246 void ComputeCoefficientsForiMCURow(j_compress_ptr cinfo) {
    247   HWY_DYNAMIC_DISPATCH(ComputeCoefficientsForiMCURow)(cinfo);
    248 }
    249 
    250 void ComputeTokensForiMCURow(j_compress_ptr cinfo) {
    251   HWY_DYNAMIC_DISPATCH(ComputeTokensForiMCURow)(cinfo);
    252 }
    253 
    254 void WriteiMCURow(j_compress_ptr cinfo) {
    255   HWY_DYNAMIC_DISPATCH(WriteiMCURow)(cinfo);
    256 }
    257 
    258 }  // namespace jpegli
    259 #endif  // HWY_ONCE