libjxl

FORK: libjxl patches used on blog
git clone https://git.neptards.moe/blog/libjxl.git
Log | Files | Refs | Submodules | README | LICENSE

decode_scan.cc (17084B)


      1 // Copyright (c) the JPEG XL Project Authors. All rights reserved.
      2 //
      3 // Use of this source code is governed by a BSD-style
      4 // license that can be found in the LICENSE file.
      5 
      6 #include "lib/jpegli/decode_scan.h"
      7 
      8 #include <string.h>
      9 
     10 #include <hwy/base.h>
     11 
     12 #include "lib/jpegli/decode_internal.h"
     13 #include "lib/jpegli/error.h"
     14 #include "lib/jxl/base/status.h"
     15 
     16 namespace jpegli {
     17 namespace {
     18 
     19 // Max 14 block per MCU (when 1 channel is subsampled)
     20 // Max 64 nonzero coefficients per block
     21 // Max 16 symbol bits plus 11 extra bits per nonzero symbol
     22 // Max 2 bytes per 8 bits (worst case is all bytes are escaped 0xff)
     23 constexpr int kMaxMCUByteSize = 6048;
     24 
     25 // Helper structure to read bits from the entropy coded data segment.
     26 struct BitReaderState {
     27   BitReaderState(const uint8_t* data, const size_t len, size_t pos)
     28       : data_(data), len_(len), start_pos_(pos) {
     29     Reset(pos);
     30   }
     31 
     32   void Reset(size_t pos) {
     33     pos_ = pos;
     34     val_ = 0;
     35     bits_left_ = 0;
     36     next_marker_pos_ = len_;
     37     FillBitWindow();
     38   }
     39 
     40   // Returns the next byte and skips the 0xff/0x00 escape sequences.
     41   uint8_t GetNextByte() {
     42     if (pos_ >= next_marker_pos_) {
     43       ++pos_;
     44       return 0;
     45     }
     46     uint8_t c = data_[pos_++];
     47     if (c == 0xff) {
     48       uint8_t escape = pos_ < len_ ? data_[pos_] : 0;
     49       if (escape == 0) {
     50         ++pos_;
     51       } else {
     52         // 0xff was followed by a non-zero byte, which means that we found the
     53         // start of the next marker segment.
     54         next_marker_pos_ = pos_ - 1;
     55       }
     56     }
     57     return c;
     58   }
     59 
     60   void FillBitWindow() {
     61     if (bits_left_ <= 16) {
     62       while (bits_left_ <= 56) {
     63         val_ <<= 8;
     64         val_ |= static_cast<uint64_t>(GetNextByte());
     65         bits_left_ += 8;
     66       }
     67     }
     68   }
     69 
     70   int ReadBits(int nbits) {
     71     FillBitWindow();
     72     uint64_t val = (val_ >> (bits_left_ - nbits)) & ((1ULL << nbits) - 1);
     73     bits_left_ -= nbits;
     74     return val;
     75   }
     76 
     77   // Sets *pos to the next stream position, and *bit_pos to the bit position
     78   // within the next byte where parsing should continue.
     79   // Returns false if the stream ended too early.
     80   bool FinishStream(size_t* pos, size_t* bit_pos) {
     81     *bit_pos = (8 - (bits_left_ & 7)) & 7;
     82     // Give back some bytes that we did not use.
     83     int unused_bytes_left = DivCeil(bits_left_, 8);
     84     while (unused_bytes_left-- > 0) {
     85       --pos_;
     86       // If we give back a 0 byte, we need to check if it was a 0xff/0x00 escape
     87       // sequence, and if yes, we need to give back one more byte.
     88       if (((pos_ == len_ && pos_ == next_marker_pos_) ||
     89            (pos_ > 0 && pos_ < next_marker_pos_ && data_[pos_] == 0)) &&
     90           (data_[pos_ - 1] == 0xff)) {
     91         --pos_;
     92       }
     93     }
     94     if (pos_ >= next_marker_pos_) {
     95       *pos = next_marker_pos_;
     96       if (pos_ > next_marker_pos_ || *bit_pos > 0) {
     97         // Data ran out before the scan was complete.
     98         return false;
     99       }
    100     }
    101     *pos = pos_;
    102     return true;
    103   }
    104 
    105   const uint8_t* data_;
    106   const size_t len_;
    107   size_t pos_;
    108   uint64_t val_;
    109   int bits_left_;
    110   size_t next_marker_pos_;
    111   size_t start_pos_;
    112 };
    113 
    114 // Returns the next Huffman-coded symbol.
    115 int ReadSymbol(const HuffmanTableEntry* table, BitReaderState* br) {
    116   int nbits;
    117   br->FillBitWindow();
    118   int val = (br->val_ >> (br->bits_left_ - 8)) & 0xff;
    119   table += val;
    120   nbits = table->bits - 8;
    121   if (nbits > 0) {
    122     br->bits_left_ -= 8;
    123     table += table->value;
    124     val = (br->val_ >> (br->bits_left_ - nbits)) & ((1 << nbits) - 1);
    125     table += val;
    126   }
    127   br->bits_left_ -= table->bits;
    128   return table->value;
    129 }
    130 
    131 /**
    132  * Returns the DC diff or AC value for extra bits value x and prefix code s.
    133  *
    134  * CCITT Rec. T.81 (1992 E)
    135  * Table F.1 – Difference magnitude categories for DC coding
    136  *  SSSS | DIFF values
    137  * ------+--------------------------
    138  *     0 | 0
    139  *     1 | –1, 1
    140  *     2 | –3, –2, 2, 3
    141  *     3 | –7..–4, 4..7
    142  * ......|..........................
    143  *    11 | –2047..–1024, 1024..2047
    144  *
    145  * CCITT Rec. T.81 (1992 E)
    146  * Table F.2 – Categories assigned to coefficient values
    147  * [ Same as Table F.1, but does not include SSSS equal to 0 and 11]
    148  *
    149  *
    150  * CCITT Rec. T.81 (1992 E)
    151  * F.1.2.1.1 Structure of DC code table
    152  * For each category,... additional bits... appended... to uniquely identify
    153  * which difference... occurred... When DIFF is positive... SSSS... bits of DIFF
    154  * are appended. When DIFF is negative... SSSS... bits of (DIFF – 1) are
    155  * appended... Most significant bit... is 0 for negative differences and 1 for
    156  * positive differences.
    157  *
    158  * In other words the upper half of extra bits range represents DIFF as is.
    159  * The lower half represents the negative DIFFs with an offset.
    160  */
    161 int HuffExtend(int x, int s) {
    162   JXL_DASSERT(s >= 1);
    163   int half = 1 << (s - 1);
    164   if (x >= half) {
    165     JXL_DASSERT(x < (1 << s));
    166     return x;
    167   } else {
    168     return x - (1 << s) + 1;
    169   }
    170 }
    171 
    172 // Decodes one 8x8 block of DCT coefficients from the bit stream.
    173 bool DecodeDCTBlock(const HuffmanTableEntry* dc_huff,
    174                     const HuffmanTableEntry* ac_huff, int Ss, int Se, int Al,
    175                     int* eobrun, BitReaderState* br, coeff_t* last_dc_coeff,
    176                     coeff_t* coeffs) {
    177   // Nowadays multiplication is even faster than variable shift.
    178   int Am = 1 << Al;
    179   bool eobrun_allowed = Ss > 0;
    180   if (Ss == 0) {
    181     int s = ReadSymbol(dc_huff, br);
    182     if (s >= kJpegDCAlphabetSize) {
    183       return false;
    184     }
    185     int diff = 0;
    186     if (s > 0) {
    187       int bits = br->ReadBits(s);
    188       diff = HuffExtend(bits, s);
    189     }
    190     int coeff = diff + *last_dc_coeff;
    191     const int dc_coeff = coeff * Am;
    192     coeffs[0] = dc_coeff;
    193     // TODO(eustas): is there a more elegant / explicit way to check this?
    194     if (dc_coeff != coeffs[0]) {
    195       return false;
    196     }
    197     *last_dc_coeff = coeff;
    198     ++Ss;
    199   }
    200   if (Ss > Se) {
    201     return true;
    202   }
    203   if (*eobrun > 0) {
    204     --(*eobrun);
    205     return true;
    206   }
    207   for (int k = Ss; k <= Se; k++) {
    208     int sr = ReadSymbol(ac_huff, br);
    209     if (sr >= kJpegHuffmanAlphabetSize) {
    210       return false;
    211     }
    212     int r = sr >> 4;
    213     int s = sr & 15;
    214     if (s > 0) {
    215       k += r;
    216       if (k > Se) {
    217         return false;
    218       }
    219       if (s + Al >= kJpegDCAlphabetSize) {
    220         return false;
    221       }
    222       int bits = br->ReadBits(s);
    223       int coeff = HuffExtend(bits, s);
    224       coeffs[kJPEGNaturalOrder[k]] = coeff * Am;
    225     } else if (r == 15) {
    226       k += 15;
    227     } else {
    228       *eobrun = 1 << r;
    229       if (r > 0) {
    230         if (!eobrun_allowed) {
    231           return false;
    232         }
    233         *eobrun += br->ReadBits(r);
    234       }
    235       break;
    236     }
    237   }
    238   --(*eobrun);
    239   return true;
    240 }
    241 
    242 bool RefineDCTBlock(const HuffmanTableEntry* ac_huff, int Ss, int Se, int Al,
    243                     int* eobrun, BitReaderState* br, coeff_t* coeffs) {
    244   // Nowadays multiplication is even faster than variable shift.
    245   int Am = 1 << Al;
    246   bool eobrun_allowed = Ss > 0;
    247   if (Ss == 0) {
    248     int s = br->ReadBits(1);
    249     coeff_t dc_coeff = coeffs[0];
    250     dc_coeff |= s * Am;
    251     coeffs[0] = dc_coeff;
    252     ++Ss;
    253   }
    254   if (Ss > Se) {
    255     return true;
    256   }
    257   int p1 = Am;
    258   int m1 = -Am;
    259   int k = Ss;
    260   int r;
    261   int s;
    262   bool in_zero_run = false;
    263   if (*eobrun <= 0) {
    264     for (; k <= Se; k++) {
    265       s = ReadSymbol(ac_huff, br);
    266       if (s >= kJpegHuffmanAlphabetSize) {
    267         return false;
    268       }
    269       r = s >> 4;
    270       s &= 15;
    271       if (s) {
    272         if (s != 1) {
    273           return false;
    274         }
    275         s = br->ReadBits(1) ? p1 : m1;
    276         in_zero_run = false;
    277       } else {
    278         if (r != 15) {
    279           *eobrun = 1 << r;
    280           if (r > 0) {
    281             if (!eobrun_allowed) {
    282               return false;
    283             }
    284             *eobrun += br->ReadBits(r);
    285           }
    286           break;
    287         }
    288         in_zero_run = true;
    289       }
    290       do {
    291         coeff_t thiscoef = coeffs[kJPEGNaturalOrder[k]];
    292         if (thiscoef != 0) {
    293           if (br->ReadBits(1)) {
    294             if ((thiscoef & p1) == 0) {
    295               if (thiscoef >= 0) {
    296                 thiscoef += p1;
    297               } else {
    298                 thiscoef += m1;
    299               }
    300             }
    301           }
    302           coeffs[kJPEGNaturalOrder[k]] = thiscoef;
    303         } else {
    304           if (--r < 0) {
    305             break;
    306           }
    307         }
    308         k++;
    309       } while (k <= Se);
    310       if (s) {
    311         if (k > Se) {
    312           return false;
    313         }
    314         coeffs[kJPEGNaturalOrder[k]] = s;
    315       }
    316     }
    317   }
    318   if (in_zero_run) {
    319     return false;
    320   }
    321   if (*eobrun > 0) {
    322     for (; k <= Se; k++) {
    323       coeff_t thiscoef = coeffs[kJPEGNaturalOrder[k]];
    324       if (thiscoef != 0) {
    325         if (br->ReadBits(1)) {
    326           if ((thiscoef & p1) == 0) {
    327             if (thiscoef >= 0) {
    328               thiscoef += p1;
    329             } else {
    330               thiscoef += m1;
    331             }
    332           }
    333         }
    334         coeffs[kJPEGNaturalOrder[k]] = thiscoef;
    335       }
    336     }
    337   }
    338   --(*eobrun);
    339   return true;
    340 }
    341 
    342 void SaveMCUCodingState(j_decompress_ptr cinfo) {
    343   jpeg_decomp_master* m = cinfo->master;
    344   memcpy(m->mcu_.last_dc_coeff, m->last_dc_coeff_, sizeof(m->last_dc_coeff_));
    345   m->mcu_.eobrun = m->eobrun_;
    346   size_t offset = 0;
    347   for (int i = 0; i < cinfo->comps_in_scan; ++i) {
    348     const jpeg_component_info* comp = cinfo->cur_comp_info[i];
    349     int c = comp->component_index;
    350     size_t block_x = m->scan_mcu_col_ * comp->MCU_width;
    351     for (int iy = 0; iy < comp->MCU_height; ++iy) {
    352       size_t block_y = m->scan_mcu_row_ * comp->MCU_height + iy;
    353       size_t biy = block_y % comp->v_samp_factor;
    354       if (block_y >= comp->height_in_blocks) {
    355         continue;
    356       }
    357       size_t nblocks =
    358           std::min<size_t>(comp->MCU_width, comp->width_in_blocks - block_x);
    359       size_t ncoeffs = nblocks * DCTSIZE2;
    360       coeff_t* coeffs = &m->coeff_rows[c][biy][block_x][0];
    361       memcpy(&m->mcu_.coeffs[offset], coeffs, ncoeffs * sizeof(coeffs[0]));
    362       offset += ncoeffs;
    363     }
    364   }
    365 }
    366 
    367 void RestoreMCUCodingState(j_decompress_ptr cinfo) {
    368   jpeg_decomp_master* m = cinfo->master;
    369   memcpy(m->last_dc_coeff_, m->mcu_.last_dc_coeff, sizeof(m->last_dc_coeff_));
    370   m->eobrun_ = m->mcu_.eobrun;
    371   size_t offset = 0;
    372   for (int i = 0; i < cinfo->comps_in_scan; ++i) {
    373     const jpeg_component_info* comp = cinfo->cur_comp_info[i];
    374     int c = comp->component_index;
    375     size_t block_x = m->scan_mcu_col_ * comp->MCU_width;
    376     for (int iy = 0; iy < comp->MCU_height; ++iy) {
    377       size_t block_y = m->scan_mcu_row_ * comp->MCU_height + iy;
    378       size_t biy = block_y % comp->v_samp_factor;
    379       if (block_y >= comp->height_in_blocks) {
    380         continue;
    381       }
    382       size_t nblocks =
    383           std::min<size_t>(comp->MCU_width, comp->width_in_blocks - block_x);
    384       size_t ncoeffs = nblocks * DCTSIZE2;
    385       coeff_t* coeffs = &m->coeff_rows[c][biy][block_x][0];
    386       memcpy(coeffs, &m->mcu_.coeffs[offset], ncoeffs * sizeof(coeffs[0]));
    387       offset += ncoeffs;
    388     }
    389   }
    390 }
    391 
    392 bool FinishScan(j_decompress_ptr cinfo, const uint8_t* data, const size_t len,
    393                 size_t* pos, size_t* bit_pos) {
    394   jpeg_decomp_master* m = cinfo->master;
    395   if (m->eobrun_ > 0) {
    396     JPEGLI_ERROR("End-of-block run too long.");
    397   }
    398   m->eobrun_ = -1;
    399   memset(m->last_dc_coeff_, 0, sizeof(m->last_dc_coeff_));
    400   if (*bit_pos == 0) {
    401     return true;
    402   }
    403   if (data[*pos] == 0xff) {
    404     // After last br.FinishStream we checked that there is at least 2 bytes
    405     // in the buffer.
    406     JXL_DASSERT(*pos + 1 < len);
    407     // br.FinishStream would have detected an early marker.
    408     JXL_DASSERT(data[*pos + 1] == 0);
    409     *pos += 2;
    410   } else {
    411     *pos += 1;
    412   }
    413   *bit_pos = 0;
    414   return true;
    415 }
    416 
    417 }  // namespace
    418 
    419 void PrepareForiMCURow(j_decompress_ptr cinfo) {
    420   jpeg_decomp_master* m = cinfo->master;
    421   for (int i = 0; i < cinfo->comps_in_scan; ++i) {
    422     const jpeg_component_info* comp = cinfo->cur_comp_info[i];
    423     int c = comp->component_index;
    424     int by0 = cinfo->input_iMCU_row * comp->v_samp_factor;
    425     int block_rows_left = comp->height_in_blocks - by0;
    426     int max_block_rows = std::min(comp->v_samp_factor, block_rows_left);
    427     int offset = m->streaming_mode_ ? 0 : by0;
    428     m->coeff_rows[c] = (*cinfo->mem->access_virt_barray)(
    429         reinterpret_cast<j_common_ptr>(cinfo), m->coef_arrays[c], offset,
    430         max_block_rows, TRUE);
    431   }
    432 }
    433 
    434 int ProcessScan(j_decompress_ptr cinfo, const uint8_t* const data,
    435                 const size_t len, size_t* pos, size_t* bit_pos) {
    436   if (len == 0) {
    437     return kNeedMoreInput;
    438   }
    439   jpeg_decomp_master* m = cinfo->master;
    440   for (;;) {
    441     // Handle the restart intervals.
    442     if (cinfo->restart_interval > 0 && m->restarts_to_go_ == 0) {
    443       if (!FinishScan(cinfo, data, len, pos, bit_pos)) {
    444         return kNeedMoreInput;
    445       }
    446       // Go to the next marker, warn if we had to skip any data.
    447       size_t num_skipped = 0;
    448       while (*pos + 1 < len && (data[*pos] != 0xff || data[*pos + 1] == 0 ||
    449                                 data[*pos + 1] == 0xff)) {
    450         ++(*pos);
    451         ++num_skipped;
    452       }
    453       if (num_skipped > 0) {
    454         JPEGLI_WARN("Skipped %d bytes before restart marker",
    455                     static_cast<int>(num_skipped));
    456       }
    457       if (*pos + 2 > len) {
    458         return kNeedMoreInput;
    459       }
    460       cinfo->unread_marker = data[*pos + 1];
    461       *pos += 2;
    462       return kHandleRestart;
    463     }
    464 
    465     size_t start_pos = *pos;
    466     BitReaderState br(data, len, start_pos);
    467     if (*bit_pos > 0) {
    468       br.ReadBits(*bit_pos);
    469     }
    470     if (start_pos + kMaxMCUByteSize > len) {
    471       SaveMCUCodingState(cinfo);
    472     }
    473 
    474     // Decode one MCU.
    475     HWY_ALIGN_MAX static coeff_t sink_block[DCTSIZE2] = {0};
    476     bool scan_ok = true;
    477     for (int i = 0; i < cinfo->comps_in_scan; ++i) {
    478       const jpeg_component_info* comp = cinfo->cur_comp_info[i];
    479       int c = comp->component_index;
    480       const HuffmanTableEntry* dc_lut =
    481           &m->dc_huff_lut_[comp->dc_tbl_no * kJpegHuffmanLutSize];
    482       const HuffmanTableEntry* ac_lut =
    483           &m->ac_huff_lut_[comp->ac_tbl_no * kJpegHuffmanLutSize];
    484       for (int iy = 0; iy < comp->MCU_height; ++iy) {
    485         size_t block_y = m->scan_mcu_row_ * comp->MCU_height + iy;
    486         int biy = block_y % comp->v_samp_factor;
    487         for (int ix = 0; ix < comp->MCU_width; ++ix) {
    488           size_t block_x = m->scan_mcu_col_ * comp->MCU_width + ix;
    489           coeff_t* coeffs;
    490           if (block_x >= comp->width_in_blocks ||
    491               block_y >= comp->height_in_blocks) {
    492             // Note that it is OK that sink_block is uninitialized because
    493             // it will never be used in any branches, even in the RefineDCTBlock
    494             // case, because only DC scans can be interleaved and we don't use
    495             // the zero-ness of the DC coeff in the DC refinement code-path.
    496             coeffs = sink_block;
    497           } else {
    498             coeffs = &m->coeff_rows[c][biy][block_x][0];
    499           }
    500           if (cinfo->Ah == 0) {
    501             if (!DecodeDCTBlock(dc_lut, ac_lut, cinfo->Ss, cinfo->Se, cinfo->Al,
    502                                 &m->eobrun_, &br,
    503                                 &m->last_dc_coeff_[comp->component_index],
    504                                 coeffs)) {
    505               scan_ok = false;
    506             }
    507           } else {
    508             if (!RefineDCTBlock(ac_lut, cinfo->Ss, cinfo->Se, cinfo->Al,
    509                                 &m->eobrun_, &br, coeffs)) {
    510               scan_ok = false;
    511             }
    512           }
    513         }
    514       }
    515     }
    516     size_t new_pos;
    517     size_t new_bit_pos;
    518     bool stream_ok = br.FinishStream(&new_pos, &new_bit_pos);
    519     if (new_pos + 2 > len) {
    520       // If reading stopped within the last two bytes, we have to request more
    521       // input even if FinishStream() returned true, since the Huffman code
    522       // reader could have peaked ahead some bits past the current input chunk
    523       // and thus the last prefix code length could have been wrong. We can do
    524       // this because a valid JPEG bit stream has two extra bytes at the end.
    525       RestoreMCUCodingState(cinfo);
    526       return kNeedMoreInput;
    527     }
    528     *pos = new_pos;
    529     *bit_pos = new_bit_pos;
    530     if (!stream_ok) {
    531       // We hit a marker during parsing.
    532       JXL_DASSERT(data[*pos] == 0xff);
    533       JXL_DASSERT(data[*pos + 1] != 0);
    534       RestoreMCUCodingState(cinfo);
    535       JPEGLI_WARN("Incomplete scan detected.");
    536       return JPEG_SCAN_COMPLETED;
    537     }
    538     if (!scan_ok) {
    539       JPEGLI_ERROR("Failed to decode DCT block");
    540     }
    541     if (m->restarts_to_go_ > 0) {
    542       --m->restarts_to_go_;
    543     }
    544     ++m->scan_mcu_col_;
    545     if (m->scan_mcu_col_ == cinfo->MCUs_per_row) {
    546       ++m->scan_mcu_row_;
    547       m->scan_mcu_col_ = 0;
    548       if (m->scan_mcu_row_ == cinfo->MCU_rows_in_scan) {
    549         if (!FinishScan(cinfo, data, len, pos, bit_pos)) {
    550           return kNeedMoreInput;
    551         }
    552         break;
    553       } else if ((m->scan_mcu_row_ % m->mcu_rows_per_iMCU_row_) == 0) {
    554         // Current iMCU row is done.
    555         break;
    556       }
    557     }
    558   }
    559   ++cinfo->input_iMCU_row;
    560   if (cinfo->input_iMCU_row < cinfo->total_iMCU_rows) {
    561     PrepareForiMCURow(cinfo);
    562     return JPEG_ROW_COMPLETED;
    563   }
    564   return JPEG_SCAN_COMPLETED;
    565 }
    566 
    567 }  // namespace jpegli