decode_scan.cc (17084B)
1 // Copyright (c) the JPEG XL Project Authors. All rights reserved. 2 // 3 // Use of this source code is governed by a BSD-style 4 // license that can be found in the LICENSE file. 5 6 #include "lib/jpegli/decode_scan.h" 7 8 #include <string.h> 9 10 #include <hwy/base.h> 11 12 #include "lib/jpegli/decode_internal.h" 13 #include "lib/jpegli/error.h" 14 #include "lib/jxl/base/status.h" 15 16 namespace jpegli { 17 namespace { 18 19 // Max 14 block per MCU (when 1 channel is subsampled) 20 // Max 64 nonzero coefficients per block 21 // Max 16 symbol bits plus 11 extra bits per nonzero symbol 22 // Max 2 bytes per 8 bits (worst case is all bytes are escaped 0xff) 23 constexpr int kMaxMCUByteSize = 6048; 24 25 // Helper structure to read bits from the entropy coded data segment. 26 struct BitReaderState { 27 BitReaderState(const uint8_t* data, const size_t len, size_t pos) 28 : data_(data), len_(len), start_pos_(pos) { 29 Reset(pos); 30 } 31 32 void Reset(size_t pos) { 33 pos_ = pos; 34 val_ = 0; 35 bits_left_ = 0; 36 next_marker_pos_ = len_; 37 FillBitWindow(); 38 } 39 40 // Returns the next byte and skips the 0xff/0x00 escape sequences. 41 uint8_t GetNextByte() { 42 if (pos_ >= next_marker_pos_) { 43 ++pos_; 44 return 0; 45 } 46 uint8_t c = data_[pos_++]; 47 if (c == 0xff) { 48 uint8_t escape = pos_ < len_ ? data_[pos_] : 0; 49 if (escape == 0) { 50 ++pos_; 51 } else { 52 // 0xff was followed by a non-zero byte, which means that we found the 53 // start of the next marker segment. 54 next_marker_pos_ = pos_ - 1; 55 } 56 } 57 return c; 58 } 59 60 void FillBitWindow() { 61 if (bits_left_ <= 16) { 62 while (bits_left_ <= 56) { 63 val_ <<= 8; 64 val_ |= static_cast<uint64_t>(GetNextByte()); 65 bits_left_ += 8; 66 } 67 } 68 } 69 70 int ReadBits(int nbits) { 71 FillBitWindow(); 72 uint64_t val = (val_ >> (bits_left_ - nbits)) & ((1ULL << nbits) - 1); 73 bits_left_ -= nbits; 74 return val; 75 } 76 77 // Sets *pos to the next stream position, and *bit_pos to the bit position 78 // within the next byte where parsing should continue. 79 // Returns false if the stream ended too early. 80 bool FinishStream(size_t* pos, size_t* bit_pos) { 81 *bit_pos = (8 - (bits_left_ & 7)) & 7; 82 // Give back some bytes that we did not use. 83 int unused_bytes_left = DivCeil(bits_left_, 8); 84 while (unused_bytes_left-- > 0) { 85 --pos_; 86 // If we give back a 0 byte, we need to check if it was a 0xff/0x00 escape 87 // sequence, and if yes, we need to give back one more byte. 88 if (((pos_ == len_ && pos_ == next_marker_pos_) || 89 (pos_ > 0 && pos_ < next_marker_pos_ && data_[pos_] == 0)) && 90 (data_[pos_ - 1] == 0xff)) { 91 --pos_; 92 } 93 } 94 if (pos_ >= next_marker_pos_) { 95 *pos = next_marker_pos_; 96 if (pos_ > next_marker_pos_ || *bit_pos > 0) { 97 // Data ran out before the scan was complete. 98 return false; 99 } 100 } 101 *pos = pos_; 102 return true; 103 } 104 105 const uint8_t* data_; 106 const size_t len_; 107 size_t pos_; 108 uint64_t val_; 109 int bits_left_; 110 size_t next_marker_pos_; 111 size_t start_pos_; 112 }; 113 114 // Returns the next Huffman-coded symbol. 115 int ReadSymbol(const HuffmanTableEntry* table, BitReaderState* br) { 116 int nbits; 117 br->FillBitWindow(); 118 int val = (br->val_ >> (br->bits_left_ - 8)) & 0xff; 119 table += val; 120 nbits = table->bits - 8; 121 if (nbits > 0) { 122 br->bits_left_ -= 8; 123 table += table->value; 124 val = (br->val_ >> (br->bits_left_ - nbits)) & ((1 << nbits) - 1); 125 table += val; 126 } 127 br->bits_left_ -= table->bits; 128 return table->value; 129 } 130 131 /** 132 * Returns the DC diff or AC value for extra bits value x and prefix code s. 133 * 134 * CCITT Rec. T.81 (1992 E) 135 * Table F.1 – Difference magnitude categories for DC coding 136 * SSSS | DIFF values 137 * ------+-------------------------- 138 * 0 | 0 139 * 1 | –1, 1 140 * 2 | –3, –2, 2, 3 141 * 3 | –7..–4, 4..7 142 * ......|.......................... 143 * 11 | –2047..–1024, 1024..2047 144 * 145 * CCITT Rec. T.81 (1992 E) 146 * Table F.2 – Categories assigned to coefficient values 147 * [ Same as Table F.1, but does not include SSSS equal to 0 and 11] 148 * 149 * 150 * CCITT Rec. T.81 (1992 E) 151 * F.1.2.1.1 Structure of DC code table 152 * For each category,... additional bits... appended... to uniquely identify 153 * which difference... occurred... When DIFF is positive... SSSS... bits of DIFF 154 * are appended. When DIFF is negative... SSSS... bits of (DIFF – 1) are 155 * appended... Most significant bit... is 0 for negative differences and 1 for 156 * positive differences. 157 * 158 * In other words the upper half of extra bits range represents DIFF as is. 159 * The lower half represents the negative DIFFs with an offset. 160 */ 161 int HuffExtend(int x, int s) { 162 JXL_DASSERT(s >= 1); 163 int half = 1 << (s - 1); 164 if (x >= half) { 165 JXL_DASSERT(x < (1 << s)); 166 return x; 167 } else { 168 return x - (1 << s) + 1; 169 } 170 } 171 172 // Decodes one 8x8 block of DCT coefficients from the bit stream. 173 bool DecodeDCTBlock(const HuffmanTableEntry* dc_huff, 174 const HuffmanTableEntry* ac_huff, int Ss, int Se, int Al, 175 int* eobrun, BitReaderState* br, coeff_t* last_dc_coeff, 176 coeff_t* coeffs) { 177 // Nowadays multiplication is even faster than variable shift. 178 int Am = 1 << Al; 179 bool eobrun_allowed = Ss > 0; 180 if (Ss == 0) { 181 int s = ReadSymbol(dc_huff, br); 182 if (s >= kJpegDCAlphabetSize) { 183 return false; 184 } 185 int diff = 0; 186 if (s > 0) { 187 int bits = br->ReadBits(s); 188 diff = HuffExtend(bits, s); 189 } 190 int coeff = diff + *last_dc_coeff; 191 const int dc_coeff = coeff * Am; 192 coeffs[0] = dc_coeff; 193 // TODO(eustas): is there a more elegant / explicit way to check this? 194 if (dc_coeff != coeffs[0]) { 195 return false; 196 } 197 *last_dc_coeff = coeff; 198 ++Ss; 199 } 200 if (Ss > Se) { 201 return true; 202 } 203 if (*eobrun > 0) { 204 --(*eobrun); 205 return true; 206 } 207 for (int k = Ss; k <= Se; k++) { 208 int sr = ReadSymbol(ac_huff, br); 209 if (sr >= kJpegHuffmanAlphabetSize) { 210 return false; 211 } 212 int r = sr >> 4; 213 int s = sr & 15; 214 if (s > 0) { 215 k += r; 216 if (k > Se) { 217 return false; 218 } 219 if (s + Al >= kJpegDCAlphabetSize) { 220 return false; 221 } 222 int bits = br->ReadBits(s); 223 int coeff = HuffExtend(bits, s); 224 coeffs[kJPEGNaturalOrder[k]] = coeff * Am; 225 } else if (r == 15) { 226 k += 15; 227 } else { 228 *eobrun = 1 << r; 229 if (r > 0) { 230 if (!eobrun_allowed) { 231 return false; 232 } 233 *eobrun += br->ReadBits(r); 234 } 235 break; 236 } 237 } 238 --(*eobrun); 239 return true; 240 } 241 242 bool RefineDCTBlock(const HuffmanTableEntry* ac_huff, int Ss, int Se, int Al, 243 int* eobrun, BitReaderState* br, coeff_t* coeffs) { 244 // Nowadays multiplication is even faster than variable shift. 245 int Am = 1 << Al; 246 bool eobrun_allowed = Ss > 0; 247 if (Ss == 0) { 248 int s = br->ReadBits(1); 249 coeff_t dc_coeff = coeffs[0]; 250 dc_coeff |= s * Am; 251 coeffs[0] = dc_coeff; 252 ++Ss; 253 } 254 if (Ss > Se) { 255 return true; 256 } 257 int p1 = Am; 258 int m1 = -Am; 259 int k = Ss; 260 int r; 261 int s; 262 bool in_zero_run = false; 263 if (*eobrun <= 0) { 264 for (; k <= Se; k++) { 265 s = ReadSymbol(ac_huff, br); 266 if (s >= kJpegHuffmanAlphabetSize) { 267 return false; 268 } 269 r = s >> 4; 270 s &= 15; 271 if (s) { 272 if (s != 1) { 273 return false; 274 } 275 s = br->ReadBits(1) ? p1 : m1; 276 in_zero_run = false; 277 } else { 278 if (r != 15) { 279 *eobrun = 1 << r; 280 if (r > 0) { 281 if (!eobrun_allowed) { 282 return false; 283 } 284 *eobrun += br->ReadBits(r); 285 } 286 break; 287 } 288 in_zero_run = true; 289 } 290 do { 291 coeff_t thiscoef = coeffs[kJPEGNaturalOrder[k]]; 292 if (thiscoef != 0) { 293 if (br->ReadBits(1)) { 294 if ((thiscoef & p1) == 0) { 295 if (thiscoef >= 0) { 296 thiscoef += p1; 297 } else { 298 thiscoef += m1; 299 } 300 } 301 } 302 coeffs[kJPEGNaturalOrder[k]] = thiscoef; 303 } else { 304 if (--r < 0) { 305 break; 306 } 307 } 308 k++; 309 } while (k <= Se); 310 if (s) { 311 if (k > Se) { 312 return false; 313 } 314 coeffs[kJPEGNaturalOrder[k]] = s; 315 } 316 } 317 } 318 if (in_zero_run) { 319 return false; 320 } 321 if (*eobrun > 0) { 322 for (; k <= Se; k++) { 323 coeff_t thiscoef = coeffs[kJPEGNaturalOrder[k]]; 324 if (thiscoef != 0) { 325 if (br->ReadBits(1)) { 326 if ((thiscoef & p1) == 0) { 327 if (thiscoef >= 0) { 328 thiscoef += p1; 329 } else { 330 thiscoef += m1; 331 } 332 } 333 } 334 coeffs[kJPEGNaturalOrder[k]] = thiscoef; 335 } 336 } 337 } 338 --(*eobrun); 339 return true; 340 } 341 342 void SaveMCUCodingState(j_decompress_ptr cinfo) { 343 jpeg_decomp_master* m = cinfo->master; 344 memcpy(m->mcu_.last_dc_coeff, m->last_dc_coeff_, sizeof(m->last_dc_coeff_)); 345 m->mcu_.eobrun = m->eobrun_; 346 size_t offset = 0; 347 for (int i = 0; i < cinfo->comps_in_scan; ++i) { 348 const jpeg_component_info* comp = cinfo->cur_comp_info[i]; 349 int c = comp->component_index; 350 size_t block_x = m->scan_mcu_col_ * comp->MCU_width; 351 for (int iy = 0; iy < comp->MCU_height; ++iy) { 352 size_t block_y = m->scan_mcu_row_ * comp->MCU_height + iy; 353 size_t biy = block_y % comp->v_samp_factor; 354 if (block_y >= comp->height_in_blocks) { 355 continue; 356 } 357 size_t nblocks = 358 std::min<size_t>(comp->MCU_width, comp->width_in_blocks - block_x); 359 size_t ncoeffs = nblocks * DCTSIZE2; 360 coeff_t* coeffs = &m->coeff_rows[c][biy][block_x][0]; 361 memcpy(&m->mcu_.coeffs[offset], coeffs, ncoeffs * sizeof(coeffs[0])); 362 offset += ncoeffs; 363 } 364 } 365 } 366 367 void RestoreMCUCodingState(j_decompress_ptr cinfo) { 368 jpeg_decomp_master* m = cinfo->master; 369 memcpy(m->last_dc_coeff_, m->mcu_.last_dc_coeff, sizeof(m->last_dc_coeff_)); 370 m->eobrun_ = m->mcu_.eobrun; 371 size_t offset = 0; 372 for (int i = 0; i < cinfo->comps_in_scan; ++i) { 373 const jpeg_component_info* comp = cinfo->cur_comp_info[i]; 374 int c = comp->component_index; 375 size_t block_x = m->scan_mcu_col_ * comp->MCU_width; 376 for (int iy = 0; iy < comp->MCU_height; ++iy) { 377 size_t block_y = m->scan_mcu_row_ * comp->MCU_height + iy; 378 size_t biy = block_y % comp->v_samp_factor; 379 if (block_y >= comp->height_in_blocks) { 380 continue; 381 } 382 size_t nblocks = 383 std::min<size_t>(comp->MCU_width, comp->width_in_blocks - block_x); 384 size_t ncoeffs = nblocks * DCTSIZE2; 385 coeff_t* coeffs = &m->coeff_rows[c][biy][block_x][0]; 386 memcpy(coeffs, &m->mcu_.coeffs[offset], ncoeffs * sizeof(coeffs[0])); 387 offset += ncoeffs; 388 } 389 } 390 } 391 392 bool FinishScan(j_decompress_ptr cinfo, const uint8_t* data, const size_t len, 393 size_t* pos, size_t* bit_pos) { 394 jpeg_decomp_master* m = cinfo->master; 395 if (m->eobrun_ > 0) { 396 JPEGLI_ERROR("End-of-block run too long."); 397 } 398 m->eobrun_ = -1; 399 memset(m->last_dc_coeff_, 0, sizeof(m->last_dc_coeff_)); 400 if (*bit_pos == 0) { 401 return true; 402 } 403 if (data[*pos] == 0xff) { 404 // After last br.FinishStream we checked that there is at least 2 bytes 405 // in the buffer. 406 JXL_DASSERT(*pos + 1 < len); 407 // br.FinishStream would have detected an early marker. 408 JXL_DASSERT(data[*pos + 1] == 0); 409 *pos += 2; 410 } else { 411 *pos += 1; 412 } 413 *bit_pos = 0; 414 return true; 415 } 416 417 } // namespace 418 419 void PrepareForiMCURow(j_decompress_ptr cinfo) { 420 jpeg_decomp_master* m = cinfo->master; 421 for (int i = 0; i < cinfo->comps_in_scan; ++i) { 422 const jpeg_component_info* comp = cinfo->cur_comp_info[i]; 423 int c = comp->component_index; 424 int by0 = cinfo->input_iMCU_row * comp->v_samp_factor; 425 int block_rows_left = comp->height_in_blocks - by0; 426 int max_block_rows = std::min(comp->v_samp_factor, block_rows_left); 427 int offset = m->streaming_mode_ ? 0 : by0; 428 m->coeff_rows[c] = (*cinfo->mem->access_virt_barray)( 429 reinterpret_cast<j_common_ptr>(cinfo), m->coef_arrays[c], offset, 430 max_block_rows, TRUE); 431 } 432 } 433 434 int ProcessScan(j_decompress_ptr cinfo, const uint8_t* const data, 435 const size_t len, size_t* pos, size_t* bit_pos) { 436 if (len == 0) { 437 return kNeedMoreInput; 438 } 439 jpeg_decomp_master* m = cinfo->master; 440 for (;;) { 441 // Handle the restart intervals. 442 if (cinfo->restart_interval > 0 && m->restarts_to_go_ == 0) { 443 if (!FinishScan(cinfo, data, len, pos, bit_pos)) { 444 return kNeedMoreInput; 445 } 446 // Go to the next marker, warn if we had to skip any data. 447 size_t num_skipped = 0; 448 while (*pos + 1 < len && (data[*pos] != 0xff || data[*pos + 1] == 0 || 449 data[*pos + 1] == 0xff)) { 450 ++(*pos); 451 ++num_skipped; 452 } 453 if (num_skipped > 0) { 454 JPEGLI_WARN("Skipped %d bytes before restart marker", 455 static_cast<int>(num_skipped)); 456 } 457 if (*pos + 2 > len) { 458 return kNeedMoreInput; 459 } 460 cinfo->unread_marker = data[*pos + 1]; 461 *pos += 2; 462 return kHandleRestart; 463 } 464 465 size_t start_pos = *pos; 466 BitReaderState br(data, len, start_pos); 467 if (*bit_pos > 0) { 468 br.ReadBits(*bit_pos); 469 } 470 if (start_pos + kMaxMCUByteSize > len) { 471 SaveMCUCodingState(cinfo); 472 } 473 474 // Decode one MCU. 475 HWY_ALIGN_MAX static coeff_t sink_block[DCTSIZE2] = {0}; 476 bool scan_ok = true; 477 for (int i = 0; i < cinfo->comps_in_scan; ++i) { 478 const jpeg_component_info* comp = cinfo->cur_comp_info[i]; 479 int c = comp->component_index; 480 const HuffmanTableEntry* dc_lut = 481 &m->dc_huff_lut_[comp->dc_tbl_no * kJpegHuffmanLutSize]; 482 const HuffmanTableEntry* ac_lut = 483 &m->ac_huff_lut_[comp->ac_tbl_no * kJpegHuffmanLutSize]; 484 for (int iy = 0; iy < comp->MCU_height; ++iy) { 485 size_t block_y = m->scan_mcu_row_ * comp->MCU_height + iy; 486 int biy = block_y % comp->v_samp_factor; 487 for (int ix = 0; ix < comp->MCU_width; ++ix) { 488 size_t block_x = m->scan_mcu_col_ * comp->MCU_width + ix; 489 coeff_t* coeffs; 490 if (block_x >= comp->width_in_blocks || 491 block_y >= comp->height_in_blocks) { 492 // Note that it is OK that sink_block is uninitialized because 493 // it will never be used in any branches, even in the RefineDCTBlock 494 // case, because only DC scans can be interleaved and we don't use 495 // the zero-ness of the DC coeff in the DC refinement code-path. 496 coeffs = sink_block; 497 } else { 498 coeffs = &m->coeff_rows[c][biy][block_x][0]; 499 } 500 if (cinfo->Ah == 0) { 501 if (!DecodeDCTBlock(dc_lut, ac_lut, cinfo->Ss, cinfo->Se, cinfo->Al, 502 &m->eobrun_, &br, 503 &m->last_dc_coeff_[comp->component_index], 504 coeffs)) { 505 scan_ok = false; 506 } 507 } else { 508 if (!RefineDCTBlock(ac_lut, cinfo->Ss, cinfo->Se, cinfo->Al, 509 &m->eobrun_, &br, coeffs)) { 510 scan_ok = false; 511 } 512 } 513 } 514 } 515 } 516 size_t new_pos; 517 size_t new_bit_pos; 518 bool stream_ok = br.FinishStream(&new_pos, &new_bit_pos); 519 if (new_pos + 2 > len) { 520 // If reading stopped within the last two bytes, we have to request more 521 // input even if FinishStream() returned true, since the Huffman code 522 // reader could have peaked ahead some bits past the current input chunk 523 // and thus the last prefix code length could have been wrong. We can do 524 // this because a valid JPEG bit stream has two extra bytes at the end. 525 RestoreMCUCodingState(cinfo); 526 return kNeedMoreInput; 527 } 528 *pos = new_pos; 529 *bit_pos = new_bit_pos; 530 if (!stream_ok) { 531 // We hit a marker during parsing. 532 JXL_DASSERT(data[*pos] == 0xff); 533 JXL_DASSERT(data[*pos + 1] != 0); 534 RestoreMCUCodingState(cinfo); 535 JPEGLI_WARN("Incomplete scan detected."); 536 return JPEG_SCAN_COMPLETED; 537 } 538 if (!scan_ok) { 539 JPEGLI_ERROR("Failed to decode DCT block"); 540 } 541 if (m->restarts_to_go_ > 0) { 542 --m->restarts_to_go_; 543 } 544 ++m->scan_mcu_col_; 545 if (m->scan_mcu_col_ == cinfo->MCUs_per_row) { 546 ++m->scan_mcu_row_; 547 m->scan_mcu_col_ = 0; 548 if (m->scan_mcu_row_ == cinfo->MCU_rows_in_scan) { 549 if (!FinishScan(cinfo, data, len, pos, bit_pos)) { 550 return kNeedMoreInput; 551 } 552 break; 553 } else if ((m->scan_mcu_row_ % m->mcu_rows_per_iMCU_row_) == 0) { 554 // Current iMCU row is done. 555 break; 556 } 557 } 558 } 559 ++cinfo->input_iMCU_row; 560 if (cinfo->input_iMCU_row < cinfo->total_iMCU_rows) { 561 PrepareForiMCURow(cinfo); 562 return JPEG_ROW_COMPLETED; 563 } 564 return JPEG_SCAN_COMPLETED; 565 } 566 567 } // namespace jpegli