libjxl

FORK: libjxl patches used on blog
git clone https://git.neptards.moe/blog/libjxl.git
Log | Files | Refs | Submodules | README | LICENSE

no_png.cc (6552B)


      1 // Copyright (c) the JPEG XL Project Authors. All rights reserved.
      2 //
      3 // Use of this source code is governed by a BSD-style
      4 // license that can be found in the LICENSE file.
      5 
      6 #include "tools/wasm_demo/no_png.h"
      7 
      8 #include <array>
      9 #include <cstdlib>
     10 #include <cstring>
     11 
     12 namespace {
     13 
     14 std::array<uint32_t, 256> makeCrc32Lut() {
     15   std::array<uint32_t, 256> result;
     16   for (uint32_t i = 0; i < 256; ++i) {
     17     constexpr uint32_t poly = 0xEDB88320;
     18     uint32_t v = i;
     19     for (size_t i = 0; i < 8; ++i) {
     20       uint32_t mask = ~((v & 1) - 1);
     21       v = (v >> 1) ^ (poly & mask);
     22     }
     23     result[i] = v;
     24   }
     25   return result;
     26 }
     27 
     28 const std::array<uint32_t, 256> kCrc32Lut = makeCrc32Lut();
     29 
     30 const std::array<uint32_t, 8> kPngMagic = {137, 80, 78, 71, 13, 10, 26, 10};
     31 
     32 // No need to SIMDify it, only small blocks are actually checksummed.
     33 uint32_t CalculateCrc32(const uint8_t* start, const uint8_t* end) {
     34   uint32_t result = ~0;
     35   for (const uint8_t* data = start; data < end; ++data) {
     36     result ^= *data;
     37     result = (result >> 8) ^ kCrc32Lut[result & 0xFF];
     38   }
     39   return ~result;
     40 }
     41 
     42 void AdlerCopy(const uint8_t* src, uint8_t* dst, size_t length, uint32_t* s1,
     43                uint32_t* s2) {
     44   // TODO(eustas): SIMD-ify and use multithreading.
     45 
     46   // Precondition: s1, s2 normalized; length <= 65535
     47   uint32_t a = *s1;
     48   uint32_t b = *s2;
     49 
     50   for (size_t i = 0; i < length; ++i) {
     51     const uint8_t v = src[i];
     52     a += v;
     53     b += a;
     54     dst[i] = v;
     55   }
     56 
     57   // Postcondition: s1, s2 normalized.
     58   *s1 = a % 65521;
     59   *s2 = b % 65521;
     60 }
     61 
     62 constexpr size_t kMaxDeflateBlock = 65535;
     63 constexpr uint32_t kIhdrSize = 13;
     64 constexpr uint32_t kCicpSize = 4;
     65 
     66 void WriteU8(uint8_t*& dst, uint8_t value) { *(dst++) = value; }
     67 
     68 void WriteU16(uint8_t*& dst, uint16_t value) {
     69   memcpy(dst, &value, 2);
     70   dst += 2;
     71 }
     72 
     73 void WriteU32(uint8_t*& dst, uint32_t value) {
     74   memcpy(dst, &value, 4);
     75   dst += 4;
     76 }
     77 
     78 void WriteU32BE(uint8_t*& dst, uint32_t value) {
     79   WriteU32(dst, __builtin_bswap32(value));
     80 }
     81 
     82 }  // namespace
     83 
     84 extern "C" {
     85 
     86 uint8_t* WrapPixelsToPng(size_t width, size_t height, size_t bit_depth,
     87                          bool has_alpha, const uint8_t* input,
     88                          const std::vector<uint8_t>& icc,
     89                          const std::vector<uint8_t>& cicp,
     90                          uint32_t* output_size) {
     91   size_t row_size = width * (bit_depth / 8) * (3 + static_cast<int>(has_alpha));
     92   size_t data_size = height * (row_size + 1);
     93   size_t num_deflate_blocks =
     94       (data_size + kMaxDeflateBlock - 1) / kMaxDeflateBlock;
     95   size_t idat_size = data_size + num_deflate_blocks * 5 + 6;
     96   // 64k is enough for everyone
     97   bool has_iccp = !icc.empty() && (icc.size() <= kMaxDeflateBlock);
     98   size_t iccp_size = 3 + icc.size() + 5 + 6;  // name + data + deflate-wrapping
     99   bool has_cicp = (cicp.size() == kCicpSize);
    100   size_t total_size = 0;
    101   total_size += kPngMagic.size();
    102   total_size += 12 + kIhdrSize;
    103   total_size += has_cicp ? (kCicpSize + 12) : 0;
    104   total_size += has_iccp ? (iccp_size + 12) : 0;
    105   total_size += 12 + idat_size;
    106   total_size += 12;  // IEND
    107 
    108   uint8_t* output = static_cast<uint8_t*>(malloc(total_size));
    109   if (!output) {
    110     return nullptr;
    111   }
    112   uint8_t* dst = output;
    113   *output_size = total_size;
    114 
    115   for (const uint32_t& c : kPngMagic) {
    116     *(dst++) = c;
    117   }
    118 
    119   // IHDR
    120   WriteU32BE(dst, kIhdrSize);
    121   uint8_t* chunk_start = dst;
    122   WriteU32(dst, 0x52444849);
    123   WriteU32BE(dst, width);
    124   WriteU32BE(dst, height);
    125   WriteU8(dst, bit_depth);
    126   WriteU8(dst, has_alpha ? 6 : 2);
    127   WriteU8(dst, 0);  // compression: deflate
    128   WriteU8(dst, 0);  // filters: standard
    129   WriteU8(dst, 0);  // interlace: no
    130   uint32_t crc32 = CalculateCrc32(chunk_start, dst);
    131   WriteU32BE(dst, crc32);
    132 
    133   if (has_cicp) {
    134     // cICP
    135     WriteU32BE(dst, kCicpSize);
    136     uint8_t* chunk_start = dst;
    137     WriteU32(dst, 0x50434963);
    138     for (size_t i = 0; i < kCicpSize; ++i) {
    139       WriteU8(dst, cicp[i]);
    140     }
    141     uint32_t crc32 = CalculateCrc32(chunk_start, dst);
    142     WriteU32BE(dst, crc32);
    143   }
    144 
    145   if (has_iccp) {
    146     // iCCP
    147     WriteU32BE(dst, iccp_size);
    148     uint8_t* chunk_start = dst;
    149     WriteU32(dst, 0x50434369);
    150     WriteU8(dst, '1');   // Profile name
    151     WriteU8(dst, 0);     // NUL terminator
    152     WriteU8(dst, 0);     // Compression method: deflate
    153     WriteU8(dst, 0x08);  // CM = 8 (deflate), CINFO = 0 (window size = 2**(0+8))
    154     WriteU8(dst, 29);    // FCHECK; (FCHECK + 256* CMF) % 31 = 0
    155     uint32_t adler_s1 = 1;
    156     uint32_t adler_s2 = 0;
    157     WriteU8(dst, 1);  // btype = 00 (uncompressed), last
    158     uint16_t block_size = static_cast<uint16_t>(icc.size());
    159     WriteU16(dst, block_size);
    160     WriteU16(dst, ~block_size);
    161     AdlerCopy(icc.data(), dst, block_size, &adler_s1, &adler_s2);
    162     dst += block_size;
    163     uint32_t adler = (adler_s2 << 8) | adler_s1;
    164     WriteU32BE(dst, adler);
    165     uint32_t crc32 = CalculateCrc32(chunk_start, dst);
    166     WriteU32BE(dst, crc32);
    167   }
    168 
    169   // IDAT
    170   WriteU32BE(dst, idat_size);
    171   WriteU32(dst, 0x54414449);
    172   size_t offset = 0;
    173   size_t bytes_to_next_row = 0;
    174   uint32_t adler_s1 = 1;
    175   uint32_t adler_s2 = 0;
    176   WriteU8(dst, 0x08);  // CM = 8 (deflate), CINFO = 0 (window size = 2**(0+8))
    177   WriteU8(dst, 29);    // FCHECK; (FCHECK + 256* CMF) % 31 = 0
    178   for (size_t i = 0; i < num_deflate_blocks; ++i) {
    179     size_t block_size = data_size - offset;
    180     if (block_size > kMaxDeflateBlock) {
    181       block_size = kMaxDeflateBlock;
    182     }
    183     bool is_last = ((i + 1) == num_deflate_blocks);
    184     WriteU8(dst, static_cast<uint8_t>(is_last));  // btype = 00 (uncompressed)
    185     offset += block_size;
    186 
    187     WriteU16(dst, block_size);
    188     WriteU16(dst, ~block_size);
    189     while (block_size > 0) {
    190       if (bytes_to_next_row == 0) {
    191         WriteU8(dst, 0);  // filter: raw
    192         adler_s2 += adler_s1;
    193         bytes_to_next_row = row_size;
    194         block_size--;
    195         continue;
    196       }
    197       size_t bytes_to_copy = std::min(block_size, bytes_to_next_row);
    198       AdlerCopy(input, dst, bytes_to_copy, &adler_s1, &adler_s2);
    199       dst += bytes_to_copy;
    200       input += bytes_to_copy;
    201       block_size -= bytes_to_copy;
    202       bytes_to_next_row -= bytes_to_copy;
    203     }
    204   }
    205   // Fake Adler works well in Chrome; so let's not waste CPU cycles.
    206   uint32_t adler = 0;  // (adler_s2 << 8) | adler_s1;
    207   WriteU32BE(dst, adler);
    208   WriteU32BE(dst, 0);  // Fake CRC32
    209 
    210   // IEND
    211   WriteU32BE(dst, 0);
    212   chunk_start = dst;
    213   WriteU32(dst, 0x444E4549);
    214   // TODO(eustas): this is fixed value; precalculate?
    215   crc32 = CalculateCrc32(chunk_start, dst);
    216   WriteU32BE(dst, crc32);
    217 
    218   return output;
    219 }
    220 
    221 }  // extern "C"