no_png.cc (6552B)
1 // Copyright (c) the JPEG XL Project Authors. All rights reserved. 2 // 3 // Use of this source code is governed by a BSD-style 4 // license that can be found in the LICENSE file. 5 6 #include "tools/wasm_demo/no_png.h" 7 8 #include <array> 9 #include <cstdlib> 10 #include <cstring> 11 12 namespace { 13 14 std::array<uint32_t, 256> makeCrc32Lut() { 15 std::array<uint32_t, 256> result; 16 for (uint32_t i = 0; i < 256; ++i) { 17 constexpr uint32_t poly = 0xEDB88320; 18 uint32_t v = i; 19 for (size_t i = 0; i < 8; ++i) { 20 uint32_t mask = ~((v & 1) - 1); 21 v = (v >> 1) ^ (poly & mask); 22 } 23 result[i] = v; 24 } 25 return result; 26 } 27 28 const std::array<uint32_t, 256> kCrc32Lut = makeCrc32Lut(); 29 30 const std::array<uint32_t, 8> kPngMagic = {137, 80, 78, 71, 13, 10, 26, 10}; 31 32 // No need to SIMDify it, only small blocks are actually checksummed. 33 uint32_t CalculateCrc32(const uint8_t* start, const uint8_t* end) { 34 uint32_t result = ~0; 35 for (const uint8_t* data = start; data < end; ++data) { 36 result ^= *data; 37 result = (result >> 8) ^ kCrc32Lut[result & 0xFF]; 38 } 39 return ~result; 40 } 41 42 void AdlerCopy(const uint8_t* src, uint8_t* dst, size_t length, uint32_t* s1, 43 uint32_t* s2) { 44 // TODO(eustas): SIMD-ify and use multithreading. 45 46 // Precondition: s1, s2 normalized; length <= 65535 47 uint32_t a = *s1; 48 uint32_t b = *s2; 49 50 for (size_t i = 0; i < length; ++i) { 51 const uint8_t v = src[i]; 52 a += v; 53 b += a; 54 dst[i] = v; 55 } 56 57 // Postcondition: s1, s2 normalized. 58 *s1 = a % 65521; 59 *s2 = b % 65521; 60 } 61 62 constexpr size_t kMaxDeflateBlock = 65535; 63 constexpr uint32_t kIhdrSize = 13; 64 constexpr uint32_t kCicpSize = 4; 65 66 void WriteU8(uint8_t*& dst, uint8_t value) { *(dst++) = value; } 67 68 void WriteU16(uint8_t*& dst, uint16_t value) { 69 memcpy(dst, &value, 2); 70 dst += 2; 71 } 72 73 void WriteU32(uint8_t*& dst, uint32_t value) { 74 memcpy(dst, &value, 4); 75 dst += 4; 76 } 77 78 void WriteU32BE(uint8_t*& dst, uint32_t value) { 79 WriteU32(dst, __builtin_bswap32(value)); 80 } 81 82 } // namespace 83 84 extern "C" { 85 86 uint8_t* WrapPixelsToPng(size_t width, size_t height, size_t bit_depth, 87 bool has_alpha, const uint8_t* input, 88 const std::vector<uint8_t>& icc, 89 const std::vector<uint8_t>& cicp, 90 uint32_t* output_size) { 91 size_t row_size = width * (bit_depth / 8) * (3 + static_cast<int>(has_alpha)); 92 size_t data_size = height * (row_size + 1); 93 size_t num_deflate_blocks = 94 (data_size + kMaxDeflateBlock - 1) / kMaxDeflateBlock; 95 size_t idat_size = data_size + num_deflate_blocks * 5 + 6; 96 // 64k is enough for everyone 97 bool has_iccp = !icc.empty() && (icc.size() <= kMaxDeflateBlock); 98 size_t iccp_size = 3 + icc.size() + 5 + 6; // name + data + deflate-wrapping 99 bool has_cicp = (cicp.size() == kCicpSize); 100 size_t total_size = 0; 101 total_size += kPngMagic.size(); 102 total_size += 12 + kIhdrSize; 103 total_size += has_cicp ? (kCicpSize + 12) : 0; 104 total_size += has_iccp ? (iccp_size + 12) : 0; 105 total_size += 12 + idat_size; 106 total_size += 12; // IEND 107 108 uint8_t* output = static_cast<uint8_t*>(malloc(total_size)); 109 if (!output) { 110 return nullptr; 111 } 112 uint8_t* dst = output; 113 *output_size = total_size; 114 115 for (const uint32_t& c : kPngMagic) { 116 *(dst++) = c; 117 } 118 119 // IHDR 120 WriteU32BE(dst, kIhdrSize); 121 uint8_t* chunk_start = dst; 122 WriteU32(dst, 0x52444849); 123 WriteU32BE(dst, width); 124 WriteU32BE(dst, height); 125 WriteU8(dst, bit_depth); 126 WriteU8(dst, has_alpha ? 6 : 2); 127 WriteU8(dst, 0); // compression: deflate 128 WriteU8(dst, 0); // filters: standard 129 WriteU8(dst, 0); // interlace: no 130 uint32_t crc32 = CalculateCrc32(chunk_start, dst); 131 WriteU32BE(dst, crc32); 132 133 if (has_cicp) { 134 // cICP 135 WriteU32BE(dst, kCicpSize); 136 uint8_t* chunk_start = dst; 137 WriteU32(dst, 0x50434963); 138 for (size_t i = 0; i < kCicpSize; ++i) { 139 WriteU8(dst, cicp[i]); 140 } 141 uint32_t crc32 = CalculateCrc32(chunk_start, dst); 142 WriteU32BE(dst, crc32); 143 } 144 145 if (has_iccp) { 146 // iCCP 147 WriteU32BE(dst, iccp_size); 148 uint8_t* chunk_start = dst; 149 WriteU32(dst, 0x50434369); 150 WriteU8(dst, '1'); // Profile name 151 WriteU8(dst, 0); // NUL terminator 152 WriteU8(dst, 0); // Compression method: deflate 153 WriteU8(dst, 0x08); // CM = 8 (deflate), CINFO = 0 (window size = 2**(0+8)) 154 WriteU8(dst, 29); // FCHECK; (FCHECK + 256* CMF) % 31 = 0 155 uint32_t adler_s1 = 1; 156 uint32_t adler_s2 = 0; 157 WriteU8(dst, 1); // btype = 00 (uncompressed), last 158 uint16_t block_size = static_cast<uint16_t>(icc.size()); 159 WriteU16(dst, block_size); 160 WriteU16(dst, ~block_size); 161 AdlerCopy(icc.data(), dst, block_size, &adler_s1, &adler_s2); 162 dst += block_size; 163 uint32_t adler = (adler_s2 << 8) | adler_s1; 164 WriteU32BE(dst, adler); 165 uint32_t crc32 = CalculateCrc32(chunk_start, dst); 166 WriteU32BE(dst, crc32); 167 } 168 169 // IDAT 170 WriteU32BE(dst, idat_size); 171 WriteU32(dst, 0x54414449); 172 size_t offset = 0; 173 size_t bytes_to_next_row = 0; 174 uint32_t adler_s1 = 1; 175 uint32_t adler_s2 = 0; 176 WriteU8(dst, 0x08); // CM = 8 (deflate), CINFO = 0 (window size = 2**(0+8)) 177 WriteU8(dst, 29); // FCHECK; (FCHECK + 256* CMF) % 31 = 0 178 for (size_t i = 0; i < num_deflate_blocks; ++i) { 179 size_t block_size = data_size - offset; 180 if (block_size > kMaxDeflateBlock) { 181 block_size = kMaxDeflateBlock; 182 } 183 bool is_last = ((i + 1) == num_deflate_blocks); 184 WriteU8(dst, static_cast<uint8_t>(is_last)); // btype = 00 (uncompressed) 185 offset += block_size; 186 187 WriteU16(dst, block_size); 188 WriteU16(dst, ~block_size); 189 while (block_size > 0) { 190 if (bytes_to_next_row == 0) { 191 WriteU8(dst, 0); // filter: raw 192 adler_s2 += adler_s1; 193 bytes_to_next_row = row_size; 194 block_size--; 195 continue; 196 } 197 size_t bytes_to_copy = std::min(block_size, bytes_to_next_row); 198 AdlerCopy(input, dst, bytes_to_copy, &adler_s1, &adler_s2); 199 dst += bytes_to_copy; 200 input += bytes_to_copy; 201 block_size -= bytes_to_copy; 202 bytes_to_next_row -= bytes_to_copy; 203 } 204 } 205 // Fake Adler works well in Chrome; so let's not waste CPU cycles. 206 uint32_t adler = 0; // (adler_s2 << 8) | adler_s1; 207 WriteU32BE(dst, adler); 208 WriteU32BE(dst, 0); // Fake CRC32 209 210 // IEND 211 WriteU32BE(dst, 0); 212 chunk_start = dst; 213 WriteU32(dst, 0x444E4549); 214 // TODO(eustas): this is fixed value; precalculate? 215 crc32 = CalculateCrc32(chunk_start, dst); 216 WriteU32BE(dst, crc32); 217 218 return output; 219 } 220 221 } // extern "C"