huffman.cc (11931B)
1 // Copyright (c) the JPEG XL Project Authors. All rights reserved. 2 // 3 // Use of this source code is governed by a BSD-style 4 // license that can be found in the LICENSE file. 5 6 #include "lib/jpegli/huffman.h" 7 8 #include <limits> 9 #include <vector> 10 11 #include "lib/jpegli/common.h" 12 #include "lib/jpegli/error.h" 13 14 namespace jpegli { 15 16 // Returns the table width of the next 2nd level table, count is the histogram 17 // of bit lengths for the remaining symbols, len is the code length of the next 18 // processed symbol. 19 static inline int NextTableBitSize(const int* count, int len) { 20 int left = 1 << (len - kJpegHuffmanRootTableBits); 21 while (len < static_cast<int>(kJpegHuffmanMaxBitLength)) { 22 left -= count[len]; 23 if (left <= 0) break; 24 ++len; 25 left <<= 1; 26 } 27 return len - kJpegHuffmanRootTableBits; 28 } 29 30 void BuildJpegHuffmanTable(const uint32_t* count, const uint32_t* symbols, 31 HuffmanTableEntry* lut) { 32 HuffmanTableEntry code; // current table entry 33 HuffmanTableEntry* table; // next available space in table 34 int len; // current code length 35 int idx; // symbol index 36 int key; // prefix code 37 int reps; // number of replicate key values in current table 38 int low; // low bits for current root entry 39 int table_bits; // key length of current table 40 int table_size; // size of current table 41 42 // Make a local copy of the input bit length histogram. 43 int tmp_count[kJpegHuffmanMaxBitLength + 1] = {0}; 44 int total_count = 0; 45 for (len = 1; len <= static_cast<int>(kJpegHuffmanMaxBitLength); ++len) { 46 tmp_count[len] = count[len]; 47 total_count += tmp_count[len]; 48 } 49 50 table = lut; 51 table_bits = kJpegHuffmanRootTableBits; 52 table_size = 1 << table_bits; 53 54 // Special case code with only one value. 55 if (total_count == 1) { 56 code.bits = 0; 57 code.value = symbols[0]; 58 for (key = 0; key < table_size; ++key) { 59 table[key] = code; 60 } 61 return; 62 } 63 64 // Fill in root table. 65 key = 0; 66 idx = 0; 67 for (len = 1; len <= kJpegHuffmanRootTableBits; ++len) { 68 for (; tmp_count[len] > 0; --tmp_count[len]) { 69 code.bits = len; 70 code.value = symbols[idx++]; 71 reps = 1 << (kJpegHuffmanRootTableBits - len); 72 while (reps--) { 73 table[key++] = code; 74 } 75 } 76 } 77 78 // Fill in 2nd level tables and add pointers to root table. 79 table += table_size; 80 table_size = 0; 81 low = 0; 82 for (len = kJpegHuffmanRootTableBits + 1; 83 len <= static_cast<int>(kJpegHuffmanMaxBitLength); ++len) { 84 for (; tmp_count[len] > 0; --tmp_count[len]) { 85 // Start a new sub-table if the previous one is full. 86 if (low >= table_size) { 87 table += table_size; 88 table_bits = NextTableBitSize(tmp_count, len); 89 table_size = 1 << table_bits; 90 low = 0; 91 lut[key].bits = table_bits + kJpegHuffmanRootTableBits; 92 lut[key].value = (table - lut) - key; 93 ++key; 94 } 95 code.bits = len - kJpegHuffmanRootTableBits; 96 code.value = symbols[idx++]; 97 reps = 1 << (table_bits - code.bits); 98 while (reps--) { 99 table[low++] = code; 100 } 101 } 102 } 103 } 104 105 // A node of a Huffman tree. 106 struct HuffmanTree { 107 HuffmanTree(uint32_t count, int16_t left, int16_t right) 108 : total_count(count), index_left(left), index_right_or_value(right) {} 109 uint32_t total_count; 110 int16_t index_left; 111 int16_t index_right_or_value; 112 }; 113 114 void SetDepth(const HuffmanTree& p, HuffmanTree* pool, uint8_t* depth, 115 uint8_t level) { 116 if (p.index_left >= 0) { 117 ++level; 118 SetDepth(pool[p.index_left], pool, depth, level); 119 SetDepth(pool[p.index_right_or_value], pool, depth, level); 120 } else { 121 depth[p.index_right_or_value] = level; 122 } 123 } 124 125 // Sort the root nodes, least popular first. 126 static JXL_INLINE bool Compare(const HuffmanTree& v0, const HuffmanTree& v1) { 127 return v0.total_count < v1.total_count; 128 } 129 130 // This function will create a Huffman tree. 131 // 132 // The catch here is that the tree cannot be arbitrarily deep. 133 // Brotli specifies a maximum depth of 15 bits for "code trees" 134 // and 7 bits for "code length code trees." 135 // 136 // count_limit is the value that is to be faked as the minimum value 137 // and this minimum value is raised until the tree matches the 138 // maximum length requirement. 139 // 140 // This algorithm is not of excellent performance for very long data blocks, 141 // especially when population counts are longer than 2**tree_limit, but 142 // we are not planning to use this with extremely long blocks. 143 // 144 // See http://en.wikipedia.org/wiki/Huffman_coding 145 void CreateHuffmanTree(const uint32_t* data, const size_t length, 146 const int tree_limit, uint8_t* depth) { 147 // For block sizes below 64 kB, we never need to do a second iteration 148 // of this loop. Probably all of our block sizes will be smaller than 149 // that, so this loop is mostly of academic interest. If we actually 150 // would need this, we would be better off with the Katajainen algorithm. 151 for (uint32_t count_limit = 1;; count_limit *= 2) { 152 std::vector<HuffmanTree> tree; 153 tree.reserve(2 * length + 1); 154 155 for (size_t i = length; i != 0;) { 156 --i; 157 if (data[i]) { 158 const uint32_t count = std::max(data[i], count_limit - 1); 159 tree.emplace_back(count, -1, static_cast<int16_t>(i)); 160 } 161 } 162 163 const size_t n = tree.size(); 164 if (n == 1) { 165 // Fake value; will be fixed on upper level. 166 depth[tree[0].index_right_or_value] = 1; 167 break; 168 } 169 170 std::stable_sort(tree.begin(), tree.end(), Compare); 171 172 // The nodes are: 173 // [0, n): the sorted leaf nodes that we start with. 174 // [n]: we add a sentinel here. 175 // [n + 1, 2n): new parent nodes are added here, starting from 176 // (n+1). These are naturally in ascending order. 177 // [2n]: we add a sentinel at the end as well. 178 // There will be (2n+1) elements at the end. 179 const HuffmanTree sentinel(std::numeric_limits<uint32_t>::max(), -1, -1); 180 tree.push_back(sentinel); 181 tree.push_back(sentinel); 182 183 size_t i = 0; // Points to the next leaf node. 184 size_t j = n + 1; // Points to the next non-leaf node. 185 for (size_t k = n - 1; k != 0; --k) { 186 size_t left; 187 size_t right; 188 if (tree[i].total_count <= tree[j].total_count) { 189 left = i; 190 ++i; 191 } else { 192 left = j; 193 ++j; 194 } 195 if (tree[i].total_count <= tree[j].total_count) { 196 right = i; 197 ++i; 198 } else { 199 right = j; 200 ++j; 201 } 202 203 // The sentinel node becomes the parent node. 204 size_t j_end = tree.size() - 1; 205 tree[j_end].total_count = 206 tree[left].total_count + tree[right].total_count; 207 tree[j_end].index_left = static_cast<int16_t>(left); 208 tree[j_end].index_right_or_value = static_cast<int16_t>(right); 209 210 // Add back the last sentinel node. 211 tree.push_back(sentinel); 212 } 213 JXL_DASSERT(tree.size() == 2 * n + 1); 214 SetDepth(tree[2 * n - 1], tree.data(), depth, 0); 215 216 // We need to pack the Huffman tree in tree_limit bits. 217 // If this was not successful, add fake entities to the lowest values 218 // and retry. 219 if (*std::max_element(&depth[0], &depth[length]) <= tree_limit) { 220 break; 221 } 222 } 223 } 224 225 void ValidateHuffmanTable(j_common_ptr cinfo, const JHUFF_TBL* table, 226 bool is_dc) { 227 size_t total_symbols = 0; 228 size_t total_p = 0; 229 size_t max_depth = 0; 230 for (size_t d = 1; d <= kJpegHuffmanMaxBitLength; ++d) { 231 uint8_t count = table->bits[d]; 232 if (count) { 233 total_symbols += count; 234 total_p += (1u << (kJpegHuffmanMaxBitLength - d)) * count; 235 max_depth = d; 236 } 237 } 238 total_p += 1u << (kJpegHuffmanMaxBitLength - max_depth); // sentinel symbol 239 if (total_symbols == 0) { 240 JPEGLI_ERROR("Empty Huffman table"); 241 } 242 if (total_symbols > kJpegHuffmanAlphabetSize) { 243 JPEGLI_ERROR("Too many symbols in Huffman table"); 244 } 245 if (total_p != (1u << kJpegHuffmanMaxBitLength)) { 246 JPEGLI_ERROR("Invalid bit length distribution"); 247 } 248 uint8_t symbol_seen[kJpegHuffmanAlphabetSize] = {}; 249 for (size_t i = 0; i < total_symbols; ++i) { 250 uint8_t symbol = table->huffval[i]; 251 if (symbol_seen[symbol]) { 252 JPEGLI_ERROR("Duplicate symbol %d in Huffman table", symbol); 253 } 254 symbol_seen[symbol] = 1; 255 } 256 } 257 258 void AddStandardHuffmanTables(j_common_ptr cinfo, bool is_dc) { 259 // Huffman tables from the JPEG standard. 260 static constexpr JHUFF_TBL kStandardDCTables[2] = { 261 // DC luma 262 {{0, 0, 1, 5, 1, 1, 1, 1, 1, 1}, 263 {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}, 264 FALSE}, 265 // DC chroma 266 {{0, 0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 267 {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}, 268 FALSE}}; 269 static constexpr JHUFF_TBL kStandardACTables[2] = { 270 // AC luma 271 {{0, 0, 2, 1, 3, 3, 2, 4, 3, 5, 5, 4, 4, 0, 0, 1, 125}, 272 {0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12, 0x21, 0x31, 0x41, 0x06, 273 0x13, 0x51, 0x61, 0x07, 0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08, 274 0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0, 0x24, 0x33, 0x62, 0x72, 275 0x82, 0x09, 0x0a, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28, 276 0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x43, 0x44, 0x45, 277 0x46, 0x47, 0x48, 0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 278 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x73, 0x74, 0x75, 279 0x76, 0x77, 0x78, 0x79, 0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 280 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 281 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 282 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 283 0xca, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2, 284 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xf1, 0xf2, 0xf3, 0xf4, 285 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa}, 286 FALSE}, 287 // AC chroma 288 {{0, 0, 2, 1, 2, 4, 4, 3, 4, 7, 5, 4, 4, 0, 1, 2, 119}, 289 {0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21, 0x31, 0x06, 0x12, 0x41, 290 0x51, 0x07, 0x61, 0x71, 0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91, 291 0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0, 0x15, 0x62, 0x72, 0xd1, 292 0x0a, 0x16, 0x24, 0x34, 0xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26, 293 0x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x43, 0x44, 294 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 295 0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x73, 0x74, 296 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 297 0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 298 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 299 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 300 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 301 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xf2, 0xf3, 0xf4, 302 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa}, 303 FALSE}}; 304 const JHUFF_TBL* std_tables = is_dc ? kStandardDCTables : kStandardACTables; 305 JHUFF_TBL** tables; 306 if (cinfo->is_decompressor) { 307 j_decompress_ptr cinfo_d = reinterpret_cast<j_decompress_ptr>(cinfo); 308 tables = is_dc ? cinfo_d->dc_huff_tbl_ptrs : cinfo_d->ac_huff_tbl_ptrs; 309 } else { 310 j_compress_ptr cinfo_c = reinterpret_cast<j_compress_ptr>(cinfo); 311 tables = is_dc ? cinfo_c->dc_huff_tbl_ptrs : cinfo_c->ac_huff_tbl_ptrs; 312 } 313 for (int i = 0; i < 2; ++i) { 314 if (tables[i] == nullptr) { 315 tables[i] = jpegli_alloc_huff_table(cinfo); 316 memcpy(tables[i], &std_tables[i], sizeof(JHUFF_TBL)); 317 ValidateHuffmanTable(cinfo, tables[i], is_dc); 318 } 319 } 320 } 321 322 } // namespace jpegli