libjxl

FORK: libjxl patches used on blog
git clone https://git.neptards.moe/blog/libjxl.git
Log | Files | Refs | Submodules | README | LICENSE

ans_common.h (5978B)


      1 // Copyright (c) the JPEG XL Project Authors. All rights reserved.
      2 //
      3 // Use of this source code is governed by a BSD-style
      4 // license that can be found in the LICENSE file.
      5 
      6 #ifndef LIB_JXL_ANS_COMMON_H_
      7 #define LIB_JXL_ANS_COMMON_H_
      8 
      9 #include <stdint.h>
     10 
     11 #include <algorithm>
     12 #include <hwy/cache_control.h>  // Prefetch
     13 #include <vector>
     14 
     15 #include "lib/jxl/ans_params.h"
     16 #include "lib/jxl/base/byte_order.h"
     17 #include "lib/jxl/base/compiler_specific.h"
     18 #include "lib/jxl/base/status.h"
     19 
     20 namespace jxl {
     21 
     22 // Returns the precision (number of bits) that should be used to store
     23 // a histogram count such that Log2Floor(count) == logcount.
     24 static JXL_INLINE uint32_t GetPopulationCountPrecision(uint32_t logcount,
     25                                                        uint32_t shift) {
     26   int32_t r = std::min<int>(
     27       logcount, static_cast<int>(shift) -
     28                     static_cast<int>((ANS_LOG_TAB_SIZE - logcount) >> 1));
     29   if (r < 0) return 0;
     30   return r;
     31 }
     32 
     33 // Returns a histogram where the counts are positive, differ by at most 1,
     34 // and add up to total_count. The bigger counts (if any) are at the beginning
     35 // of the histogram.
     36 std::vector<int32_t> CreateFlatHistogram(int length, int total_count);
     37 
     38 // An alias table implements a mapping from the [0, ANS_TAB_SIZE) range into
     39 // the [0, ANS_MAX_ALPHABET_SIZE) range, satisfying the following conditions:
     40 // - each symbol occurs as many times as specified by any valid distribution
     41 //   of frequencies of the symbols. A valid distribution here is an array of
     42 //   ANS_MAX_ALPHABET_SIZE that contains numbers in the range [0, ANS_TAB_SIZE],
     43 //   and whose sum is ANS_TAB_SIZE.
     44 // - lookups can be done in constant time, and also return how many smaller
     45 //   input values map into the same symbol, according to some well-defined order
     46 //   of input values.
     47 // - the space used by the alias table is given by a small constant times the
     48 //   index of the largest symbol with nonzero probability in the distribution.
     49 // Each of the entries in the table covers a range of `entry_size` values in the
     50 // [0, ANS_TAB_SIZE) range; consecutive entries represent consecutive
     51 // sub-ranges. In the range covered by entry `i`, the first `cutoff` values map
     52 // to symbol `i`, while the others map to symbol `right_value`.
     53 //
     54 // TODO(veluca): consider making the order used for computing offsets easier to
     55 // define - it is currently defined by the algorithm to compute the alias table.
     56 // Beware of breaking the implicit assumption that symbols that come after the
     57 // cutoff value should have an offset at least as big as the cutoff.
     58 
     59 struct AliasTable {
     60   struct Symbol {
     61     size_t value;
     62     size_t offset;
     63     size_t freq;
     64   };
     65 
     66 // Working set size matters here (~64 tables x 256 entries).
     67 // offsets0 is always zero (beginning of [0] side among the same symbol).
     68 // offsets1 is an offset of (pos >= cutoff) side decremented by cutoff.
     69 #pragma pack(push, 1)
     70   struct Entry {
     71     uint8_t cutoff;       // < kEntrySizeMinus1 when used by ANS.
     72     uint8_t right_value;  // < alphabet size.
     73     uint16_t freq0;
     74 
     75     // Only used if `greater` (see Lookup)
     76     uint16_t offsets1;         // <= ANS_TAB_SIZE
     77     uint16_t freq1_xor_freq0;  // for branchless ternary in Lookup
     78   };
     79 #pragma pack(pop)
     80 
     81   // Dividing `value` by `entry_size` determines `i`, the entry which is
     82   // responsible for the input. If the remainder is below `cutoff`, then the
     83   // mapped symbol is `i`; since `offsets[0]` stores the number of occurrences
     84   // of `i` "before" the start of this entry, the offset of the input will be
     85   // `offsets[0] + remainder`. If the remainder is above cutoff, the mapped
     86   // symbol is `right_value`; since `offsets[1]` stores the number of
     87   // occurrences of `right_value` "before" this entry, minus the `cutoff` value,
     88   // the input offset is then `remainder + offsets[1]`.
     89   static JXL_INLINE Symbol Lookup(const Entry* JXL_RESTRICT table, size_t value,
     90                                   size_t log_entry_size,
     91                                   size_t entry_size_minus_1) {
     92     const size_t i = value >> log_entry_size;
     93     const size_t pos = value & entry_size_minus_1;
     94 
     95 #if JXL_BYTE_ORDER_LITTLE
     96     uint64_t entry;
     97     memcpy(&entry, &table[i].cutoff, sizeof(entry));
     98     const size_t cutoff = entry & 0xFF;              // = MOVZX
     99     const size_t right_value = (entry >> 8) & 0xFF;  // = MOVZX
    100     const size_t freq0 = (entry >> 16) & 0xFFFF;
    101 #else
    102     // Generates multiple loads with complex addressing.
    103     const size_t cutoff = table[i].cutoff;
    104     const size_t right_value = table[i].right_value;
    105     const size_t freq0 = table[i].freq0;
    106 #endif
    107 
    108     const bool greater = pos >= cutoff;
    109 
    110 #if JXL_BYTE_ORDER_LITTLE
    111     const uint64_t conditional = greater ? entry : 0;  // = CMOV
    112     const size_t offsets1_or_0 = (conditional >> 32) & 0xFFFF;
    113     const size_t freq1_xor_freq0_or_0 = conditional >> 48;
    114 #else
    115     const size_t offsets1_or_0 = greater ? table[i].offsets1 : 0;
    116     const size_t freq1_xor_freq0_or_0 = greater ? table[i].freq1_xor_freq0 : 0;
    117 #endif
    118 
    119     // WARNING: moving this code may interfere with CMOV heuristics.
    120     Symbol s;
    121     s.value = greater ? right_value : i;
    122     s.offset = offsets1_or_0 + pos;
    123     s.freq = freq0 ^ freq1_xor_freq0_or_0;  // = greater ? freq1 : freq0
    124     // XOR avoids implementation-defined conversion from unsigned to signed.
    125     // Alternatives considered: BEXTR is 2 cycles on HSW, SET+shift causes
    126     // spills, simple ternary has a long dependency chain.
    127 
    128     return s;
    129   }
    130 
    131   static HWY_INLINE void Prefetch(const Entry* JXL_RESTRICT table, size_t value,
    132                                   size_t log_entry_size) {
    133     const size_t i = value >> log_entry_size;
    134     hwy::Prefetch(table + i);
    135   }
    136 };
    137 
    138 // Computes an alias table for a given distribution.
    139 void InitAliasTable(std::vector<int32_t> distribution, uint32_t range,
    140                     size_t log_alpha_size, AliasTable::Entry* JXL_RESTRICT a);
    141 
    142 }  // namespace jxl
    143 
    144 #endif  // LIB_JXL_ANS_COMMON_H_