libjxl

FORK: libjxl patches used on blog
git clone https://git.neptards.moe/blog/libjxl.git
Log | Files | Refs | Submodules | README | LICENSE

benchmark_stats.cc (13804B)


      1 // Copyright (c) the JPEG XL Project Authors. All rights reserved.
      2 //
      3 // Use of this source code is governed by a BSD-style
      4 // license that can be found in the LICENSE file.
      5 
      6 #include "tools/benchmark/benchmark_stats.h"
      7 
      8 #include <stdarg.h>
      9 #include <stddef.h>
     10 #include <stdio.h>
     11 #include <string.h>
     12 
     13 #include <algorithm>
     14 #include <cmath>
     15 
     16 #include "lib/jxl/base/printf_macros.h"
     17 #include "lib/jxl/base/status.h"
     18 #include "tools/benchmark/benchmark_args.h"
     19 
     20 namespace jpegxl {
     21 namespace tools {
     22 
     23 #define ADD_NAME(val, name) \
     24   case JXL_ENC_STAT_##val:  \
     25     return name
     26 const char* JxlStatsName(JxlEncoderStatsKey key) {
     27   switch (key) {
     28     ADD_NAME(HEADER_BITS, "Header bits");
     29     ADD_NAME(TOC_BITS, "TOC bits");
     30     ADD_NAME(DICTIONARY_BITS, "Patch dictionary bits");
     31     ADD_NAME(SPLINES_BITS, "Splines bits");
     32     ADD_NAME(NOISE_BITS, "Noise bits");
     33     ADD_NAME(QUANT_BITS, "Quantizer bits");
     34     ADD_NAME(MODULAR_TREE_BITS, "Modular tree bits");
     35     ADD_NAME(MODULAR_GLOBAL_BITS, "Modular global bits");
     36     ADD_NAME(DC_BITS, "DC bits");
     37     ADD_NAME(MODULAR_DC_GROUP_BITS, "Modular DC group bits");
     38     ADD_NAME(CONTROL_FIELDS_BITS, "Control field bits");
     39     ADD_NAME(COEF_ORDER_BITS, "Coeff order bits");
     40     ADD_NAME(AC_HISTOGRAM_BITS, "AC histogram bits");
     41     ADD_NAME(AC_BITS, "AC token bits");
     42     ADD_NAME(MODULAR_AC_GROUP_BITS, "Modular AC group bits");
     43     ADD_NAME(NUM_SMALL_BLOCKS, "Number of small blocks");
     44     ADD_NAME(NUM_DCT4X8_BLOCKS, "Number of 4x8 blocks");
     45     ADD_NAME(NUM_AFV_BLOCKS, "Number of AFV blocks");
     46     ADD_NAME(NUM_DCT8_BLOCKS, "Number of 8x8 blocks");
     47     ADD_NAME(NUM_DCT8X32_BLOCKS, "Number of 8x32 blocks");
     48     ADD_NAME(NUM_DCT16_BLOCKS, "Number of 16x16 blocks");
     49     ADD_NAME(NUM_DCT16X32_BLOCKS, "Number of 16x32 blocks");
     50     ADD_NAME(NUM_DCT32_BLOCKS, "Number of 32x32 blocks");
     51     ADD_NAME(NUM_DCT32X64_BLOCKS, "Number of 32x64 blocks");
     52     ADD_NAME(NUM_DCT64_BLOCKS, "Number of 64x64 blocks");
     53     ADD_NAME(NUM_BUTTERAUGLI_ITERS, "Butteraugli iters");
     54     default:
     55       return "";
     56   };
     57   return "";
     58 }
     59 #undef ADD_NAME
     60 
     61 void JxlStats::Print() const {
     62   for (int i = 0; i < JXL_ENC_NUM_STATS; ++i) {
     63     JxlEncoderStatsKey key = static_cast<JxlEncoderStatsKey>(i);
     64     size_t value = JxlEncoderStatsGet(stats.get(), key);
     65     if (value) printf("%-25s  %10" PRIuS "\n", JxlStatsName(key), value);
     66   }
     67 }
     68 
     69 namespace {
     70 
     71 // Computes longest codec name from Args()->codec, for table alignment.
     72 uint32_t ComputeLargestCodecName() {
     73   std::vector<std::string> methods = SplitString(Args()->codec, ',');
     74   size_t max = strlen("Aggregate:");  // Include final row's name
     75   for (const auto& method : methods) {
     76     max = std::max(max, method.size());
     77   }
     78   return max;
     79 }
     80 
// The benchmark result is a table of heterogeneous data, the column type
// specifies its data type. The type affects how a value is printed (see
// PrintFormattedEntries) as well as how the aggregate row is computed (see
// PrintAggregate).
enum ColumnType {
  // Formatted string, printed verbatim. The aggregate row shows "Aggregate:"
  // in the first column and "---" in any other string column.
  TYPE_STRING,
  // Positive size, prints 0 as "---". Aggregated as the geometric mean of the
  // strictly positive entries, rounded to the nearest integer.
  TYPE_SIZE,
  // Floating point value (double precision) which is interpreted as
  // "not applicable" if <= 0 when aggregating: only strictly positive entries
  // contribute to the geometric mean, so a value can be set to 0 or negative
  // to exclude it, for example for a speed that is not measured. Note that
  // the per-row printing still formats the number as-is.
  TYPE_POSITIVE_FLOAT,
  // Counts of some event. Printed as an integer (0 included) and aggregated
  // by summation rather than by geometric mean.
  TYPE_COUNT,
};
     97 
// Static description of one column of the benchmark result table. Instances
// are created by positional aggregate initialization, so the member order
// below is part of the contract.
struct ColumnDescriptor {
  // Column name, printed in the table header.
  std::string label;
  // Total width to render the values of this column. If this is a floating
  // point value, make sure this is large enough to contain a space and the
  // point, plus precision digits after the point, plus the max amount of
  // integer digits you expect in front of the point.
  uint32_t width;
  // Amount of digits after the point, or 0 if not a floating point value.
  uint32_t precision;
  // Data type of the column; selects formatting and aggregation.
  ColumnType type;
  bool more;  // Whether to print only if more_columns is enabled
};
    111 
    112 ColumnDescriptor ExtraMetricDescriptor() {
    113   ColumnDescriptor d{{"DO NOT USE"}, 12, 4, TYPE_POSITIVE_FLOAT, false};
    114   return d;
    115 }
    116 
    117 // To add or change a column to the benchmark ASCII table output, add/change
    118 // an entry here with table header line 1, table header line 2, width of the
    119 // column, precision after the point in case of floating point, and the
    120 // data type. Then add/change the corresponding formula or formatting in
    121 // the function ComputeColumns.
    122 std::vector<ColumnDescriptor> GetColumnDescriptors(size_t num_extra_metrics) {
    123   // clang-format off
    124   std::vector<ColumnDescriptor> result = {
    125       {{"Encoding"}, ComputeLargestCodecName() + 1, 0, TYPE_STRING, false},
    126       {{"kPixels"},        10,  0, TYPE_SIZE, false},
    127       {{"Bytes"},           9,  0, TYPE_SIZE, false},
    128       {{"BPP"},            13,  7, TYPE_POSITIVE_FLOAT, false},
    129       {{"E MP/s"},          8,  3, TYPE_POSITIVE_FLOAT, false},
    130       {{"D MP/s"},          8,  3, TYPE_POSITIVE_FLOAT, false},
    131       {{"Max norm"},       13,  8, TYPE_POSITIVE_FLOAT, false},
    132       {{"SSIMULACRA2"},    13,  8, TYPE_POSITIVE_FLOAT, false},
    133       {{"PSNR"},            7,  2, TYPE_POSITIVE_FLOAT, false},
    134       {{"pnorm"},          13,  8, TYPE_POSITIVE_FLOAT, false},
    135       {{"BPP*pnorm"},      16, 12, TYPE_POSITIVE_FLOAT, false},
    136       {{"QABPP"},           8,  3, TYPE_POSITIVE_FLOAT, false},
    137       {{"Bugs"},            7,  5, TYPE_COUNT, false},
    138   };
    139   // clang-format on
    140 
    141   for (size_t i = 0; i < num_extra_metrics; i++) {
    142     result.push_back(ExtraMetricDescriptor());
    143   }
    144 
    145   return result;
    146 }
    147 
    148 // Computes throughput [megapixels/s] as reported in the report table
    149 double ComputeSpeed(size_t pixels, double time_s) {
    150   if (time_s == 0.0) return 0;
    151   return pixels * 1E-6 / time_s;
    152 }
    153 
    154 std::string FormatFloat(const ColumnDescriptor& label, double value) {
    155   std::string result =
    156       StringPrintf("%*.*f", label.width - 1, label.precision, value);
    157 
    158   // Reduce precision if the value is too wide for the column. However, keep
    159   // at least one digit to the right of the point, and especially the integer
    160   // digits.
    161   if (result.size() >= label.width) {
    162     size_t point = result.rfind('.');
    163     if (point != std::string::npos) {
    164       int end = std::max<int>(point + 2, label.width - 1);
    165       result.resize(end);
    166     }
    167   }
    168   return result;
    169 }
    170 
    171 }  // namespace
    172 
    173 std::string StringPrintf(const char* format, ...) {
    174   char buf[2000];
    175   va_list args;
    176   va_start(args, format);
    177   vsnprintf(buf, sizeof(buf), format, args);
    178   va_end(args);
    179   return std::string(buf);
    180 }
    181 
    182 void BenchmarkStats::Assimilate(const BenchmarkStats& victim) {
    183   total_input_files += victim.total_input_files;
    184   total_input_pixels += victim.total_input_pixels;
    185   total_compressed_size += victim.total_compressed_size;
    186   total_adj_compressed_size += victim.total_adj_compressed_size;
    187   total_time_encode += victim.total_time_encode;
    188   total_time_decode += victim.total_time_decode;
    189   max_distance += pow(victim.max_distance, 2.0) * victim.total_input_pixels;
    190   distance_p_norm += victim.distance_p_norm;
    191   ssimulacra2 += victim.ssimulacra2;
    192   psnr += victim.psnr;
    193   distances.insert(distances.end(), victim.distances.begin(),
    194                    victim.distances.end());
    195   total_errors += victim.total_errors;
    196   jxl_stats.Assimilate(victim.jxl_stats);
    197   if (extra_metrics.size() < victim.extra_metrics.size()) {
    198     extra_metrics.resize(victim.extra_metrics.size());
    199   }
    200   for (size_t i = 0; i < victim.extra_metrics.size(); i++) {
    201     extra_metrics[i] += victim.extra_metrics[i];
    202   }
    203 }
    204 
    205 void BenchmarkStats::PrintMoreStats() const {
    206   if (Args()->print_more_stats) {
    207     jxl_stats.Print();
    208   }
    209   if (Args()->print_distance_percentiles) {
    210     std::vector<float> sorted = distances;
    211     std::sort(sorted.begin(), sorted.end());
    212     int p50idx = 0.5 * distances.size();
    213     int p90idx = 0.9 * distances.size();
    214     printf("50th/90th percentile distance: %.8f  %.8f\n", sorted[p50idx],
    215            sorted[p90idx]);
    216   }
    217 }
    218 
    219 std::vector<ColumnValue> BenchmarkStats::ComputeColumns(
    220     const std::string& codec_desc, size_t corpus_size) const {
    221   JXL_CHECK(total_input_files == corpus_size);
    222   const double comp_bpp = total_compressed_size * 8.0 / total_input_pixels;
    223   const double adj_comp_bpp =
    224       total_adj_compressed_size * 8.0 / total_input_pixels;
    225   // Note: this is not affected by alpha nor bit depth.
    226   const double compression_speed =
    227       ComputeSpeed(total_input_pixels, total_time_encode);
    228   const double decompression_speed =
    229       ComputeSpeed(total_input_pixels, total_time_decode);
    230   const double psnr_avg = psnr / total_input_pixels;
    231   const double p_norm_avg = distance_p_norm / total_input_pixels;
    232   const double ssimulacra2_avg = ssimulacra2 / total_input_pixels;
    233   const double bpp_p_norm = p_norm_avg * comp_bpp;
    234 
    235   const double max_distance_avg =
    236       sqrt(max_distance / static_cast<double>(total_input_pixels));
    237 
    238   std::vector<ColumnValue> values(
    239       GetColumnDescriptors(extra_metrics.size()).size());
    240 
    241   values[0].s = codec_desc;
    242   values[1].i = total_input_pixels / 1000;
    243   values[2].i = total_compressed_size;
    244   values[3].f = comp_bpp;
    245   values[4].f = compression_speed;
    246   values[5].f = decompression_speed;
    247   values[6].f = static_cast<double>(max_distance_avg);
    248   values[7].f = ssimulacra2_avg;
    249   values[8].f = psnr_avg;
    250   values[9].f = p_norm_avg;
    251   values[10].f = bpp_p_norm;
    252   values[11].f = adj_comp_bpp;
    253   values[12].i = total_errors;
    254   for (size_t i = 0; i < extra_metrics.size(); i++) {
    255     values[13 + i].f = extra_metrics[i] / total_input_files;
    256   }
    257   return values;
    258 }
    259 
    260 static std::string PrintFormattedEntries(
    261     size_t num_extra_metrics, const std::vector<ColumnValue>& values) {
    262   const auto& descriptors = GetColumnDescriptors(num_extra_metrics);
    263 
    264   std::string out;
    265   for (size_t i = 0; i < descriptors.size(); i++) {
    266     if (!Args()->more_columns && descriptors[i].more) continue;
    267     std::string value;
    268     if (descriptors[i].type == TYPE_STRING) {
    269       value = values[i].s;
    270     } else if (descriptors[i].type == TYPE_SIZE) {
    271       value = values[i].i ? StringPrintf("%" PRIdS, values[i].i) : "---";
    272     } else if (descriptors[i].type == TYPE_POSITIVE_FLOAT) {
    273       value = FormatFloat(descriptors[i], values[i].f);
    274       value = FormatFloat(descriptors[i], values[i].f);
    275     } else if (descriptors[i].type == TYPE_COUNT) {
    276       value = StringPrintf("%" PRIdS, values[i].i);
    277     }
    278 
    279     int numspaces = descriptors[i].width - value.size();
    280     if (numspaces < 1) {
    281       numspaces = 1;
    282     }
    283     // All except the first one are right-aligned, the first one is the name,
    284     // others are numbers with digits matching from the right.
    285     if (i == 0) out += value;
    286     out += std::string(numspaces, ' ');
    287     if (i != 0) out += value;
    288   }
    289   return out + "\n";
    290 }
    291 
    292 std::string BenchmarkStats::PrintLine(const std::string& codec_desc,
    293                                       size_t corpus_size) const {
    294   std::vector<ColumnValue> values = ComputeColumns(codec_desc, corpus_size);
    295   return PrintFormattedEntries(extra_metrics.size(), values);
    296 }
    297 
    298 std::string PrintHeader(const std::vector<std::string>& extra_metrics_names) {
    299   std::string out;
    300   // Extra metrics are handled separately.
    301   const auto& descriptors = GetColumnDescriptors(0);
    302   for (size_t i = 0; i < descriptors.size(); i++) {
    303     if (!Args()->more_columns && descriptors[i].more) continue;
    304     const std::string& label = descriptors[i].label;
    305     int numspaces = descriptors[i].width - label.size();
    306     // All except the first one are right-aligned.
    307     if (i == 0) out += label;
    308     out += std::string(numspaces, ' ');
    309     if (i != 0) out += label;
    310   }
    311   for (const std::string& em : extra_metrics_names) {
    312     int numspaces = ExtraMetricDescriptor().width - em.size();
    313     JXL_CHECK(numspaces >= 1);
    314     out += std::string(numspaces, ' ');
    315     out += em;
    316   }
    317   out += '\n';
    318   for (const auto& descriptor : descriptors) {
    319     if (!Args()->more_columns && descriptor.more) continue;
    320     out += std::string(descriptor.width, '-');
    321   }
    322   out += std::string(ExtraMetricDescriptor().width * extra_metrics_names.size(),
    323                      '-');
    324   return out + "\n";
    325 }
    326 
    327 std::string PrintAggregate(
    328     size_t num_extra_metrics,
    329     const std::vector<std::vector<ColumnValue>>& aggregate) {
    330   const auto& descriptors = GetColumnDescriptors(num_extra_metrics);
    331 
    332   for (size_t i = 0; i < aggregate.size(); i++) {
    333     // Check when statistics has wrong amount of column entries
    334     JXL_CHECK(aggregate[i].size() == descriptors.size());
    335   }
    336 
    337   std::vector<ColumnValue> result(descriptors.size());
    338 
    339   // Statistics for the aggregate row are combined together with different
    340   // formulas than Assimilate uses for combining the statistics of files.
    341   for (size_t i = 0; i < descriptors.size(); i++) {
    342     if (descriptors[i].type == TYPE_STRING) {
    343       // "---" for the Iters column since this does not have meaning for
    344       // the aggregate stats.
    345       result[i].s = i == 0 ? "Aggregate:" : "---";
    346       continue;
    347     }
    348     if (descriptors[i].type == TYPE_COUNT) {
    349       size_t sum = 0;
    350       for (size_t j = 0; j < aggregate.size(); j++) {
    351         sum += aggregate[j][i].i;
    352       }
    353       result[i].i = sum;
    354       continue;
    355     }
    356 
    357     ColumnType type = descriptors[i].type;
    358 
    359     double logsum = 0;
    360     size_t numvalid = 0;
    361     for (size_t j = 0; j < aggregate.size(); j++) {
    362       double value =
    363           (type == TYPE_SIZE) ? aggregate[j][i].i : aggregate[j][i].f;
    364       if (value > 0) {
    365         numvalid++;
    366         logsum += std::log2(value);
    367       }
    368     }
    369     double geomean = numvalid ? std::exp2(logsum / numvalid) : 0.0;
    370 
    371     if (type == TYPE_SIZE || type == TYPE_COUNT) {
    372       result[i].i = static_cast<size_t>(geomean + 0.5);
    373     } else if (type == TYPE_POSITIVE_FLOAT) {
    374       result[i].f = geomean;
    375     } else {
    376       JXL_ABORT("unknown entry type");
    377     }
    378   }
    379 
    380   return PrintFormattedEntries(num_extra_metrics, result);
    381 }
    382 
    383 }  // namespace tools
    384 }  // namespace jpegxl