benchmark_stats.cc (13804B)
1 // Copyright (c) the JPEG XL Project Authors. All rights reserved. 2 // 3 // Use of this source code is governed by a BSD-style 4 // license that can be found in the LICENSE file. 5 6 #include "tools/benchmark/benchmark_stats.h" 7 8 #include <stdarg.h> 9 #include <stddef.h> 10 #include <stdio.h> 11 #include <string.h> 12 13 #include <algorithm> 14 #include <cmath> 15 16 #include "lib/jxl/base/printf_macros.h" 17 #include "lib/jxl/base/status.h" 18 #include "tools/benchmark/benchmark_args.h" 19 20 namespace jpegxl { 21 namespace tools { 22 23 #define ADD_NAME(val, name) \ 24 case JXL_ENC_STAT_##val: \ 25 return name 26 const char* JxlStatsName(JxlEncoderStatsKey key) { 27 switch (key) { 28 ADD_NAME(HEADER_BITS, "Header bits"); 29 ADD_NAME(TOC_BITS, "TOC bits"); 30 ADD_NAME(DICTIONARY_BITS, "Patch dictionary bits"); 31 ADD_NAME(SPLINES_BITS, "Splines bits"); 32 ADD_NAME(NOISE_BITS, "Noise bits"); 33 ADD_NAME(QUANT_BITS, "Quantizer bits"); 34 ADD_NAME(MODULAR_TREE_BITS, "Modular tree bits"); 35 ADD_NAME(MODULAR_GLOBAL_BITS, "Modular global bits"); 36 ADD_NAME(DC_BITS, "DC bits"); 37 ADD_NAME(MODULAR_DC_GROUP_BITS, "Modular DC group bits"); 38 ADD_NAME(CONTROL_FIELDS_BITS, "Control field bits"); 39 ADD_NAME(COEF_ORDER_BITS, "Coeff order bits"); 40 ADD_NAME(AC_HISTOGRAM_BITS, "AC histogram bits"); 41 ADD_NAME(AC_BITS, "AC token bits"); 42 ADD_NAME(MODULAR_AC_GROUP_BITS, "Modular AC group bits"); 43 ADD_NAME(NUM_SMALL_BLOCKS, "Number of small blocks"); 44 ADD_NAME(NUM_DCT4X8_BLOCKS, "Number of 4x8 blocks"); 45 ADD_NAME(NUM_AFV_BLOCKS, "Number of AFV blocks"); 46 ADD_NAME(NUM_DCT8_BLOCKS, "Number of 8x8 blocks"); 47 ADD_NAME(NUM_DCT8X32_BLOCKS, "Number of 8x32 blocks"); 48 ADD_NAME(NUM_DCT16_BLOCKS, "Number of 16x16 blocks"); 49 ADD_NAME(NUM_DCT16X32_BLOCKS, "Number of 16x32 blocks"); 50 ADD_NAME(NUM_DCT32_BLOCKS, "Number of 32x32 blocks"); 51 ADD_NAME(NUM_DCT32X64_BLOCKS, "Number of 32x64 blocks"); 52 ADD_NAME(NUM_DCT64_BLOCKS, "Number of 64x64 blocks"); 53 ADD_NAME(NUM_BUTTERAUGLI_ITERS, "Butteraugli iters"); 54 default: 55 return ""; 56 }; 57 return ""; 58 } 59 #undef ADD_NAME 60 61 void JxlStats::Print() const { 62 for (int i = 0; i < JXL_ENC_NUM_STATS; ++i) { 63 JxlEncoderStatsKey key = static_cast<JxlEncoderStatsKey>(i); 64 size_t value = JxlEncoderStatsGet(stats.get(), key); 65 if (value) printf("%-25s %10" PRIuS "\n", JxlStatsName(key), value); 66 } 67 } 68 69 namespace { 70 71 // Computes longest codec name from Args()->codec, for table alignment. 72 uint32_t ComputeLargestCodecName() { 73 std::vector<std::string> methods = SplitString(Args()->codec, ','); 74 size_t max = strlen("Aggregate:"); // Include final row's name 75 for (const auto& method : methods) { 76 max = std::max(max, method.size()); 77 } 78 return max; 79 } 80 81 // The benchmark result is a table of heterogeneous data, the column type 82 // specifies its data type. The type affects how it is printed as well as how 83 // aggregate values are computed. 84 enum ColumnType { 85 // Formatted string 86 TYPE_STRING, 87 // Positive size, prints 0 as "---" 88 TYPE_SIZE, 89 // Floating point value (double precision) which is interpreted as 90 // "not applicable" if <= 0, must be strictly positive to be valid but can be 91 // set to 0 or negative to be printed as "---", for example for a speed that 92 // is not measured. 93 TYPE_POSITIVE_FLOAT, 94 // Counts of some event 95 TYPE_COUNT, 96 }; 97 98 struct ColumnDescriptor { 99 // Column name 100 std::string label; 101 // Total width to render the values of this column. If t his is a floating 102 // point value, make sure this is large enough to contain a space and the 103 // point, plus precision digits after the point, plus the max amount of 104 // integer digits you expect in front of the point. 105 uint32_t width; 106 // Amount of digits after the point, or 0 if not a floating point value. 107 uint32_t precision; 108 ColumnType type; 109 bool more; // Whether to print only if more_columns is enabled 110 }; 111 112 ColumnDescriptor ExtraMetricDescriptor() { 113 ColumnDescriptor d{{"DO NOT USE"}, 12, 4, TYPE_POSITIVE_FLOAT, false}; 114 return d; 115 } 116 117 // To add or change a column to the benchmark ASCII table output, add/change 118 // an entry here with table header line 1, table header line 2, width of the 119 // column, precision after the point in case of floating point, and the 120 // data type. Then add/change the corresponding formula or formatting in 121 // the function ComputeColumns. 122 std::vector<ColumnDescriptor> GetColumnDescriptors(size_t num_extra_metrics) { 123 // clang-format off 124 std::vector<ColumnDescriptor> result = { 125 {{"Encoding"}, ComputeLargestCodecName() + 1, 0, TYPE_STRING, false}, 126 {{"kPixels"}, 10, 0, TYPE_SIZE, false}, 127 {{"Bytes"}, 9, 0, TYPE_SIZE, false}, 128 {{"BPP"}, 13, 7, TYPE_POSITIVE_FLOAT, false}, 129 {{"E MP/s"}, 8, 3, TYPE_POSITIVE_FLOAT, false}, 130 {{"D MP/s"}, 8, 3, TYPE_POSITIVE_FLOAT, false}, 131 {{"Max norm"}, 13, 8, TYPE_POSITIVE_FLOAT, false}, 132 {{"SSIMULACRA2"}, 13, 8, TYPE_POSITIVE_FLOAT, false}, 133 {{"PSNR"}, 7, 2, TYPE_POSITIVE_FLOAT, false}, 134 {{"pnorm"}, 13, 8, TYPE_POSITIVE_FLOAT, false}, 135 {{"BPP*pnorm"}, 16, 12, TYPE_POSITIVE_FLOAT, false}, 136 {{"QABPP"}, 8, 3, TYPE_POSITIVE_FLOAT, false}, 137 {{"Bugs"}, 7, 5, TYPE_COUNT, false}, 138 }; 139 // clang-format on 140 141 for (size_t i = 0; i < num_extra_metrics; i++) { 142 result.push_back(ExtraMetricDescriptor()); 143 } 144 145 return result; 146 } 147 148 // Computes throughput [megapixels/s] as reported in the report table 149 double ComputeSpeed(size_t pixels, double time_s) { 150 if (time_s == 0.0) return 0; 151 return pixels * 1E-6 / time_s; 152 } 153 154 std::string FormatFloat(const ColumnDescriptor& label, double value) { 155 std::string result = 156 StringPrintf("%*.*f", label.width - 1, label.precision, value); 157 158 // Reduce precision if the value is too wide for the column. However, keep 159 // at least one digit to the right of the point, and especially the integer 160 // digits. 161 if (result.size() >= label.width) { 162 size_t point = result.rfind('.'); 163 if (point != std::string::npos) { 164 int end = std::max<int>(point + 2, label.width - 1); 165 result.resize(end); 166 } 167 } 168 return result; 169 } 170 171 } // namespace 172 173 std::string StringPrintf(const char* format, ...) { 174 char buf[2000]; 175 va_list args; 176 va_start(args, format); 177 vsnprintf(buf, sizeof(buf), format, args); 178 va_end(args); 179 return std::string(buf); 180 } 181 182 void BenchmarkStats::Assimilate(const BenchmarkStats& victim) { 183 total_input_files += victim.total_input_files; 184 total_input_pixels += victim.total_input_pixels; 185 total_compressed_size += victim.total_compressed_size; 186 total_adj_compressed_size += victim.total_adj_compressed_size; 187 total_time_encode += victim.total_time_encode; 188 total_time_decode += victim.total_time_decode; 189 max_distance += pow(victim.max_distance, 2.0) * victim.total_input_pixels; 190 distance_p_norm += victim.distance_p_norm; 191 ssimulacra2 += victim.ssimulacra2; 192 psnr += victim.psnr; 193 distances.insert(distances.end(), victim.distances.begin(), 194 victim.distances.end()); 195 total_errors += victim.total_errors; 196 jxl_stats.Assimilate(victim.jxl_stats); 197 if (extra_metrics.size() < victim.extra_metrics.size()) { 198 extra_metrics.resize(victim.extra_metrics.size()); 199 } 200 for (size_t i = 0; i < victim.extra_metrics.size(); i++) { 201 extra_metrics[i] += victim.extra_metrics[i]; 202 } 203 } 204 205 void BenchmarkStats::PrintMoreStats() const { 206 if (Args()->print_more_stats) { 207 jxl_stats.Print(); 208 } 209 if (Args()->print_distance_percentiles) { 210 std::vector<float> sorted = distances; 211 std::sort(sorted.begin(), sorted.end()); 212 int p50idx = 0.5 * distances.size(); 213 int p90idx = 0.9 * distances.size(); 214 printf("50th/90th percentile distance: %.8f %.8f\n", sorted[p50idx], 215 sorted[p90idx]); 216 } 217 } 218 219 std::vector<ColumnValue> BenchmarkStats::ComputeColumns( 220 const std::string& codec_desc, size_t corpus_size) const { 221 JXL_CHECK(total_input_files == corpus_size); 222 const double comp_bpp = total_compressed_size * 8.0 / total_input_pixels; 223 const double adj_comp_bpp = 224 total_adj_compressed_size * 8.0 / total_input_pixels; 225 // Note: this is not affected by alpha nor bit depth. 226 const double compression_speed = 227 ComputeSpeed(total_input_pixels, total_time_encode); 228 const double decompression_speed = 229 ComputeSpeed(total_input_pixels, total_time_decode); 230 const double psnr_avg = psnr / total_input_pixels; 231 const double p_norm_avg = distance_p_norm / total_input_pixels; 232 const double ssimulacra2_avg = ssimulacra2 / total_input_pixels; 233 const double bpp_p_norm = p_norm_avg * comp_bpp; 234 235 const double max_distance_avg = 236 sqrt(max_distance / static_cast<double>(total_input_pixels)); 237 238 std::vector<ColumnValue> values( 239 GetColumnDescriptors(extra_metrics.size()).size()); 240 241 values[0].s = codec_desc; 242 values[1].i = total_input_pixels / 1000; 243 values[2].i = total_compressed_size; 244 values[3].f = comp_bpp; 245 values[4].f = compression_speed; 246 values[5].f = decompression_speed; 247 values[6].f = static_cast<double>(max_distance_avg); 248 values[7].f = ssimulacra2_avg; 249 values[8].f = psnr_avg; 250 values[9].f = p_norm_avg; 251 values[10].f = bpp_p_norm; 252 values[11].f = adj_comp_bpp; 253 values[12].i = total_errors; 254 for (size_t i = 0; i < extra_metrics.size(); i++) { 255 values[13 + i].f = extra_metrics[i] / total_input_files; 256 } 257 return values; 258 } 259 260 static std::string PrintFormattedEntries( 261 size_t num_extra_metrics, const std::vector<ColumnValue>& values) { 262 const auto& descriptors = GetColumnDescriptors(num_extra_metrics); 263 264 std::string out; 265 for (size_t i = 0; i < descriptors.size(); i++) { 266 if (!Args()->more_columns && descriptors[i].more) continue; 267 std::string value; 268 if (descriptors[i].type == TYPE_STRING) { 269 value = values[i].s; 270 } else if (descriptors[i].type == TYPE_SIZE) { 271 value = values[i].i ? StringPrintf("%" PRIdS, values[i].i) : "---"; 272 } else if (descriptors[i].type == TYPE_POSITIVE_FLOAT) { 273 value = FormatFloat(descriptors[i], values[i].f); 274 value = FormatFloat(descriptors[i], values[i].f); 275 } else if (descriptors[i].type == TYPE_COUNT) { 276 value = StringPrintf("%" PRIdS, values[i].i); 277 } 278 279 int numspaces = descriptors[i].width - value.size(); 280 if (numspaces < 1) { 281 numspaces = 1; 282 } 283 // All except the first one are right-aligned, the first one is the name, 284 // others are numbers with digits matching from the right. 285 if (i == 0) out += value; 286 out += std::string(numspaces, ' '); 287 if (i != 0) out += value; 288 } 289 return out + "\n"; 290 } 291 292 std::string BenchmarkStats::PrintLine(const std::string& codec_desc, 293 size_t corpus_size) const { 294 std::vector<ColumnValue> values = ComputeColumns(codec_desc, corpus_size); 295 return PrintFormattedEntries(extra_metrics.size(), values); 296 } 297 298 std::string PrintHeader(const std::vector<std::string>& extra_metrics_names) { 299 std::string out; 300 // Extra metrics are handled separately. 301 const auto& descriptors = GetColumnDescriptors(0); 302 for (size_t i = 0; i < descriptors.size(); i++) { 303 if (!Args()->more_columns && descriptors[i].more) continue; 304 const std::string& label = descriptors[i].label; 305 int numspaces = descriptors[i].width - label.size(); 306 // All except the first one are right-aligned. 307 if (i == 0) out += label; 308 out += std::string(numspaces, ' '); 309 if (i != 0) out += label; 310 } 311 for (const std::string& em : extra_metrics_names) { 312 int numspaces = ExtraMetricDescriptor().width - em.size(); 313 JXL_CHECK(numspaces >= 1); 314 out += std::string(numspaces, ' '); 315 out += em; 316 } 317 out += '\n'; 318 for (const auto& descriptor : descriptors) { 319 if (!Args()->more_columns && descriptor.more) continue; 320 out += std::string(descriptor.width, '-'); 321 } 322 out += std::string(ExtraMetricDescriptor().width * extra_metrics_names.size(), 323 '-'); 324 return out + "\n"; 325 } 326 327 std::string PrintAggregate( 328 size_t num_extra_metrics, 329 const std::vector<std::vector<ColumnValue>>& aggregate) { 330 const auto& descriptors = GetColumnDescriptors(num_extra_metrics); 331 332 for (size_t i = 0; i < aggregate.size(); i++) { 333 // Check when statistics has wrong amount of column entries 334 JXL_CHECK(aggregate[i].size() == descriptors.size()); 335 } 336 337 std::vector<ColumnValue> result(descriptors.size()); 338 339 // Statistics for the aggregate row are combined together with different 340 // formulas than Assimilate uses for combining the statistics of files. 341 for (size_t i = 0; i < descriptors.size(); i++) { 342 if (descriptors[i].type == TYPE_STRING) { 343 // "---" for the Iters column since this does not have meaning for 344 // the aggregate stats. 345 result[i].s = i == 0 ? "Aggregate:" : "---"; 346 continue; 347 } 348 if (descriptors[i].type == TYPE_COUNT) { 349 size_t sum = 0; 350 for (size_t j = 0; j < aggregate.size(); j++) { 351 sum += aggregate[j][i].i; 352 } 353 result[i].i = sum; 354 continue; 355 } 356 357 ColumnType type = descriptors[i].type; 358 359 double logsum = 0; 360 size_t numvalid = 0; 361 for (size_t j = 0; j < aggregate.size(); j++) { 362 double value = 363 (type == TYPE_SIZE) ? aggregate[j][i].i : aggregate[j][i].f; 364 if (value > 0) { 365 numvalid++; 366 logsum += std::log2(value); 367 } 368 } 369 double geomean = numvalid ? std::exp2(logsum / numvalid) : 0.0; 370 371 if (type == TYPE_SIZE || type == TYPE_COUNT) { 372 result[i].i = static_cast<size_t>(geomean + 0.5); 373 } else if (type == TYPE_POSITIVE_FLOAT) { 374 result[i].f = geomean; 375 } else { 376 JXL_ABORT("unknown entry type"); 377 } 378 } 379 380 return PrintFormattedEntries(num_extra_metrics, result); 381 } 382 383 } // namespace tools 384 } // namespace jpegxl