libjxl

FORK: libjxl patches used on blog
git clone https://git.neptards.moe/blog/libjxl.git
Log | Files | Refs | Submodules | README | LICENSE

tf_gbench.cc (5437B)


      1 // Copyright (c) the JPEG XL Project Authors. All rights reserved.
      2 //
      3 // Use of this source code is governed by a BSD-style
      4 // license that can be found in the LICENSE file.
      5 
      6 #include "benchmark/benchmark.h"
      7 #include "lib/jxl/image_ops.h"
      8 
      9 #undef HWY_TARGET_INCLUDE
     10 #define HWY_TARGET_INCLUDE "lib/jxl/tf_gbench.cc"
     11 #include <hwy/foreach_target.h>
     12 #include <hwy/highway.h>
     13 
     14 #include "lib/jxl/cms/transfer_functions-inl.h"
     15 
     16 HWY_BEFORE_NAMESPACE();
     17 namespace jxl {
     18 namespace HWY_NAMESPACE {
     19 namespace {
     20 
     21 #define RUN_BENCHMARK(F)                                            \
     22   constexpr size_t kNum = 1 << 12;                                  \
     23   HWY_FULL(float) d;                                                \
     24   /* Three parallel runs, as this will run on R, G and B. */        \
     25   auto sum1 = Zero(d);                                              \
     26   auto sum2 = Zero(d);                                              \
     27   auto sum3 = Zero(d);                                              \
     28   for (auto _ : state) {                                            \
     29     auto x = Set(d, 1e-5);                                          \
     30     auto v1 = Set(d, 1e-5);                                         \
     31     auto v2 = Set(d, 1.1e-5);                                       \
     32     auto v3 = Set(d, 1.2e-5);                                       \
     33     for (size_t i = 0; i < kNum; i++) {                             \
     34       sum1 += F(d, v1);                                             \
     35       sum2 += F(d, v2);                                             \
     36       sum3 += F(d, v3);                                             \
     37       v1 += x;                                                      \
     38       v2 += x;                                                      \
     39       v3 += x;                                                      \
     40     }                                                               \
     41   }                                                                 \
     42   /* floats per second */                                           \
     43   state.SetItemsProcessed(kNum* state.iterations() * Lanes(d) * 3); \
     44   benchmark::DoNotOptimize(sum1 + sum2 + sum3);
     45 
     46 #define RUN_BENCHMARK_SCALAR(F, I)                           \
     47   constexpr size_t kNum = 1 << 12;                           \
     48   /* Three parallel runs, as this will run on R, G and B. */ \
     49   float sum1 = 0, sum2 = 0, sum3 = 0;                        \
     50   for (auto _ : state) {                                     \
     51     float x = 1e-5;                                          \
     52     float v1 = 1e-5;                                         \
     53     float v2 = 1.1e-5;                                       \
     54     float v3 = 1.2e-5;                                       \
     55     for (size_t i = 0; i < kNum; i++) {                      \
     56       sum1 += F(I, v1);                                      \
     57       sum2 += F(I, v2);                                      \
     58       sum3 += F(I, v3);                                      \
     59       v1 += x;                                               \
     60       v2 += x;                                               \
     61       v3 += x;                                               \
     62     }                                                        \
     63   }                                                          \
     64   /* floats per second */                                    \
     65   state.SetItemsProcessed(kNum* state.iterations() * 3);     \
     66   benchmark::DoNotOptimize(sum1 + sum2 + sum3);
     67 
     68 HWY_NOINLINE void BM_FastSRGB(benchmark::State& state) {
     69   RUN_BENCHMARK(FastLinearToSRGB);
     70 }
     71 
     72 HWY_NOINLINE void BM_TFSRGB(benchmark::State& state) {
     73   RUN_BENCHMARK(TF_SRGB().EncodedFromDisplay);
     74 }
     75 
     76 HWY_NOINLINE void BM_PQDFE(benchmark::State& state) {
     77   TF_PQ tf_pq(10000.0);
     78   RUN_BENCHMARK(tf_pq.DisplayFromEncoded);
     79 }
     80 
     81 HWY_NOINLINE void BM_PQEFD(benchmark::State& state) {
     82   TF_PQ tf_pq(10000.0);
     83   RUN_BENCHMARK(tf_pq.EncodedFromDisplay);
     84 }
     85 
     86 HWY_NOINLINE void BM_PQSlowDFE(benchmark::State& state) {
     87   RUN_BENCHMARK_SCALAR(TF_PQ_Base::DisplayFromEncoded, 10000.0);
     88 }
     89 
     90 HWY_NOINLINE void BM_PQSlowEFD(benchmark::State& state) {
     91   RUN_BENCHMARK_SCALAR(TF_PQ_Base::EncodedFromDisplay, 10000.0);
     92 }
     93 }  // namespace
     94 // NOLINTNEXTLINE(google-readability-namespace-comments)
     95 }  // namespace HWY_NAMESPACE
     96 }  // namespace jxl
     97 HWY_AFTER_NAMESPACE();
     98 
     99 #if HWY_ONCE
    100 namespace jxl {
    101 namespace {
    102 
    103 HWY_EXPORT(BM_FastSRGB);
    104 HWY_EXPORT(BM_TFSRGB);
    105 HWY_EXPORT(BM_PQDFE);
    106 HWY_EXPORT(BM_PQEFD);
    107 HWY_EXPORT(BM_PQSlowDFE);
    108 HWY_EXPORT(BM_PQSlowEFD);
    109 
    110 float SRGB_pow(float _, float x) {
    111   return x < 0.0031308f ? 12.92f * x : 1.055f * powf(x, 1.0f / 2.4f) - 0.055f;
    112 }
    113 
    114 void BM_FastSRGB(benchmark::State& state) {
    115   HWY_DYNAMIC_DISPATCH(BM_FastSRGB)(state);
    116 }
    117 void BM_TFSRGB(benchmark::State& state) {
    118   HWY_DYNAMIC_DISPATCH(BM_TFSRGB)(state);
    119 }
    120 void BM_PQDFE(benchmark::State& state) {
    121   HWY_DYNAMIC_DISPATCH(BM_PQDFE)(state);
    122 }
    123 void BM_PQEFD(benchmark::State& state) {
    124   HWY_DYNAMIC_DISPATCH(BM_PQEFD)(state);
    125 }
    126 void BM_PQSlowDFE(benchmark::State& state) {
    127   HWY_DYNAMIC_DISPATCH(BM_PQSlowDFE)(state);
    128 }
    129 void BM_PQSlowEFD(benchmark::State& state) {
    130   HWY_DYNAMIC_DISPATCH(BM_PQSlowEFD)(state);
    131 }
    132 
    133 void BM_SRGB_pow(benchmark::State& state) { RUN_BENCHMARK_SCALAR(SRGB_pow, 0); }
    134 
    135 BENCHMARK(BM_FastSRGB);
    136 BENCHMARK(BM_TFSRGB);
    137 BENCHMARK(BM_SRGB_pow);
    138 BENCHMARK(BM_PQDFE);
    139 BENCHMARK(BM_PQEFD);
    140 BENCHMARK(BM_PQSlowDFE);
    141 BENCHMARK(BM_PQSlowEFD);
    142 
    143 }  // namespace
    144 }  // namespace jxl
    145 #endif