tf_gbench.cc (5437B)
1 // Copyright (c) the JPEG XL Project Authors. All rights reserved. 2 // 3 // Use of this source code is governed by a BSD-style 4 // license that can be found in the LICENSE file. 5 6 #include "benchmark/benchmark.h" 7 #include "lib/jxl/image_ops.h" 8 9 #undef HWY_TARGET_INCLUDE 10 #define HWY_TARGET_INCLUDE "lib/jxl/tf_gbench.cc" 11 #include <hwy/foreach_target.h> 12 #include <hwy/highway.h> 13 14 #include "lib/jxl/cms/transfer_functions-inl.h" 15 16 HWY_BEFORE_NAMESPACE(); 17 namespace jxl { 18 namespace HWY_NAMESPACE { 19 namespace { 20 21 #define RUN_BENCHMARK(F) \ 22 constexpr size_t kNum = 1 << 12; \ 23 HWY_FULL(float) d; \ 24 /* Three parallel runs, as this will run on R, G and B. */ \ 25 auto sum1 = Zero(d); \ 26 auto sum2 = Zero(d); \ 27 auto sum3 = Zero(d); \ 28 for (auto _ : state) { \ 29 auto x = Set(d, 1e-5); \ 30 auto v1 = Set(d, 1e-5); \ 31 auto v2 = Set(d, 1.1e-5); \ 32 auto v3 = Set(d, 1.2e-5); \ 33 for (size_t i = 0; i < kNum; i++) { \ 34 sum1 += F(d, v1); \ 35 sum2 += F(d, v2); \ 36 sum3 += F(d, v3); \ 37 v1 += x; \ 38 v2 += x; \ 39 v3 += x; \ 40 } \ 41 } \ 42 /* floats per second */ \ 43 state.SetItemsProcessed(kNum* state.iterations() * Lanes(d) * 3); \ 44 benchmark::DoNotOptimize(sum1 + sum2 + sum3); 45 46 #define RUN_BENCHMARK_SCALAR(F, I) \ 47 constexpr size_t kNum = 1 << 12; \ 48 /* Three parallel runs, as this will run on R, G and B. */ \ 49 float sum1 = 0, sum2 = 0, sum3 = 0; \ 50 for (auto _ : state) { \ 51 float x = 1e-5; \ 52 float v1 = 1e-5; \ 53 float v2 = 1.1e-5; \ 54 float v3 = 1.2e-5; \ 55 for (size_t i = 0; i < kNum; i++) { \ 56 sum1 += F(I, v1); \ 57 sum2 += F(I, v2); \ 58 sum3 += F(I, v3); \ 59 v1 += x; \ 60 v2 += x; \ 61 v3 += x; \ 62 } \ 63 } \ 64 /* floats per second */ \ 65 state.SetItemsProcessed(kNum* state.iterations() * 3); \ 66 benchmark::DoNotOptimize(sum1 + sum2 + sum3); 67 68 HWY_NOINLINE void BM_FastSRGB(benchmark::State& state) { 69 RUN_BENCHMARK(FastLinearToSRGB); 70 } 71 72 HWY_NOINLINE void BM_TFSRGB(benchmark::State& state) { 73 RUN_BENCHMARK(TF_SRGB().EncodedFromDisplay); 74 } 75 76 HWY_NOINLINE void BM_PQDFE(benchmark::State& state) { 77 TF_PQ tf_pq(10000.0); 78 RUN_BENCHMARK(tf_pq.DisplayFromEncoded); 79 } 80 81 HWY_NOINLINE void BM_PQEFD(benchmark::State& state) { 82 TF_PQ tf_pq(10000.0); 83 RUN_BENCHMARK(tf_pq.EncodedFromDisplay); 84 } 85 86 HWY_NOINLINE void BM_PQSlowDFE(benchmark::State& state) { 87 RUN_BENCHMARK_SCALAR(TF_PQ_Base::DisplayFromEncoded, 10000.0); 88 } 89 90 HWY_NOINLINE void BM_PQSlowEFD(benchmark::State& state) { 91 RUN_BENCHMARK_SCALAR(TF_PQ_Base::EncodedFromDisplay, 10000.0); 92 } 93 } // namespace 94 // NOLINTNEXTLINE(google-readability-namespace-comments) 95 } // namespace HWY_NAMESPACE 96 } // namespace jxl 97 HWY_AFTER_NAMESPACE(); 98 99 #if HWY_ONCE 100 namespace jxl { 101 namespace { 102 103 HWY_EXPORT(BM_FastSRGB); 104 HWY_EXPORT(BM_TFSRGB); 105 HWY_EXPORT(BM_PQDFE); 106 HWY_EXPORT(BM_PQEFD); 107 HWY_EXPORT(BM_PQSlowDFE); 108 HWY_EXPORT(BM_PQSlowEFD); 109 110 float SRGB_pow(float _, float x) { 111 return x < 0.0031308f ? 12.92f * x : 1.055f * powf(x, 1.0f / 2.4f) - 0.055f; 112 } 113 114 void BM_FastSRGB(benchmark::State& state) { 115 HWY_DYNAMIC_DISPATCH(BM_FastSRGB)(state); 116 } 117 void BM_TFSRGB(benchmark::State& state) { 118 HWY_DYNAMIC_DISPATCH(BM_TFSRGB)(state); 119 } 120 void BM_PQDFE(benchmark::State& state) { 121 HWY_DYNAMIC_DISPATCH(BM_PQDFE)(state); 122 } 123 void BM_PQEFD(benchmark::State& state) { 124 HWY_DYNAMIC_DISPATCH(BM_PQEFD)(state); 125 } 126 void BM_PQSlowDFE(benchmark::State& state) { 127 HWY_DYNAMIC_DISPATCH(BM_PQSlowDFE)(state); 128 } 129 void BM_PQSlowEFD(benchmark::State& state) { 130 HWY_DYNAMIC_DISPATCH(BM_PQSlowEFD)(state); 131 } 132 133 void BM_SRGB_pow(benchmark::State& state) { RUN_BENCHMARK_SCALAR(SRGB_pow, 0); } 134 135 BENCHMARK(BM_FastSRGB); 136 BENCHMARK(BM_TFSRGB); 137 BENCHMARK(BM_SRGB_pow); 138 BENCHMARK(BM_PQDFE); 139 BENCHMARK(BM_PQEFD); 140 BENCHMARK(BM_PQSlowDFE); 141 BENCHMARK(BM_PQSlowEFD); 142 143 } // namespace 144 } // namespace jxl 145 #endif