convolve_slow.cc (7007B)
1 // Copyright (c) the JPEG XL Project Authors. All rights reserved. 2 // 3 // Use of this source code is governed by a BSD-style 4 // license that can be found in the LICENSE file. 5 6 #include "lib/jxl/convolve.h" 7 8 #include "lib/jxl/convolve-inl.h" 9 10 namespace jxl { 11 12 //------------------------------------------------------------------------------ 13 // Kernels 14 15 // 4 instances of a given literal value, useful as input to LoadDup128. 16 #define JXL_REP4(literal) literal, literal, literal, literal 17 18 // Concentrates energy in low-frequency components (e.g. for antialiasing). 19 const WeightsSymmetric3& WeightsSymmetric3Lowpass() { 20 // Computed by research/convolve_weights.py's cubic spline approximations of 21 // prolate spheroidal wave functions. 22 constexpr float w0 = 0.36208932f; 23 constexpr float w1 = 0.12820096f; 24 constexpr float w2 = 0.03127668f; 25 static constexpr WeightsSymmetric3 weights = { 26 {JXL_REP4(w0)}, {JXL_REP4(w1)}, {JXL_REP4(w2)}}; 27 return weights; 28 } 29 30 const WeightsSeparable5& WeightsSeparable5Lowpass() { 31 constexpr float w0 = 0.41714928f; 32 constexpr float w1 = 0.25539268f; 33 constexpr float w2 = 0.03603267f; 34 static constexpr WeightsSeparable5 weights = { 35 {JXL_REP4(w0), JXL_REP4(w1), JXL_REP4(w2)}, 36 {JXL_REP4(w0), JXL_REP4(w1), JXL_REP4(w2)}}; 37 return weights; 38 } 39 40 const WeightsSymmetric5& WeightsSymmetric5Lowpass() { 41 static constexpr WeightsSymmetric5 weights = { 42 {JXL_REP4(0.1740135f)}, {JXL_REP4(0.1065369f)}, {JXL_REP4(0.0150310f)}, 43 {JXL_REP4(0.0652254f)}, {JXL_REP4(0.0012984f)}, {JXL_REP4(0.0092025f)}}; 44 return weights; 45 } 46 47 const WeightsSeparable5& WeightsSeparable5Gaussian1() { 48 constexpr float w0 = 0.38774f; 49 constexpr float w1 = 0.24477f; 50 constexpr float w2 = 0.06136f; 51 static constexpr WeightsSeparable5 weights = { 52 {JXL_REP4(w0), JXL_REP4(w1), JXL_REP4(w2)}, 53 {JXL_REP4(w0), JXL_REP4(w1), JXL_REP4(w2)}}; 54 return weights; 55 } 56 57 const WeightsSeparable5& WeightsSeparable5Gaussian2() { 58 constexpr float w0 = 0.250301f; 59 constexpr float w1 = 0.221461f; 60 constexpr float w2 = 0.153388f; 61 static constexpr WeightsSeparable5 weights = { 62 {JXL_REP4(w0), JXL_REP4(w1), JXL_REP4(w2)}, 63 {JXL_REP4(w0), JXL_REP4(w1), JXL_REP4(w2)}}; 64 return weights; 65 } 66 67 #undef JXL_REP4 68 69 //------------------------------------------------------------------------------ 70 // Slow 71 72 namespace { 73 74 template <class WrapX, class WrapY> 75 float SlowSymmetric3Pixel(const ImageF& in, const int64_t ix, const int64_t iy, 76 const int64_t xsize, const int64_t ysize, 77 const WeightsSymmetric3& weights) { 78 float sum = 0.0f; 79 80 // ix: image; kx: kernel 81 for (int64_t ky = -1; ky <= 1; ky++) { 82 const int64_t y = WrapY()(iy + ky, ysize); 83 const float* JXL_RESTRICT row_in = in.ConstRow(static_cast<size_t>(y)); 84 85 const float wc = ky == 0 ? weights.c[0] : weights.r[0]; 86 const float wlr = ky == 0 ? weights.r[0] : weights.d[0]; 87 88 const int64_t xm1 = WrapX()(ix - 1, xsize); 89 const int64_t xp1 = WrapX()(ix + 1, xsize); 90 sum += row_in[ix] * wc + (row_in[xm1] + row_in[xp1]) * wlr; 91 } 92 return sum; 93 } 94 95 template <class WrapY> 96 void SlowSymmetric3Row(const ImageF& in, const int64_t iy, const int64_t xsize, 97 const int64_t ysize, const WeightsSymmetric3& weights, 98 float* JXL_RESTRICT row_out) { 99 row_out[0] = 100 SlowSymmetric3Pixel<WrapMirror, WrapY>(in, 0, iy, xsize, ysize, weights); 101 for (int64_t ix = 1; ix < xsize - 1; ix++) { 102 row_out[ix] = SlowSymmetric3Pixel<WrapUnchanged, WrapY>(in, ix, iy, xsize, 103 ysize, weights); 104 } 105 { 106 const int64_t ix = xsize - 1; 107 row_out[ix] = SlowSymmetric3Pixel<WrapMirror, WrapY>(in, ix, iy, xsize, 108 ysize, weights); 109 } 110 } 111 112 } // namespace 113 114 void SlowSymmetric3(const ImageF& in, const Rect& rect, 115 const WeightsSymmetric3& weights, ThreadPool* pool, 116 ImageF* JXL_RESTRICT out) { 117 const int64_t xsize = static_cast<int64_t>(rect.xsize()); 118 const int64_t ysize = static_cast<int64_t>(rect.ysize()); 119 const int64_t kRadius = 1; 120 121 JXL_CHECK(RunOnPool( 122 pool, 0, static_cast<uint32_t>(ysize), ThreadPool::NoInit, 123 [&](const uint32_t task, size_t /*thread*/) { 124 const int64_t iy = task; 125 float* JXL_RESTRICT out_row = out->Row(static_cast<size_t>(iy)); 126 127 if (iy < kRadius || iy >= ysize - kRadius) { 128 SlowSymmetric3Row<WrapMirror>(in, iy, xsize, ysize, weights, out_row); 129 } else { 130 SlowSymmetric3Row<WrapUnchanged>(in, iy, xsize, ysize, weights, 131 out_row); 132 } 133 }, 134 "SlowSymmetric3")); 135 } 136 137 namespace { 138 139 // Separable kernels, any radius. 140 float SlowSeparablePixel(const ImageF& in, const Rect& rect, const int64_t x, 141 const int64_t y, const int64_t radius, 142 const float* JXL_RESTRICT horz_weights, 143 const float* JXL_RESTRICT vert_weights) { 144 const size_t xsize = in.xsize(); 145 const size_t ysize = in.ysize(); 146 const WrapMirror wrap; 147 148 float mul = 0.0f; 149 for (int dy = -radius; dy <= radius; ++dy) { 150 const float wy = vert_weights[std::abs(dy) * 4]; 151 const size_t sy = wrap(rect.y0() + y + dy, ysize); 152 JXL_CHECK(sy < ysize); 153 const float* const JXL_RESTRICT row = in.ConstRow(sy); 154 for (int dx = -radius; dx <= radius; ++dx) { 155 const float wx = horz_weights[std::abs(dx) * 4]; 156 const size_t sx = wrap(rect.x0() + x + dx, xsize); 157 JXL_CHECK(sx < xsize); 158 mul += row[sx] * wx * wy; 159 } 160 } 161 return mul; 162 } 163 164 template <int R, typename Weights> 165 void SlowSeparable(const ImageF& in, const Rect& in_rect, 166 const Weights& weights, ThreadPool* pool, ImageF* out, 167 const Rect& out_rect) { 168 JXL_ASSERT(in_rect.xsize() == out_rect.xsize()); 169 JXL_ASSERT(in_rect.ysize() == out_rect.ysize()); 170 JXL_ASSERT(in_rect.IsInside(Rect(in))); 171 JXL_ASSERT(out_rect.IsInside(Rect(*out))); 172 const float* horz_weights = &weights.horz[0]; 173 const float* vert_weights = &weights.vert[0]; 174 175 const size_t ysize = in_rect.ysize(); 176 JXL_CHECK(RunOnPool( 177 pool, 0, static_cast<uint32_t>(ysize), ThreadPool::NoInit, 178 [&](const uint32_t task, size_t /*thread*/) { 179 const int64_t y = task; 180 181 float* const JXL_RESTRICT row_out = out_rect.Row(out, y); 182 for (size_t x = 0; x < in_rect.xsize(); ++x) { 183 row_out[x] = SlowSeparablePixel(in, in_rect, x, y, /*radius=*/R, 184 horz_weights, vert_weights); 185 } 186 }, 187 "SlowSeparable")); 188 } 189 190 } // namespace 191 192 void SlowSeparable5(const ImageF& in, const Rect& in_rect, 193 const WeightsSeparable5& weights, ThreadPool* pool, 194 ImageF* out, const Rect& out_rect) { 195 SlowSeparable<2>(in, in_rect, weights, pool, out, out_rect); 196 } 197 198 } // namespace jxl