libjxl

FORK: libjxl patches used on blog
git clone https://git.neptards.moe/blog/libjxl.git
Log | Files | Refs | Submodules | README | LICENSE

convolve_slow.cc (7007B)


      1 // Copyright (c) the JPEG XL Project Authors. All rights reserved.
      2 //
      3 // Use of this source code is governed by a BSD-style
      4 // license that can be found in the LICENSE file.
      5 
      6 #include "lib/jxl/convolve.h"
      7 
      8 #include "lib/jxl/convolve-inl.h"
      9 
     10 namespace jxl {
     11 
     12 //------------------------------------------------------------------------------
     13 // Kernels
     14 
     15 // 4 instances of a given literal value, useful as input to LoadDup128.
     16 #define JXL_REP4(literal) literal, literal, literal, literal
     17 
     18 // Concentrates energy in low-frequency components (e.g. for antialiasing).
     19 const WeightsSymmetric3& WeightsSymmetric3Lowpass() {
     20   // Computed by research/convolve_weights.py's cubic spline approximations of
     21   // prolate spheroidal wave functions.
     22   constexpr float w0 = 0.36208932f;
     23   constexpr float w1 = 0.12820096f;
     24   constexpr float w2 = 0.03127668f;
     25   static constexpr WeightsSymmetric3 weights = {
     26       {JXL_REP4(w0)}, {JXL_REP4(w1)}, {JXL_REP4(w2)}};
     27   return weights;
     28 }
     29 
     30 const WeightsSeparable5& WeightsSeparable5Lowpass() {
     31   constexpr float w0 = 0.41714928f;
     32   constexpr float w1 = 0.25539268f;
     33   constexpr float w2 = 0.03603267f;
     34   static constexpr WeightsSeparable5 weights = {
     35       {JXL_REP4(w0), JXL_REP4(w1), JXL_REP4(w2)},
     36       {JXL_REP4(w0), JXL_REP4(w1), JXL_REP4(w2)}};
     37   return weights;
     38 }
     39 
     40 const WeightsSymmetric5& WeightsSymmetric5Lowpass() {
     41   static constexpr WeightsSymmetric5 weights = {
     42       {JXL_REP4(0.1740135f)}, {JXL_REP4(0.1065369f)}, {JXL_REP4(0.0150310f)},
     43       {JXL_REP4(0.0652254f)}, {JXL_REP4(0.0012984f)}, {JXL_REP4(0.0092025f)}};
     44   return weights;
     45 }
     46 
     47 const WeightsSeparable5& WeightsSeparable5Gaussian1() {
     48   constexpr float w0 = 0.38774f;
     49   constexpr float w1 = 0.24477f;
     50   constexpr float w2 = 0.06136f;
     51   static constexpr WeightsSeparable5 weights = {
     52       {JXL_REP4(w0), JXL_REP4(w1), JXL_REP4(w2)},
     53       {JXL_REP4(w0), JXL_REP4(w1), JXL_REP4(w2)}};
     54   return weights;
     55 }
     56 
     57 const WeightsSeparable5& WeightsSeparable5Gaussian2() {
     58   constexpr float w0 = 0.250301f;
     59   constexpr float w1 = 0.221461f;
     60   constexpr float w2 = 0.153388f;
     61   static constexpr WeightsSeparable5 weights = {
     62       {JXL_REP4(w0), JXL_REP4(w1), JXL_REP4(w2)},
     63       {JXL_REP4(w0), JXL_REP4(w1), JXL_REP4(w2)}};
     64   return weights;
     65 }
     66 
     67 #undef JXL_REP4
     68 
     69 //------------------------------------------------------------------------------
     70 // Slow
     71 
     72 namespace {
     73 
     74 template <class WrapX, class WrapY>
     75 float SlowSymmetric3Pixel(const ImageF& in, const int64_t ix, const int64_t iy,
     76                           const int64_t xsize, const int64_t ysize,
     77                           const WeightsSymmetric3& weights) {
     78   float sum = 0.0f;
     79 
     80   // ix: image; kx: kernel
     81   for (int64_t ky = -1; ky <= 1; ky++) {
     82     const int64_t y = WrapY()(iy + ky, ysize);
     83     const float* JXL_RESTRICT row_in = in.ConstRow(static_cast<size_t>(y));
     84 
     85     const float wc = ky == 0 ? weights.c[0] : weights.r[0];
     86     const float wlr = ky == 0 ? weights.r[0] : weights.d[0];
     87 
     88     const int64_t xm1 = WrapX()(ix - 1, xsize);
     89     const int64_t xp1 = WrapX()(ix + 1, xsize);
     90     sum += row_in[ix] * wc + (row_in[xm1] + row_in[xp1]) * wlr;
     91   }
     92   return sum;
     93 }
     94 
     95 template <class WrapY>
     96 void SlowSymmetric3Row(const ImageF& in, const int64_t iy, const int64_t xsize,
     97                        const int64_t ysize, const WeightsSymmetric3& weights,
     98                        float* JXL_RESTRICT row_out) {
     99   row_out[0] =
    100       SlowSymmetric3Pixel<WrapMirror, WrapY>(in, 0, iy, xsize, ysize, weights);
    101   for (int64_t ix = 1; ix < xsize - 1; ix++) {
    102     row_out[ix] = SlowSymmetric3Pixel<WrapUnchanged, WrapY>(in, ix, iy, xsize,
    103                                                             ysize, weights);
    104   }
    105   {
    106     const int64_t ix = xsize - 1;
    107     row_out[ix] = SlowSymmetric3Pixel<WrapMirror, WrapY>(in, ix, iy, xsize,
    108                                                          ysize, weights);
    109   }
    110 }
    111 
    112 }  // namespace
    113 
    114 void SlowSymmetric3(const ImageF& in, const Rect& rect,
    115                     const WeightsSymmetric3& weights, ThreadPool* pool,
    116                     ImageF* JXL_RESTRICT out) {
    117   const int64_t xsize = static_cast<int64_t>(rect.xsize());
    118   const int64_t ysize = static_cast<int64_t>(rect.ysize());
    119   const int64_t kRadius = 1;
    120 
    121   JXL_CHECK(RunOnPool(
    122       pool, 0, static_cast<uint32_t>(ysize), ThreadPool::NoInit,
    123       [&](const uint32_t task, size_t /*thread*/) {
    124         const int64_t iy = task;
    125         float* JXL_RESTRICT out_row = out->Row(static_cast<size_t>(iy));
    126 
    127         if (iy < kRadius || iy >= ysize - kRadius) {
    128           SlowSymmetric3Row<WrapMirror>(in, iy, xsize, ysize, weights, out_row);
    129         } else {
    130           SlowSymmetric3Row<WrapUnchanged>(in, iy, xsize, ysize, weights,
    131                                            out_row);
    132         }
    133       },
    134       "SlowSymmetric3"));
    135 }
    136 
    137 namespace {
    138 
    139 // Separable kernels, any radius.
    140 float SlowSeparablePixel(const ImageF& in, const Rect& rect, const int64_t x,
    141                          const int64_t y, const int64_t radius,
    142                          const float* JXL_RESTRICT horz_weights,
    143                          const float* JXL_RESTRICT vert_weights) {
    144   const size_t xsize = in.xsize();
    145   const size_t ysize = in.ysize();
    146   const WrapMirror wrap;
    147 
    148   float mul = 0.0f;
    149   for (int dy = -radius; dy <= radius; ++dy) {
    150     const float wy = vert_weights[std::abs(dy) * 4];
    151     const size_t sy = wrap(rect.y0() + y + dy, ysize);
    152     JXL_CHECK(sy < ysize);
    153     const float* const JXL_RESTRICT row = in.ConstRow(sy);
    154     for (int dx = -radius; dx <= radius; ++dx) {
    155       const float wx = horz_weights[std::abs(dx) * 4];
    156       const size_t sx = wrap(rect.x0() + x + dx, xsize);
    157       JXL_CHECK(sx < xsize);
    158       mul += row[sx] * wx * wy;
    159     }
    160   }
    161   return mul;
    162 }
    163 
    164 template <int R, typename Weights>
    165 void SlowSeparable(const ImageF& in, const Rect& in_rect,
    166                    const Weights& weights, ThreadPool* pool, ImageF* out,
    167                    const Rect& out_rect) {
    168   JXL_ASSERT(in_rect.xsize() == out_rect.xsize());
    169   JXL_ASSERT(in_rect.ysize() == out_rect.ysize());
    170   JXL_ASSERT(in_rect.IsInside(Rect(in)));
    171   JXL_ASSERT(out_rect.IsInside(Rect(*out)));
    172   const float* horz_weights = &weights.horz[0];
    173   const float* vert_weights = &weights.vert[0];
    174 
    175   const size_t ysize = in_rect.ysize();
    176   JXL_CHECK(RunOnPool(
    177       pool, 0, static_cast<uint32_t>(ysize), ThreadPool::NoInit,
    178       [&](const uint32_t task, size_t /*thread*/) {
    179         const int64_t y = task;
    180 
    181         float* const JXL_RESTRICT row_out = out_rect.Row(out, y);
    182         for (size_t x = 0; x < in_rect.xsize(); ++x) {
    183           row_out[x] = SlowSeparablePixel(in, in_rect, x, y, /*radius=*/R,
    184                                           horz_weights, vert_weights);
    185         }
    186       },
    187       "SlowSeparable"));
    188 }
    189 
    190 }  // namespace
    191 
    192 void SlowSeparable5(const ImageF& in, const Rect& in_rect,
    193                     const WeightsSeparable5& weights, ThreadPool* pool,
    194                     ImageF* out, const Rect& out_rect) {
    195   SlowSeparable<2>(in, in_rect, weights, pool, out, out_rect);
    196 }
    197 
    198 }  // namespace jxl