libjxl

FORK: libjxl patches used on blog
git clone https://git.neptards.moe/blog/libjxl.git
Log | Files | Refs | Submodules | README | LICENSE

gauss_blur_test.cc (27447B)


      1 // Copyright (c) the JPEG XL Project Authors. All rights reserved.
      2 //
      3 // Use of this source code is governed by a BSD-style
      4 // license that can be found in the LICENSE file.
      5 
      6 #include "tools/gauss_blur.h"
      7 
      8 #include <cmath>
      9 #include <hwy/targets.h>
     10 #include <vector>
     11 
     12 #include "lib/jxl/base/printf_macros.h"
     13 #include "lib/jxl/image_ops.h"
     14 #include "lib/jxl/image_test_utils.h"
     15 #include "lib/jxl/testing.h"
     16 
     17 namespace jxl {
     18 
     19 void ExtrapolateBorders(const float* const JXL_RESTRICT row_in,
     20                         float* const JXL_RESTRICT row_out, const int xsize,
     21                         const int radius) {
     22   const int lastcol = xsize - 1;
     23   for (int x = 1; x <= radius; ++x) {
     24     row_out[-x] = row_in[std::min(x, xsize - 1)];
     25   }
     26   memcpy(row_out, row_in, xsize * sizeof(row_out[0]));
     27   for (int x = 1; x <= radius; ++x) {
     28     row_out[lastcol + x] = row_in[std::max(0, lastcol - x)];
     29   }
     30 }
     31 
     32 ImageF ConvolveXAndTranspose(const ImageF& in,
     33                              const std::vector<float>& kernel) {
     34   JXL_ASSERT(kernel.size() % 2 == 1);
     35   JXL_ASSIGN_OR_DIE(ImageF out, ImageF::Create(in.ysize(), in.xsize()));
     36   const int r = kernel.size() / 2;
     37   std::vector<float> row_tmp(in.xsize() + 2 * r);
     38   float* const JXL_RESTRICT rowp = &row_tmp[r];
     39   std::vector<float> padded_k = kernel;
     40   padded_k.resize(padded_k.size());
     41   const float* const kernelp = &padded_k[r];
     42   for (size_t y = 0; y < in.ysize(); ++y) {
     43     ExtrapolateBorders(in.Row(y), rowp, in.xsize(), r);
     44     size_t x = 0;
     45     for (; x < static_cast<uint32_t>(r) && x < in.xsize(); ++x) {
     46       float sum = 0.0f;
     47       for (int i = -r; i <= r; ++i) {
     48         sum += rowp[std::max<int>(
     49                    0, std::min<int>(static_cast<int>(x) + i, in.xsize()))] *
     50                kernelp[i];
     51       }
     52       out.Row(x)[y] = sum;
     53     }
     54     for (; x + r < in.xsize(); ++x) {
     55       float sum = 0.0f;
     56       for (int i = -r; i <= r; ++i) {
     57         sum += rowp[x + i] * kernelp[i];
     58       }
     59       out.Row(x)[y] = sum;
     60     }
     61     for (; x < in.xsize(); ++x) {
     62       float sum = 0.0f;
     63       for (int i = -r; i <= r; ++i) {
     64         sum += rowp[std::max<int>(
     65                    0, std::min<int>(static_cast<int>(x) + i, in.xsize()))] *
     66                kernelp[i];
     67       }
     68       out.Row(x)[y] = sum;
     69     }
     70   }
     71   return out;
     72 }
     73 
     74 // All convolution functions below apply mirroring of the input on the borders
     75 // in the following way:
     76 //
     77 //     input: [a0 a1 a2 ...  aN]
     78 //     mirrored input: [aR ... a1 | a0 a1 a2 .... aN | aN-1 ... aN-R]
     79 //
     80 // where R is the radius of the kernel (i.e. kernel size is 2*R+1).
     81 ImageF Convolve(const ImageF& in, const std::vector<float>& kernel) {
     82   ImageF tmp = ConvolveXAndTranspose(in, kernel);
     83   return ConvolveXAndTranspose(tmp, kernel);
     84 }
     85 
     86 bool NearEdge(const int64_t width, const int64_t peak) {
     87   // When around 3*sigma from the edge, there is negligible truncation.
     88   return peak < 10 || peak > width - 10;
     89 }
     90 
     91 // Follow the curve downwards by scanning right from `peak` and verifying
     92 // identical values at the same offset to the left.
     93 void VerifySymmetric(const int64_t width, const int64_t peak,
     94                      const float* out) {
     95   const double tolerance = NearEdge(width, peak) ? 0.015 : 6E-7;
     96   for (int64_t i = 1;; ++i) {
     97     // Stop if we passed either end of the array
     98     if (peak - i < 0 || peak + i >= width) break;
     99     EXPECT_GT(out[peak + i - 1] + tolerance, out[peak + i]);  // descending
    100     EXPECT_NEAR(out[peak - i], out[peak + i], tolerance);     // symmetric
    101   }
    102 }
    103 
    104 void TestImpulseResponse(size_t width, size_t peak) {
    105   const auto rg3 = CreateRecursiveGaussian(3.0);
    106   const auto rg4 = CreateRecursiveGaussian(4.0);
    107   const auto rg5 = CreateRecursiveGaussian(5.0);
    108 
    109   // Extra padding for 4x unrolling
    110   auto in = hwy::AllocateAligned<float>(width + 3);
    111   memset(in.get(), 0, sizeof(float) * (width + 3));
    112   in[peak] = 1.0f;
    113 
    114   auto out3 = hwy::AllocateAligned<float>(width + 3);
    115   auto out4 = hwy::AllocateAligned<float>(width + 3);
    116   auto out5 = hwy::AllocateAligned<float>(width + 3);
    117   FastGaussian1D(rg3, width, in.get(), out3.get());
    118   FastGaussian1D(rg4, width, out3.get(), out4.get());
    119   FastGaussian1D(rg5, width, in.get(), out5.get());
    120 
    121   VerifySymmetric(width, peak, out3.get());
    122   VerifySymmetric(width, peak, out4.get());
    123   VerifySymmetric(width, peak, out5.get());
    124 
    125   // Wider kernel has flatter peak
    126   EXPECT_LT(out5[peak] + 0.05, out3[peak]);
    127 
    128   // Gauss3 o Gauss4 ~= Gauss5
    129   const double tolerance = NearEdge(width, peak) ? 0.04 : 0.01;
    130   for (size_t i = 0; i < width; ++i) {
    131     EXPECT_NEAR(out4[i], out5[i], tolerance);
    132   }
    133 }
    134 
    135 void TestImpulseResponseForWidth(size_t width) {
    136   for (size_t i = 0; i < width; ++i) {
    137     TestImpulseResponse(width, i);
    138   }
    139 }
    140 
    141 TEST(GaussBlurTest, ImpulseResponse) {
    142   TestImpulseResponseForWidth(10);  // tiny even
    143   TestImpulseResponseForWidth(15);  // small odd
    144   TestImpulseResponseForWidth(32);  // power of two
    145   TestImpulseResponseForWidth(31);  // power of two - 1
    146   TestImpulseResponseForWidth(33);  // power of two + 1
    147 }
    148 
    149 // Higher-precision version for accuracy test.
    150 ImageF ConvolveAndTransposeF64(const ImageF& in,
    151                                const std::vector<double>& kernel) {
    152   JXL_ASSERT(kernel.size() % 2 == 1);
    153   JXL_ASSIGN_OR_DIE(ImageF out, ImageF::Create(in.ysize(), in.xsize()));
    154   const int r = kernel.size() / 2;
    155   std::vector<float> row_tmp(in.xsize() + 2 * r);
    156   float* const JXL_RESTRICT rowp = &row_tmp[r];
    157   const double* const kernelp = &kernel[r];
    158   for (size_t y = 0; y < in.ysize(); ++y) {
    159     ExtrapolateBorders(in.Row(y), rowp, in.xsize(), r);
    160     for (size_t x = 0, ox = 0; x < in.xsize(); ++x, ++ox) {
    161       double sum = 0.0;
    162       for (int i = -r; i <= r; ++i) {
    163         sum += rowp[std::max<int>(
    164                    0, std::min<int>(static_cast<int>(x) + i, in.xsize()))] *
    165                kernelp[i];
    166       }
    167       out.Row(ox)[y] = static_cast<float>(sum);
    168     }
    169   }
    170   return out;
    171 }
    172 
    173 ImageF ConvolveF64(const ImageF& in, const std::vector<double>& kernel) {
    174   ImageF tmp = ConvolveAndTransposeF64(in, kernel);
    175   return ConvolveAndTransposeF64(tmp, kernel);
    176 }
    177 
    178 template <typename T>
    179 std::vector<T> GaussianKernel(int radius, T sigma) {
    180   JXL_ASSERT(sigma > 0.0);
    181   std::vector<T> kernel(2 * radius + 1);
    182   const T scaler = -1.0 / (2 * sigma * sigma);
    183   double sum = 0.0;
    184   for (int i = -radius; i <= radius; ++i) {
    185     const T val = std::exp(scaler * i * i);
    186     kernel[i + radius] = val;
    187     sum += val;
    188   }
    189   for (size_t i = 0; i < kernel.size(); ++i) {
    190     kernel[i] /= sum;
    191   }
    192   return kernel;
    193 }
    194 
    195 void TestDirac2D(size_t xsize, size_t ysize, double sigma) {
    196   JXL_ASSIGN_OR_DIE(ImageF in, ImageF::Create(xsize, ysize));
    197   ZeroFillImage(&in);
    198   // We anyway ignore the border below, so might as well choose the middle.
    199   in.Row(ysize / 2)[xsize / 2] = 1.0f;
    200 
    201   JXL_ASSIGN_OR_DIE(ImageF temp, ImageF::Create(xsize, ysize));
    202   JXL_ASSIGN_OR_DIE(ImageF out, ImageF::Create(xsize, ysize));
    203   const auto rg = CreateRecursiveGaussian(sigma);
    204   FastGaussian(
    205       rg, xsize, ysize, [&](size_t y) { return in.ConstRow(y); },
    206       [&](size_t y) { return temp.Row(y); },
    207       [&](size_t y) { return out.Row(y); });
    208 
    209   const std::vector<float> kernel =
    210       GaussianKernel(static_cast<int>(4 * sigma), static_cast<float>(sigma));
    211   const ImageF expected = Convolve(in, kernel);
    212 
    213   const double max_l1 = sigma < 1.5 ? 5E-3 : 6E-4;
    214   const size_t border = 2 * sigma;
    215 
    216   JXL_ASSERT_OK(VerifyRelativeError(expected, out, max_l1, 1E-8, _, border));
    217 }
    218 
    219 TEST(GaussBlurTest, Test2D) {
    220   const std::vector<int> dimensions{6, 15, 17, 64, 50, 49};
    221   for (int xsize : dimensions) {
    222     for (int ysize : dimensions) {
    223       for (double sigma : {1.0, 2.5, 3.6, 7.0}) {
    224         TestDirac2D(static_cast<size_t>(xsize), static_cast<size_t>(ysize),
    225                     sigma);
    226       }
    227     }
    228   }
    229 }
    230 
    231 // Slow (44 sec). To run, remove the disabled prefix.
    232 TEST(GaussBlurTest, DISABLED_SlowTestDirac1D) {
    233   const double sigma = 7.0;
    234   const auto rg = CreateRecursiveGaussian(sigma);
    235 
    236   // IPOL accuracy test uses 10^-15 tolerance, this is 2*10^-11.
    237   const size_t radius = static_cast<size_t>(7 * sigma);
    238   const std::vector<double> kernel = GaussianKernel(radius, sigma);
    239 
    240   const size_t length = 16384;
    241   JXL_ASSIGN_OR_DIE(ImageF inputs, ImageF::Create(length, 1));
    242   ZeroFillImage(&inputs);
    243 
    244   auto outputs = hwy::AllocateAligned<float>(length);
    245 
    246   // One per center position
    247   auto sum_abs_err = hwy::AllocateAligned<double>(length);
    248   std::fill(sum_abs_err.get(), sum_abs_err.get() + length, 0.0);
    249 
    250   for (size_t center = radius; center < length - radius; ++center) {
    251     inputs.Row(0)[center - 1] = 0.0f;  // reset last peak, entire array now 0
    252     inputs.Row(0)[center] = 1.0f;
    253     FastGaussian1D(rg, length, inputs.Row(0), outputs.get());
    254 
    255     const ImageF outputs_fir = ConvolveF64(inputs, kernel);
    256 
    257     for (size_t i = 0; i < length; ++i) {
    258       const float abs_err = std::abs(outputs[i] - outputs_fir.Row(0)[i]);
    259       sum_abs_err[i] += static_cast<double>(abs_err);
    260     }
    261   }
    262 
    263   const double max_abs_err =
    264       *std::max_element(sum_abs_err.get(), sum_abs_err.get() + length);
    265   printf("Max abs err: %.8e\n", max_abs_err);
    266 }
    267 
    268 // Sets "thickness" pixels on each border to "value". This is faster than
    269 // initializing the entire image and overwriting valid/interior pixels.
    270 template <typename T>
    271 void SetBorder(const size_t thickness, const T value, Plane<T>* image) {
    272   const size_t xsize = image->xsize();
    273   const size_t ysize = image->ysize();
    274   // Top: fill entire row
    275   for (size_t y = 0; y < std::min(thickness, ysize); ++y) {
    276     T* const JXL_RESTRICT row = image->Row(y);
    277     std::fill(row, row + xsize, value);
    278   }
    279 
    280   // Bottom: fill entire row
    281   for (size_t y = ysize - thickness; y < ysize; ++y) {
    282     T* const JXL_RESTRICT row = image->Row(y);
    283     std::fill(row, row + xsize, value);
    284   }
    285 
    286   // Left/right: fill the 'columns' on either side, but only if the image is
    287   // big enough that they don't already belong to the top/bottom rows.
    288   if (ysize >= 2 * thickness) {
    289     for (size_t y = thickness; y < ysize - thickness; ++y) {
    290       T* const JXL_RESTRICT row = image->Row(y);
    291       std::fill(row, row + thickness, value);
    292       std::fill(row + xsize - thickness, row + xsize, value);
    293     }
    294   }
    295 }
    296 
    297 void TestRandom(size_t xsize, size_t ysize, float min, float max, double sigma,
    298                 double max_l1, double max_rel) {
    299   printf("%4" PRIuS " x %4" PRIuS " %4.1f %4.1f sigma %.1f\n", xsize, ysize,
    300          min, max, sigma);
    301   JXL_ASSIGN_OR_DIE(ImageF in, ImageF::Create(xsize, ysize));
    302   RandomFillImage(&in, min, max, 65537 + xsize * 129 + ysize);
    303   // FastGaussian/Convolve handle borders differently, so keep those pixels 0.
    304   const size_t border = 4 * sigma;
    305   SetBorder(border, 0.0f, &in);
    306 
    307   JXL_ASSIGN_OR_DIE(ImageF temp, ImageF::Create(xsize, ysize));
    308   JXL_ASSIGN_OR_DIE(ImageF out, ImageF::Create(xsize, ysize));
    309   const auto rg = CreateRecursiveGaussian(sigma);
    310   FastGaussian(
    311       rg, in.xsize(), in.ysize(), [&](size_t y) { return in.ConstRow(y); },
    312       [&](size_t y) { return temp.Row(y); },
    313       [&](size_t y) { return out.Row(y); });
    314 
    315   const std::vector<float> kernel =
    316       GaussianKernel(static_cast<int>(4 * sigma), static_cast<float>(sigma));
    317   const ImageF expected = Convolve(in, kernel);
    318 
    319   JXL_ASSERT_OK(VerifyRelativeError(expected, out, max_l1, max_rel, _, border));
    320 }
    321 
    322 void TestRandomForSizes(float min, float max, double sigma) {
    323   double max_l1 = 6E-3;
    324   double max_rel = 3E-3;
    325   TestRandom(128, 1, min, max, sigma, max_l1, max_rel);
    326   TestRandom(1, 128, min, max, sigma, max_l1, max_rel);
    327   TestRandom(30, 201, min, max, sigma, max_l1 * 1.6, max_rel * 1.2);
    328   TestRandom(201, 30, min, max, sigma, max_l1 * 1.6, max_rel * 1.2);
    329   TestRandom(201, 201, min, max, sigma, max_l1 * 2.0, max_rel * 1.2);
    330 }
    331 
    332 TEST(GaussBlurTest, TestRandom) {
    333   // small non-negative
    334   TestRandomForSizes(0.0f, 10.0f, 3.0f);
    335   TestRandomForSizes(0.0f, 10.0f, 7.0f);
    336 
    337   // small negative
    338   TestRandomForSizes(-4.0f, -1.0f, 3.0f);
    339   TestRandomForSizes(-4.0f, -1.0f, 7.0f);
    340 
    341   // mixed positive/negative
    342   TestRandomForSizes(-6.0f, 6.0f, 3.0f);
    343   TestRandomForSizes(-6.0f, 6.0f, 7.0f);
    344 }
    345 
    346 TEST(GaussBlurTest, TestSign) {
    347   const size_t xsize = 500;
    348   const size_t ysize = 606;
    349   JXL_ASSIGN_OR_DIE(ImageF in, ImageF::Create(xsize, ysize));
    350 
    351   ZeroFillImage(&in);
    352   const float center[33 * 33] = {
    353       -0.128445f, -0.098473f, -0.121883f, -0.093601f, 0.095665f,  -0.271332f,
    354       -0.705475f, -1.324005f, -2.020741f, -1.329464f, 1.834064f,  4.787300f,
    355       5.834560f,  5.272720f,  3.967960f,  3.547935f,  3.432732f,  3.383015f,
    356       3.239326f,  3.290806f,  3.298954f,  3.397808f,  3.359730f,  3.533844f,
    357       3.511856f,  3.436787f,  3.428310f,  3.460209f,  3.550011f,  3.590942f,
    358       3.593109f,  3.560005f,  3.443165f,  0.089741f,  0.179230f,  -0.032997f,
    359       -0.182610f, 0.005669f,  -0.244759f, -0.395123f, -0.514961f, -1.003529f,
    360       -1.798656f, -2.377975f, 0.222191f,  3.957664f,  5.946804f,  5.543129f,
    361       4.290096f,  3.621010f,  3.407257f,  3.392494f,  3.345367f,  3.391903f,
    362       3.441605f,  3.429260f,  3.444969f,  3.507130f,  3.518612f,  3.443111f,
    363       3.475948f,  3.536148f,  3.470333f,  3.628311f,  3.600243f,  3.292892f,
    364       -0.226730f, -0.573616f, -0.762165f, -0.398739f, -0.189842f, -0.275921f,
    365       -0.446739f, -0.550037f, -0.461033f, -0.724792f, -1.448349f, -1.814064f,
    366       -0.491032f, 2.817703f,  5.213242f,  5.675629f,  4.864548f,  3.876324f,
    367       3.535587f,  3.530312f,  3.413765f,  3.386261f,  3.404854f,  3.383472f,
    368       3.420830f,  3.326496f,  3.257877f,  3.362152f,  3.489609f,  3.619587f,
    369       3.555805f,  3.423164f,  3.309708f,  -0.483940f, -0.502926f, -0.592983f,
    370       -0.492527f, -0.413616f, -0.482555f, -0.475506f, -0.447990f, -0.338120f,
    371       -0.189072f, -0.376427f, -0.910828f, -1.878044f, -1.937927f, 1.423218f,
    372       4.871609f,  5.767548f,  5.103741f,  3.983868f,  3.633003f,  3.458263f,
    373       3.507309f,  3.247021f,  3.220612f,  3.326061f,  3.352814f,  3.291061f,
    374       3.322739f,  3.444302f,  3.506207f,  3.556839f,  3.529575f,  3.457024f,
    375       -0.408161f, -0.431343f, -0.454369f, -0.356419f, -0.380924f, -0.399452f,
    376       -0.439476f, -0.412189f, -0.306816f, -0.008213f, -0.325813f, -0.537842f,
    377       -0.984100f, -1.805332f, -2.028198f, 0.773205f,  4.423046f,  5.604839f,
    378       5.231617f,  4.080299f,  3.603008f,  3.498741f,  3.517010f,  3.333897f,
    379       3.381336f,  3.342617f,  3.369686f,  3.434155f,  3.490452f,  3.607029f,
    380       3.555298f,  3.702297f,  3.618679f,  -0.503609f, -0.578564f, -0.419014f,
    381       -0.239883f, 0.269836f,  0.022984f,  -0.455067f, -0.621777f, -0.304176f,
    382       -0.163792f, -0.490250f, -0.466637f, -0.391792f, -0.657940f, -1.498035f,
    383       -1.895836f, 0.036537f,  3.462456f,  5.586445f,  5.658791f,  4.434784f,
    384       3.423435f,  3.318848f,  3.202328f,  3.532764f,  3.436687f,  3.354881f,
    385       3.356941f,  3.382645f,  3.503902f,  3.512867f,  3.632366f,  3.537312f,
    386       -0.274734f, -0.658829f, -0.726532f, -0.281254f, 0.053196f,  -0.064991f,
    387       -0.608517f, -0.720966f, -0.070602f, -0.111320f, -0.440956f, -0.492180f,
    388       -0.488762f, -0.569283f, -1.012741f, -1.582779f, -2.101479f, -1.392380f,
    389       2.451153f,  5.555855f,  6.096313f,  5.230045f,  4.068172f,  3.404274f,
    390       3.392586f,  3.326065f,  3.156670f,  3.284828f,  3.347012f,  3.319252f,
    391       3.352310f,  3.610790f,  3.499847f,  -0.150600f, -0.314445f, -0.093575f,
    392       -0.057384f, 0.053688f,  -0.189255f, -0.263515f, -0.318653f, 0.053246f,
    393       0.080627f,  -0.119553f, -0.152454f, -0.305420f, -0.404869f, -0.385944f,
    394       -0.689949f, -1.204914f, -1.985748f, -1.711361f, 1.260658f,  4.626896f,
    395       5.888351f,  5.450989f,  4.070587f,  3.539200f,  3.383492f,  3.296318f,
    396       3.267334f,  3.436028f,  3.463005f,  3.502625f,  3.522282f,  3.403763f,
    397       -0.348049f, -0.302303f, -0.137016f, -0.041737f, -0.164001f, -0.358849f,
    398       -0.469627f, -0.428291f, -0.375797f, -0.246346f, -0.118950f, -0.084229f,
    399       -0.205681f, -0.241199f, -0.391796f, -0.323151f, -0.241211f, -0.834137f,
    400       -1.684219f, -1.972137f, 0.448399f,  4.019985f,  5.648144f,  5.647846f,
    401       4.295094f,  3.641884f,  3.374790f,  3.197342f,  3.425545f,  3.507481f,
    402       3.478065f,  3.430889f,  3.341900f,  -1.016304f, -0.959221f, -0.909466f,
    403       -0.810715f, -0.590729f, -0.594467f, -0.646721f, -0.629364f, -0.528561f,
    404       -0.551819f, -0.301086f, -0.149101f, -0.060146f, -0.162220f, -0.326210f,
    405       -0.156548f, -0.036293f, -0.426098f, -1.145470f, -1.628998f, -2.003052f,
    406       -1.142891f, 2.885162f,  5.652863f,  5.718426f,  4.911140f,  3.234222f,
    407       3.473373f,  3.577183f,  3.271603f,  3.410435f,  3.505489f,  3.434032f,
    408       -0.508911f, -0.438797f, -0.437450f, -0.627426f, -0.511745f, -0.304874f,
    409       -0.274246f, -0.261841f, -0.228466f, -0.342491f, -0.528206f, -0.490082f,
    410       -0.516350f, -0.361694f, -0.398514f, -0.276020f, -0.210369f, -0.355938f,
    411       -0.402622f, -0.538864f, -1.249573f, -2.100105f, -0.996178f, 1.886410f,
    412       4.929745f,  5.630871f,  5.444199f,  4.042740f,  3.739189f,  3.691399f,
    413       3.391956f,  3.469696f,  3.431232f,  0.204849f,  0.205433f,  -0.131927f,
    414       -0.367908f, -0.374378f, -0.126820f, -0.186951f, -0.228565f, -0.081776f,
    415       -0.143143f, -0.379230f, -0.598701f, -0.458019f, -0.295586f, -0.407730f,
    416       -0.245853f, -0.043140f, 0.024242f,  -0.038998f, -0.044151f, -0.425991f,
    417       -1.240753f, -1.943146f, -2.174755f, 0.523415f,  4.376751f,  5.956558f,
    418       5.850082f,  4.403152f,  3.517399f,  3.560753f,  3.554836f,  3.471985f,
    419       -0.508503f, -0.109783f, 0.057747f,  0.190079f,  -0.257153f, -0.591980f,
    420       -0.666771f, -0.525391f, -0.293060f, -0.489731f, -0.304855f, -0.259644f,
    421       -0.367825f, -0.346977f, -0.292889f, -0.215652f, -0.120705f, -0.176010f,
    422       -0.422905f, -0.114647f, -0.289749f, -0.374203f, -0.606754f, -1.127949f,
    423       -1.994583f, -0.588058f, 3.415840f,  5.603470f,  5.811581f,  4.959423f,
    424       3.721760f,  3.710499f,  3.785461f,  -0.554588f, -0.565517f, -0.434578f,
    425       -0.012482f, -0.284660f, -0.699795f, -0.957535f, -0.755135f, -0.382034f,
    426       -0.321552f, -0.287571f, -0.279537f, -0.314972f, -0.256287f, -0.372818f,
    427       -0.316017f, -0.287975f, -0.365639f, -0.512589f, -0.420692f, -0.436485f,
    428       -0.295353f, -0.451958f, -0.755459f, -1.272358f, -2.301353f, -1.776161f,
    429       1.572483f,  4.826286f,  5.741898f,  5.162853f,  4.028049f,  3.686325f,
    430       -0.495590f, -0.664413f, -0.760044f, -0.152634f, -0.286480f, -0.340462f,
    431       0.076477f,  0.187706f,  -0.068787f, -0.293491f, -0.361145f, -0.292515f,
    432       -0.140671f, -0.190723f, -0.333302f, -0.368168f, -0.192581f, -0.154499f,
    433       -0.236544f, -0.124405f, -0.208321f, -0.465607f, -0.883080f, -1.104813f,
    434       -1.210567f, -1.415665f, -1.924683f, -1.634758f, 0.601017f,  4.276672f,
    435       5.501350f,  5.331257f,  3.809288f,  -0.727722f, -0.533619f, -0.511524f,
    436       -0.470688f, -0.610710f, -0.575130f, -0.311115f, -0.090420f, -0.297676f,
    437       -0.646118f, -0.742805f, -0.485050f, -0.330910f, -0.275417f, -0.357037f,
    438       -0.425598f, -0.481876f, -0.488941f, -0.393551f, -0.051105f, -0.090755f,
    439       -0.328674f, -0.536369f, -0.533684f, -0.336960f, -0.689194f, -1.187195f,
    440       -1.860954f, -2.290253f, -0.424774f, 3.050060f,  5.083332f,  5.291920f,
    441       -0.343605f, -0.190975f, -0.303692f, -0.456512f, -0.681820f, -0.690693f,
    442       -0.416729f, -0.286446f, -0.442055f, -0.709148f, -0.569160f, -0.382423f,
    443       -0.402321f, -0.383362f, -0.366413f, -0.290718f, -0.110069f, -0.220280f,
    444       -0.279018f, -0.255424f, -0.262081f, -0.487556f, -0.444492f, -0.250500f,
    445       -0.119583f, -0.291557f, -0.537781f, -1.104073f, -1.737091f, -1.697441f,
    446       -0.323456f, 2.042049f,  4.605103f,  -0.310631f, -0.279568f, -0.012695f,
    447       -0.160130f, -0.358746f, -0.421101f, -0.559677f, -0.474136f, -0.416565f,
    448       -0.561817f, -0.534672f, -0.519157f, -0.767197f, -0.605831f, -0.186523f,
    449       0.219872f,  0.264984f,  -0.193432f, -0.363182f, -0.467472f, -0.462009f,
    450       -0.571053f, -0.522476f, -0.315903f, -0.237427f, -0.147320f, -0.100201f,
    451       -0.237568f, -0.763435f, -1.242043f, -2.135159f, -1.409485f, 1.236370f,
    452       -0.474247f, -0.517906f, -0.410217f, -0.542244f, -0.795986f, -0.590004f,
    453       -0.388863f, -0.462921f, -0.810627f, -0.778637f, -0.512486f, -0.718025f,
    454       -0.710854f, -0.482513f, -0.318233f, -0.194962f, -0.220116f, -0.421673f,
    455       -0.534233f, -0.403339f, -0.389332f, -0.407303f, -0.437355f, -0.469730f,
    456       -0.359600f, -0.352745f, -0.466755f, -0.414585f, -0.430756f, -0.656822f,
    457       -1.237038f, -2.046097f, -1.574898f, -0.593815f, -0.582165f, -0.336098f,
    458       -0.372612f, -0.554386f, -0.410603f, -0.428276f, -0.647644f, -0.640720f,
    459       -0.582207f, -0.414112f, -0.435547f, -0.435505f, -0.332561f, -0.248116f,
    460       -0.340221f, -0.277855f, -0.352699f, -0.377319f, -0.230850f, -0.313267f,
    461       -0.446270f, -0.346237f, -0.420422f, -0.530781f, -0.400341f, -0.463661f,
    462       -0.209091f, -0.056705f, -0.011772f, -0.169388f, -0.736275f, -1.463017f,
    463       -0.752701f, -0.668865f, -0.329765f, -0.299347f, -0.245667f, -0.286999f,
    464       -0.520420f, -0.675438f, -0.255753f, 0.141357f,  -0.079639f, -0.419476f,
    465       -0.374069f, -0.046253f, 0.116116f,  -0.145847f, -0.380371f, -0.563412f,
    466       -0.638634f, -0.310116f, -0.260914f, -0.508404f, -0.465508f, -0.527824f,
    467       -0.370979f, -0.305595f, -0.244694f, -0.254490f, 0.009968f,  -0.050201f,
    468       -0.331219f, -0.614960f, -0.788208f, -0.483242f, -0.367516f, -0.186951f,
    469       -0.180031f, 0.129711f,  -0.127811f, -0.384750f, -0.499542f, -0.418613f,
    470       -0.121635f, 0.203197f,  -0.167290f, -0.397270f, -0.355461f, -0.218746f,
    471       -0.376785f, -0.521698f, -0.721581f, -0.845741f, -0.535439f, -0.220882f,
    472       -0.309067f, -0.555248f, -0.690342f, -0.664948f, -0.390102f, 0.020355f,
    473       -0.130447f, -0.173252f, -0.170059f, -0.633663f, -0.956001f, -0.621696f,
    474       -0.388302f, -0.342262f, -0.244370f, -0.386948f, -0.401421f, -0.172979f,
    475       -0.206163f, -0.450058f, -0.525789f, -0.549274f, -0.349251f, -0.474613f,
    476       -0.667976f, -0.435600f, -0.175369f, -0.196877f, -0.202976f, -0.242481f,
    477       -0.258369f, -0.189133f, -0.395397f, -0.765499f, -0.944016f, -0.850967f,
    478       -0.631561f, -0.152493f, -0.046432f, -0.262066f, -0.195919f, 0.048218f,
    479       0.084972f,  0.039902f,  0.000618f,  -0.404430f, -0.447456f, -0.418076f,
    480       -0.631935f, -0.717415f, -0.502888f, -0.530514f, -0.747826f, -0.704041f,
    481       -0.674969f, -0.516853f, -0.418446f, -0.327740f, -0.308815f, -0.481636f,
    482       -0.440083f, -0.481720f, -0.341053f, -0.283897f, -0.324368f, -0.352829f,
    483       -0.434349f, -0.545589f, -0.533104f, -0.472755f, -0.570496f, -0.557735f,
    484       -0.708176f, -0.493332f, -0.194416f, -0.186249f, -0.256710f, -0.271835f,
    485       -0.304752f, -0.431267f, -0.422398f, -0.646725f, -0.680801f, -0.249031f,
    486       -0.058567f, -0.213890f, -0.383949f, -0.540291f, -0.549877f, -0.225567f,
    487       -0.037174f, -0.499874f, -0.641010f, -0.628044f, -0.390549f, -0.311497f,
    488       -0.542313f, -0.569565f, -0.473408f, -0.331245f, -0.357197f, -0.285599f,
    489       -0.200157f, -0.201866f, -0.124428f, -0.346016f, -0.392311f, -0.264496f,
    490       -0.285370f, -0.436974f, -0.523483f, -0.410461f, -0.267925f, -0.055016f,
    491       -0.382458f, -0.319771f, -0.049927f, 0.124329f,  0.266102f,  -0.106606f,
    492       -0.773647f, -0.973053f, -0.708206f, -0.486137f, -0.319923f, -0.493900f,
    493       -0.490860f, -0.324986f, -0.147346f, -0.146088f, -0.161758f, -0.084396f,
    494       -0.379494f, 0.041626f,  -0.113361f, -0.277767f, 0.083366f,  0.126476f,
    495       0.139057f,  0.038040f,  0.038162f,  -0.242126f, -0.411736f, -0.370049f,
    496       -0.455357f, -0.039257f, 0.264442f,  -0.271492f, -0.425346f, -0.514847f,
    497       -0.448650f, -0.580399f, -0.652603f, -0.774803f, -0.692524f, -0.579578f,
    498       -0.465206f, -0.386265f, -0.458012f, -0.446594f, -0.284893f, -0.345448f,
    499       -0.350876f, -0.440350f, -0.360378f, -0.270428f, 0.237213f,  -0.063602f,
    500       -0.364529f, -0.179867f, 0.078197f,  0.117947f,  -0.093410f, -0.359119f,
    501       -0.480961f, -0.540638f, -0.436287f, -0.598576f, -0.253735f, -0.060093f,
    502       -0.549145f, -0.808327f, -0.698593f, -0.595764f, -0.582508f, -0.497353f,
    503       -0.480892f, -0.584240f, -0.665791f, -0.690903f, -0.743446f, -0.796677f,
    504       -0.782391f, -0.649010f, -0.628139f, -0.880848f, -0.829361f, -0.373272f,
    505       -0.223667f, 0.174572f,  -0.348743f, -0.798901f, -0.692307f, -0.607609f,
    506       -0.401455f, -0.480919f, -0.450798f, -0.435413f, -0.322338f, -0.228382f,
    507       -0.450466f, -0.504440f, -0.477402f, -0.662224f, -0.583397f, -0.217445f,
    508       -0.157459f, -0.079584f, -0.226168f, -0.488720f, -0.669624f, -0.666878f,
    509       -0.565311f, -0.549625f, -0.364601f, -0.497627f, -0.736897f, -0.763023f,
    510       -0.741020f, -0.404503f, 0.184814f,  -0.075315f, -0.281513f, -0.532906f,
    511       -0.405800f, -0.313438f, -0.536652f, -0.403381f, 0.011967f,  0.103310f,
    512       -0.269848f, -0.508656f, -0.445923f, -0.644859f, -0.617870f, -0.500927f,
    513       -0.371559f, -0.125580f, 0.028625f,  -0.154713f, -0.442024f, -0.492764f,
    514       -0.199371f, 0.236305f,  0.225925f,  0.075577f,  -0.285812f, -0.437145f,
    515       -0.374260f, -0.156693f, -0.129635f, -0.243206f, -0.123058f, 0.162148f,
    516       -0.313152f, -0.337982f, -0.358421f, 0.040070f,  0.038925f,  -0.333313f,
    517       -0.351662f, 0.023014f,  0.091362f,  -0.282890f, -0.373253f, -0.389050f,
    518       -0.532707f, -0.423347f, -0.349968f, -0.287045f, -0.202442f, -0.308430f,
    519       -0.222801f, -0.106323f, -0.056358f, 0.027222f,  0.390732f,  0.033558f,
    520       -0.160088f, -0.382217f, -0.535282f, -0.515900f, -0.022736f, 0.165665f,
    521       -0.111408f, -0.233784f, -0.312357f, -0.541885f, -0.480022f, -0.482513f,
    522       -0.246254f, 0.132244f,  0.090134f,  0.234634f,  -0.089249f, -0.460854f,
    523       -0.515457f, -0.450874f, -0.311031f, -0.387680f, -0.360554f, -0.179241f,
    524       -0.283817f, -0.475815f, -0.246399f, -0.388958f, -0.551140f, -0.496239f,
    525       -0.559879f, -0.379761f, -0.254288f, -0.395111f, -0.613018f, -0.459427f,
    526       -0.263580f, -0.268929f, 0.080826f,  0.115616f,  -0.097324f, -0.325310f,
    527       -0.480450f, -0.313286f, -0.310371f, -0.517361f, -0.288288f, -0.112679f,
    528       -0.173241f, -0.221664f, -0.039452f, -0.107578f, -0.089630f, -0.483768f,
    529       -0.571087f, -0.497108f, -0.321533f, -0.375492f, -0.540363f, -0.406815f,
    530       -0.388512f, -0.514561f, -0.540192f, -0.402412f, -0.232246f, -0.304749f,
    531       -0.383724f, -0.679596f, -0.685463f, -0.694538f, -0.642937f, -0.425789f,
    532       0.103271f,  -0.194862f, -0.487999f, -0.717281f, -0.681850f, -0.709286f,
    533       -0.615398f, -0.554245f, -0.254681f, -0.049950f, -0.002914f, -0.095383f,
    534       -0.370911f, -0.564224f, -0.242714f};
    535   const size_t xtest = xsize / 2;
    536   const size_t ytest = ysize / 2;
    537 
    538   for (intptr_t dy = -16; dy <= 16; ++dy) {
    539     float* row = in.Row(ytest + dy);
    540     for (intptr_t dx = -16; dx <= 16; ++dx)
    541       row[xtest + dx] = center[(dy + 16) * 33 + (dx + 16)];
    542   }
    543 
    544   const double sigma = 7.155933;
    545 
    546   JXL_ASSIGN_OR_DIE(ImageF temp, ImageF::Create(xsize, ysize));
    547   JXL_ASSIGN_OR_DIE(ImageF out_rg, ImageF::Create(xsize, ysize));
    548   const auto rg = CreateRecursiveGaussian(sigma);
    549   FastGaussian(
    550       rg, in.xsize(), in.ysize(), [&](size_t y) { return in.ConstRow(y); },
    551       [&](size_t y) { return temp.Row(y); },
    552       [&](size_t y) { return out_rg.Row(y); });
    553 
    554   ImageF out_old;
    555   {
    556     const std::vector<float> kernel =
    557         GaussianKernel(static_cast<int>(4 * sigma), static_cast<float>(sigma));
    558     printf("old kernel size %" PRIuS "\n", kernel.size());
    559     out_old = Convolve(in, kernel);
    560   }
    561 
    562   printf("rg %.4f old %.4f\n", out_rg.Row(ytest)[xtest],
    563          out_old.Row(ytest)[xtest]);
    564 }
    565 
    566 }  // namespace jxl