butteraugli.cc (78837B)
1 // Copyright (c) the JPEG XL Project Authors. All rights reserved. 2 // 3 // Use of this source code is governed by a BSD-style 4 // license that can be found in the LICENSE file. 5 // 6 // Author: Jyrki Alakuijala (jyrki.alakuijala@gmail.com) 7 // 8 // The physical architecture of butteraugli is based on the following naming 9 // convention: 10 // * Opsin - dynamics of the photosensitive chemicals in the retina 11 // with their immediate electrical processing 12 // * Xyb - hybrid opponent/trichromatic color space 13 // x is roughly red-subtract-green. 14 // y is yellow. 15 // b is blue. 16 // Xyb values are computed from Opsin mixing, not directly from rgb. 17 // * Mask - for visual masking 18 // * Hf - color modeling for spatially high-frequency features 19 // * Lf - color modeling for spatially low-frequency features 20 // * Diffmap - to cluster and build an image of error between the images 21 // * Blur - to hold the smoothing code 22 23 #include "lib/jxl/butteraugli/butteraugli.h" 24 25 #include <stdint.h> 26 #include <stdio.h> 27 #include <stdlib.h> 28 #include <string.h> 29 30 #include <algorithm> 31 #include <cmath> 32 #include <memory> 33 #include <vector> 34 35 #include "lib/jxl/image.h" 36 37 #undef HWY_TARGET_INCLUDE 38 #define HWY_TARGET_INCLUDE "lib/jxl/butteraugli/butteraugli.cc" 39 #include <hwy/foreach_target.h> 40 41 #include "lib/jxl/base/fast_math-inl.h" 42 #include "lib/jxl/base/printf_macros.h" 43 #include "lib/jxl/base/status.h" 44 #include "lib/jxl/convolve.h" 45 #include "lib/jxl/image_ops.h" 46 47 #ifndef JXL_BUTTERAUGLI_ONCE 48 #define JXL_BUTTERAUGLI_ONCE 49 50 namespace jxl { 51 52 static const double wMfMalta = 37.0819870399; 53 static const double norm1Mf = 130262059.556; 54 static const double wMfMaltaX = 8246.75321353; 55 static const double norm1MfX = 1009002.70582; 56 static const double wHfMalta = 18.7237414387; 57 static const double norm1Hf = 4498534.45232; 58 static const double wHfMaltaX = 6923.99476109; 59 static const double norm1HfX = 8051.15833247; 60 static const double wUhfMalta = 1.10039032555; 61 static const double norm1Uhf = 71.7800275169; 62 static const double wUhfMaltaX = 173.5; 63 static const double norm1UhfX = 5.0; 64 static const double wmul[9] = { 65 400.0, 1.50815703118, 0, 66 2150.0, 10.6195433239, 16.2176043152, 67 29.2353797994, 0.844626970982, 0.703646627719, 68 }; 69 70 std::vector<float> ComputeKernel(float sigma) { 71 const float m = 2.25; // Accuracy increases when m is increased. 72 const double scaler = -1.0 / (2.0 * sigma * sigma); 73 const int diff = std::max<int>(1, m * std::fabs(sigma)); 74 std::vector<float> kernel(2 * diff + 1); 75 for (int i = -diff; i <= diff; ++i) { 76 kernel[i + diff] = std::exp(scaler * i * i); 77 } 78 return kernel; 79 } 80 81 void ConvolveBorderColumn(const ImageF& in, const std::vector<float>& kernel, 82 const size_t x, float* BUTTERAUGLI_RESTRICT row_out) { 83 const size_t offset = kernel.size() / 2; 84 int minx = x < offset ? 0 : x - offset; 85 int maxx = std::min<int>(in.xsize() - 1, x + offset); 86 float weight = 0.0f; 87 for (int j = minx; j <= maxx; ++j) { 88 weight += kernel[j - x + offset]; 89 } 90 float scale = 1.0f / weight; 91 for (size_t y = 0; y < in.ysize(); ++y) { 92 const float* BUTTERAUGLI_RESTRICT row_in = in.Row(y); 93 float sum = 0.0f; 94 for (int j = minx; j <= maxx; ++j) { 95 sum += row_in[j] * kernel[j - x + offset]; 96 } 97 row_out[y] = sum * scale; 98 } 99 } 100 101 // Computes a horizontal convolution and transposes the result. 102 void ConvolutionWithTranspose(const ImageF& in, 103 const std::vector<float>& kernel, 104 ImageF* BUTTERAUGLI_RESTRICT out) { 105 JXL_CHECK(out->xsize() == in.ysize()); 106 JXL_CHECK(out->ysize() == in.xsize()); 107 const size_t len = kernel.size(); 108 const size_t offset = len / 2; 109 float weight_no_border = 0.0f; 110 for (size_t j = 0; j < len; ++j) { 111 weight_no_border += kernel[j]; 112 } 113 const float scale_no_border = 1.0f / weight_no_border; 114 const size_t border1 = std::min(in.xsize(), offset); 115 const size_t border2 = in.xsize() > offset ? in.xsize() - offset : 0; 116 std::vector<float> scaled_kernel(len / 2 + 1); 117 for (size_t i = 0; i <= len / 2; ++i) { 118 scaled_kernel[i] = kernel[i] * scale_no_border; 119 } 120 121 // middle 122 switch (len) { 123 case 7: { 124 const float sk0 = scaled_kernel[0]; 125 const float sk1 = scaled_kernel[1]; 126 const float sk2 = scaled_kernel[2]; 127 const float sk3 = scaled_kernel[3]; 128 for (size_t y = 0; y < in.ysize(); ++y) { 129 const float* BUTTERAUGLI_RESTRICT row_in = in.Row(y) + border1 - offset; 130 for (size_t x = border1; x < border2; ++x, ++row_in) { 131 const float sum0 = (row_in[0] + row_in[6]) * sk0; 132 const float sum1 = (row_in[1] + row_in[5]) * sk1; 133 const float sum2 = (row_in[2] + row_in[4]) * sk2; 134 const float sum = (row_in[3]) * sk3 + sum0 + sum1 + sum2; 135 float* BUTTERAUGLI_RESTRICT row_out = out->Row(x); 136 row_out[y] = sum; 137 } 138 } 139 } break; 140 case 13: { 141 for (size_t y = 0; y < in.ysize(); ++y) { 142 const float* BUTTERAUGLI_RESTRICT row_in = in.Row(y) + border1 - offset; 143 for (size_t x = border1; x < border2; ++x, ++row_in) { 144 float sum0 = (row_in[0] + row_in[12]) * scaled_kernel[0]; 145 float sum1 = (row_in[1] + row_in[11]) * scaled_kernel[1]; 146 float sum2 = (row_in[2] + row_in[10]) * scaled_kernel[2]; 147 float sum3 = (row_in[3] + row_in[9]) * scaled_kernel[3]; 148 sum0 += (row_in[4] + row_in[8]) * scaled_kernel[4]; 149 sum1 += (row_in[5] + row_in[7]) * scaled_kernel[5]; 150 const float sum = (row_in[6]) * scaled_kernel[6]; 151 float* BUTTERAUGLI_RESTRICT row_out = out->Row(x); 152 row_out[y] = sum + sum0 + sum1 + sum2 + sum3; 153 } 154 } 155 break; 156 } 157 case 15: { 158 for (size_t y = 0; y < in.ysize(); ++y) { 159 const float* BUTTERAUGLI_RESTRICT row_in = in.Row(y) + border1 - offset; 160 for (size_t x = border1; x < border2; ++x, ++row_in) { 161 float sum0 = (row_in[0] + row_in[14]) * scaled_kernel[0]; 162 float sum1 = (row_in[1] + row_in[13]) * scaled_kernel[1]; 163 float sum2 = (row_in[2] + row_in[12]) * scaled_kernel[2]; 164 float sum3 = (row_in[3] + row_in[11]) * scaled_kernel[3]; 165 sum0 += (row_in[4] + row_in[10]) * scaled_kernel[4]; 166 sum1 += (row_in[5] + row_in[9]) * scaled_kernel[5]; 167 sum2 += (row_in[6] + row_in[8]) * scaled_kernel[6]; 168 const float sum = (row_in[7]) * scaled_kernel[7]; 169 float* BUTTERAUGLI_RESTRICT row_out = out->Row(x); 170 row_out[y] = sum + sum0 + sum1 + sum2 + sum3; 171 } 172 } 173 break; 174 } 175 case 33: { 176 for (size_t y = 0; y < in.ysize(); ++y) { 177 const float* BUTTERAUGLI_RESTRICT row_in = in.Row(y) + border1 - offset; 178 for (size_t x = border1; x < border2; ++x, ++row_in) { 179 float sum0 = (row_in[0] + row_in[32]) * scaled_kernel[0]; 180 float sum1 = (row_in[1] + row_in[31]) * scaled_kernel[1]; 181 float sum2 = (row_in[2] + row_in[30]) * scaled_kernel[2]; 182 float sum3 = (row_in[3] + row_in[29]) * scaled_kernel[3]; 183 sum0 += (row_in[4] + row_in[28]) * scaled_kernel[4]; 184 sum1 += (row_in[5] + row_in[27]) * scaled_kernel[5]; 185 sum2 += (row_in[6] + row_in[26]) * scaled_kernel[6]; 186 sum3 += (row_in[7] + row_in[25]) * scaled_kernel[7]; 187 sum0 += (row_in[8] + row_in[24]) * scaled_kernel[8]; 188 sum1 += (row_in[9] + row_in[23]) * scaled_kernel[9]; 189 sum2 += (row_in[10] + row_in[22]) * scaled_kernel[10]; 190 sum3 += (row_in[11] + row_in[21]) * scaled_kernel[11]; 191 sum0 += (row_in[12] + row_in[20]) * scaled_kernel[12]; 192 sum1 += (row_in[13] + row_in[19]) * scaled_kernel[13]; 193 sum2 += (row_in[14] + row_in[18]) * scaled_kernel[14]; 194 sum3 += (row_in[15] + row_in[17]) * scaled_kernel[15]; 195 const float sum = (row_in[16]) * scaled_kernel[16]; 196 float* BUTTERAUGLI_RESTRICT row_out = out->Row(x); 197 row_out[y] = sum + sum0 + sum1 + sum2 + sum3; 198 } 199 } 200 break; 201 } 202 default: 203 JXL_UNREACHABLE("Kernel size %" PRIuS " not implemented", len); 204 } 205 // left border 206 for (size_t x = 0; x < border1; ++x) { 207 ConvolveBorderColumn(in, kernel, x, out->Row(x)); 208 } 209 210 // right border 211 for (size_t x = border2; x < in.xsize(); ++x) { 212 ConvolveBorderColumn(in, kernel, x, out->Row(x)); 213 } 214 } 215 216 // A blur somewhat similar to a 2D Gaussian blur. 217 // See: https://en.wikipedia.org/wiki/Gaussian_blur 218 // 219 // This is a bottleneck because the sigma can be quite large (>7). We can use 220 // gauss_blur.cc (runtime independent of sigma, closer to a 4*sigma truncated 221 // Gaussian and our 2.25 in ComputeKernel), but its boundary conditions are 222 // zero-valued. This leads to noticeable differences at the edges of diffmaps. 223 // We retain a special case for 5x5 kernels (even faster than gauss_blur), 224 // optionally use gauss_blur followed by fixup of the borders for large images, 225 // or fall back to the previous truncated FIR followed by a transpose. 226 Status Blur(const ImageF& in, float sigma, const ButteraugliParams& params, 227 BlurTemp* temp, ImageF* out) { 228 std::vector<float> kernel = ComputeKernel(sigma); 229 // Separable5 does an in-place convolution, so this fast path is not safe if 230 // in aliases out. 231 if (kernel.size() == 5 && &in != out) { 232 float sum_weights = 0.0f; 233 for (const float w : kernel) { 234 sum_weights += w; 235 } 236 const float scale = 1.0f / sum_weights; 237 const float w0 = kernel[2] * scale; 238 const float w1 = kernel[1] * scale; 239 const float w2 = kernel[0] * scale; 240 const WeightsSeparable5 weights = { 241 {HWY_REP4(w0), HWY_REP4(w1), HWY_REP4(w2)}, 242 {HWY_REP4(w0), HWY_REP4(w1), HWY_REP4(w2)}, 243 }; 244 Separable5(in, Rect(in), weights, /*pool=*/nullptr, out); 245 return true; 246 } 247 248 ImageF* temp_t; 249 JXL_RETURN_IF_ERROR(temp->GetTransposed(in, &temp_t)); 250 ConvolutionWithTranspose(in, kernel, temp_t); 251 ConvolutionWithTranspose(*temp_t, kernel, out); 252 return true; 253 } 254 255 // Allows PaddedMaltaUnit to call either function via overloading. 256 struct MaltaTagLF {}; 257 struct MaltaTag {}; 258 259 } // namespace jxl 260 261 #endif // JXL_BUTTERAUGLI_ONCE 262 263 #include <hwy/highway.h> 264 HWY_BEFORE_NAMESPACE(); 265 namespace jxl { 266 namespace HWY_NAMESPACE { 267 268 // These templates are not found via ADL. 269 using hwy::HWY_NAMESPACE::Abs; 270 using hwy::HWY_NAMESPACE::Div; 271 using hwy::HWY_NAMESPACE::Gt; 272 using hwy::HWY_NAMESPACE::IfThenElse; 273 using hwy::HWY_NAMESPACE::IfThenElseZero; 274 using hwy::HWY_NAMESPACE::Lt; 275 using hwy::HWY_NAMESPACE::Max; 276 using hwy::HWY_NAMESPACE::Mul; 277 using hwy::HWY_NAMESPACE::MulAdd; 278 using hwy::HWY_NAMESPACE::MulSub; 279 using hwy::HWY_NAMESPACE::Neg; 280 using hwy::HWY_NAMESPACE::Sub; 281 using hwy::HWY_NAMESPACE::Vec; 282 using hwy::HWY_NAMESPACE::ZeroIfNegative; 283 284 template <class D, class V> 285 HWY_INLINE V MaximumClamp(D d, V v, double kMaxVal) { 286 static const double kMul = 0.724216145665; 287 const V mul = Set(d, kMul); 288 const V maxval = Set(d, kMaxVal); 289 // If greater than maxval or less than -maxval, replace with if_*. 290 const V if_pos = MulAdd(Sub(v, maxval), mul, maxval); 291 const V if_neg = MulSub(Add(v, maxval), mul, maxval); 292 const V pos_or_v = IfThenElse(Ge(v, maxval), if_pos, v); 293 return IfThenElse(Lt(v, Neg(maxval)), if_neg, pos_or_v); 294 } 295 296 // Make area around zero less important (remove it). 297 template <class D, class V> 298 HWY_INLINE V RemoveRangeAroundZero(const D d, const double kw, const V x) { 299 const auto w = Set(d, kw); 300 return IfThenElse(Gt(x, w), Sub(x, w), 301 IfThenElseZero(Lt(x, Neg(w)), Add(x, w))); 302 } 303 304 // Make area around zero more important (2x it until the limit). 305 template <class D, class V> 306 HWY_INLINE V AmplifyRangeAroundZero(const D d, const double kw, const V x) { 307 const auto w = Set(d, kw); 308 return IfThenElse(Gt(x, w), Add(x, w), 309 IfThenElse(Lt(x, Neg(w)), Sub(x, w), Add(x, x))); 310 } 311 312 // XybLowFreqToVals converts from low-frequency XYB space to the 'vals' space. 313 // Vals space can be converted to L2-norm space (Euclidean and normalized) 314 // through visual masking. 315 template <class D, class V> 316 HWY_INLINE void XybLowFreqToVals(const D d, const V& x, const V& y, 317 const V& b_arg, V* HWY_RESTRICT valx, 318 V* HWY_RESTRICT valy, V* HWY_RESTRICT valb) { 319 static const double xmul_scalar = 33.832837186260; 320 static const double ymul_scalar = 14.458268100570; 321 static const double bmul_scalar = 49.87984651440; 322 static const double y_to_b_mul_scalar = -0.362267051518; 323 const V xmul = Set(d, xmul_scalar); 324 const V ymul = Set(d, ymul_scalar); 325 const V bmul = Set(d, bmul_scalar); 326 const V y_to_b_mul = Set(d, y_to_b_mul_scalar); 327 const V b = MulAdd(y_to_b_mul, y, b_arg); 328 *valb = Mul(b, bmul); 329 *valx = Mul(x, xmul); 330 *valy = Mul(y, ymul); 331 } 332 333 void XybLowFreqToVals(Image3F* xyb_lf) { 334 // Modify range around zero code only concerns the high frequency 335 // planes and only the X and Y channels. 336 // Convert low freq xyb to vals space so that we can do a simple squared sum 337 // diff on the low frequencies later. 338 const HWY_FULL(float) d; 339 for (size_t y = 0; y < xyb_lf->ysize(); ++y) { 340 float* BUTTERAUGLI_RESTRICT row_x = xyb_lf->PlaneRow(0, y); 341 float* BUTTERAUGLI_RESTRICT row_y = xyb_lf->PlaneRow(1, y); 342 float* BUTTERAUGLI_RESTRICT row_b = xyb_lf->PlaneRow(2, y); 343 for (size_t x = 0; x < xyb_lf->xsize(); x += Lanes(d)) { 344 auto valx = Undefined(d); 345 auto valy = Undefined(d); 346 auto valb = Undefined(d); 347 XybLowFreqToVals(d, Load(d, row_x + x), Load(d, row_y + x), 348 Load(d, row_b + x), &valx, &valy, &valb); 349 Store(valx, d, row_x + x); 350 Store(valy, d, row_y + x); 351 Store(valb, d, row_b + x); 352 } 353 } 354 } 355 356 void SuppressXByY(const ImageF& in_y, ImageF* HWY_RESTRICT inout_x) { 357 JXL_DASSERT(SameSize(*inout_x, in_y)); 358 const size_t xsize = in_y.xsize(); 359 const size_t ysize = in_y.ysize(); 360 const HWY_FULL(float) d; 361 static const double suppress = 46.0; 362 static const double s = 0.653020556257; 363 const auto sv = Set(d, s); 364 const auto one_minus_s = Set(d, 1.0 - s); 365 const auto ywv = Set(d, suppress); 366 367 for (size_t y = 0; y < ysize; ++y) { 368 const float* HWY_RESTRICT row_y = in_y.ConstRow(y); 369 float* HWY_RESTRICT row_x = inout_x->Row(y); 370 for (size_t x = 0; x < xsize; x += Lanes(d)) { 371 const auto vx = Load(d, row_x + x); 372 const auto vy = Load(d, row_y + x); 373 const auto scaler = 374 MulAdd(Div(ywv, MulAdd(vy, vy, ywv)), one_minus_s, sv); 375 Store(Mul(scaler, vx), d, row_x + x); 376 } 377 } 378 } 379 380 void Subtract(const ImageF& a, const ImageF& b, ImageF* c) { 381 const HWY_FULL(float) d; 382 for (size_t y = 0; y < a.ysize(); ++y) { 383 const float* row_a = a.ConstRow(y); 384 const float* row_b = b.ConstRow(y); 385 float* row_c = c->Row(y); 386 for (size_t x = 0; x < a.xsize(); x += Lanes(d)) { 387 Store(Sub(Load(d, row_a + x), Load(d, row_b + x)), d, row_c + x); 388 } 389 } 390 } 391 392 Status SeparateLFAndMF(const ButteraugliParams& params, const Image3F& xyb, 393 Image3F* lf, Image3F* mf, BlurTemp* blur_temp) { 394 static const double kSigmaLf = 7.15593339443; 395 for (int i = 0; i < 3; ++i) { 396 // Extract lf ... 397 JXL_RETURN_IF_ERROR( 398 Blur(xyb.Plane(i), kSigmaLf, params, blur_temp, &lf->Plane(i))); 399 // ... and keep everything else in mf. 400 Subtract(xyb.Plane(i), lf->Plane(i), &mf->Plane(i)); 401 } 402 XybLowFreqToVals(lf); 403 return true; 404 } 405 406 Status SeparateMFAndHF(const ButteraugliParams& params, Image3F* mf, ImageF* hf, 407 BlurTemp* blur_temp) { 408 const HWY_FULL(float) d; 409 static const double kSigmaHf = 3.22489901262; 410 const size_t xsize = mf->xsize(); 411 const size_t ysize = mf->ysize(); 412 JXL_ASSIGN_OR_RETURN(hf[0], ImageF::Create(xsize, ysize)); 413 JXL_ASSIGN_OR_RETURN(hf[1], ImageF::Create(xsize, ysize)); 414 for (int i = 0; i < 3; ++i) { 415 if (i == 2) { 416 JXL_RETURN_IF_ERROR( 417 Blur(mf->Plane(i), kSigmaHf, params, blur_temp, &mf->Plane(i))); 418 break; 419 } 420 for (size_t y = 0; y < ysize; ++y) { 421 float* BUTTERAUGLI_RESTRICT row_mf = mf->PlaneRow(i, y); 422 float* BUTTERAUGLI_RESTRICT row_hf = hf[i].Row(y); 423 for (size_t x = 0; x < xsize; x += Lanes(d)) { 424 Store(Load(d, row_mf + x), d, row_hf + x); 425 } 426 } 427 JXL_RETURN_IF_ERROR( 428 Blur(mf->Plane(i), kSigmaHf, params, blur_temp, &mf->Plane(i))); 429 static const double kRemoveMfRange = 0.29; 430 static const double kAddMfRange = 0.1; 431 if (i == 0) { 432 for (size_t y = 0; y < ysize; ++y) { 433 float* BUTTERAUGLI_RESTRICT row_mf = mf->PlaneRow(0, y); 434 float* BUTTERAUGLI_RESTRICT row_hf = hf[0].Row(y); 435 for (size_t x = 0; x < xsize; x += Lanes(d)) { 436 auto mf = Load(d, row_mf + x); 437 auto hf = Sub(Load(d, row_hf + x), mf); 438 mf = RemoveRangeAroundZero(d, kRemoveMfRange, mf); 439 Store(mf, d, row_mf + x); 440 Store(hf, d, row_hf + x); 441 } 442 } 443 } else { 444 for (size_t y = 0; y < ysize; ++y) { 445 float* BUTTERAUGLI_RESTRICT row_mf = mf->PlaneRow(1, y); 446 float* BUTTERAUGLI_RESTRICT row_hf = hf[1].Row(y); 447 for (size_t x = 0; x < xsize; x += Lanes(d)) { 448 auto mf = Load(d, row_mf + x); 449 auto hf = Sub(Load(d, row_hf + x), mf); 450 451 mf = AmplifyRangeAroundZero(d, kAddMfRange, mf); 452 Store(mf, d, row_mf + x); 453 Store(hf, d, row_hf + x); 454 } 455 } 456 } 457 } 458 // Suppress red-green by intensity change in the high freq channels. 459 SuppressXByY(hf[1], &hf[0]); 460 return true; 461 } 462 463 Status SeparateHFAndUHF(const ButteraugliParams& params, ImageF* hf, 464 ImageF* uhf, BlurTemp* blur_temp) { 465 const HWY_FULL(float) d; 466 const size_t xsize = hf[0].xsize(); 467 const size_t ysize = hf[0].ysize(); 468 static const double kSigmaUhf = 1.56416327805; 469 JXL_ASSIGN_OR_RETURN(uhf[0], ImageF::Create(xsize, ysize)); 470 JXL_ASSIGN_OR_RETURN(uhf[1], ImageF::Create(xsize, ysize)); 471 for (int i = 0; i < 2; ++i) { 472 // Divide hf into hf and uhf. 473 for (size_t y = 0; y < ysize; ++y) { 474 float* BUTTERAUGLI_RESTRICT row_uhf = uhf[i].Row(y); 475 float* BUTTERAUGLI_RESTRICT row_hf = hf[i].Row(y); 476 for (size_t x = 0; x < xsize; ++x) { 477 row_uhf[x] = row_hf[x]; 478 } 479 } 480 JXL_RETURN_IF_ERROR(Blur(hf[i], kSigmaUhf, params, blur_temp, &hf[i])); 481 static const double kRemoveHfRange = 1.5; 482 static const double kAddHfRange = 0.132; 483 static const double kRemoveUhfRange = 0.04; 484 static const double kMaxclampHf = 28.4691806922; 485 static const double kMaxclampUhf = 5.19175294647; 486 static double kMulYHf = 2.155; 487 static double kMulYUhf = 2.69313763794; 488 if (i == 0) { 489 for (size_t y = 0; y < ysize; ++y) { 490 float* BUTTERAUGLI_RESTRICT row_uhf = uhf[0].Row(y); 491 float* BUTTERAUGLI_RESTRICT row_hf = hf[0].Row(y); 492 for (size_t x = 0; x < xsize; x += Lanes(d)) { 493 auto hf = Load(d, row_hf + x); 494 auto uhf = Sub(Load(d, row_uhf + x), hf); 495 hf = RemoveRangeAroundZero(d, kRemoveHfRange, hf); 496 uhf = RemoveRangeAroundZero(d, kRemoveUhfRange, uhf); 497 Store(hf, d, row_hf + x); 498 Store(uhf, d, row_uhf + x); 499 } 500 } 501 } else { 502 for (size_t y = 0; y < ysize; ++y) { 503 float* BUTTERAUGLI_RESTRICT row_uhf = uhf[1].Row(y); 504 float* BUTTERAUGLI_RESTRICT row_hf = hf[1].Row(y); 505 for (size_t x = 0; x < xsize; x += Lanes(d)) { 506 auto hf = Load(d, row_hf + x); 507 hf = MaximumClamp(d, hf, kMaxclampHf); 508 509 auto uhf = Sub(Load(d, row_uhf + x), hf); 510 uhf = MaximumClamp(d, uhf, kMaxclampUhf); 511 uhf = Mul(uhf, Set(d, kMulYUhf)); 512 Store(uhf, d, row_uhf + x); 513 514 hf = Mul(hf, Set(d, kMulYHf)); 515 hf = AmplifyRangeAroundZero(d, kAddHfRange, hf); 516 Store(hf, d, row_hf + x); 517 } 518 } 519 } 520 } 521 return true; 522 } 523 524 void DeallocateHFAndUHF(ImageF* hf, ImageF* uhf) { 525 for (int i = 0; i < 2; ++i) { 526 hf[i] = ImageF(); 527 uhf[i] = ImageF(); 528 } 529 } 530 531 Status SeparateFrequencies(size_t xsize, size_t ysize, 532 const ButteraugliParams& params, BlurTemp* blur_temp, 533 const Image3F& xyb, PsychoImage& ps) { 534 JXL_ASSIGN_OR_RETURN(ps.lf, Image3F::Create(xyb.xsize(), xyb.ysize())); 535 JXL_ASSIGN_OR_RETURN(ps.mf, Image3F::Create(xyb.xsize(), xyb.ysize())); 536 JXL_RETURN_IF_ERROR(SeparateLFAndMF(params, xyb, &ps.lf, &ps.mf, blur_temp)); 537 JXL_RETURN_IF_ERROR(SeparateMFAndHF(params, &ps.mf, &ps.hf[0], blur_temp)); 538 JXL_RETURN_IF_ERROR( 539 SeparateHFAndUHF(params, &ps.hf[0], &ps.uhf[0], blur_temp)); 540 return true; 541 } 542 543 namespace { 544 template <typename V> 545 BUTTERAUGLI_INLINE V Sum(V a, V b, V c, V d) { 546 return Add(Add(a, b), Add(c, d)); 547 } 548 template <typename V> 549 BUTTERAUGLI_INLINE V Sum(V a, V b, V c, V d, V e) { 550 return Sum(a, b, c, Add(d, e)); 551 } 552 template <typename V> 553 BUTTERAUGLI_INLINE V Sum(V a, V b, V c, V d, V e, V f, V g) { 554 return Sum(a, b, c, Sum(d, e, f, g)); 555 } 556 template <typename V> 557 BUTTERAUGLI_INLINE V Sum(V a, V b, V c, V d, V e, V f, V g, V h, V i) { 558 return Add(Add(Sum(a, b, c, d), Sum(e, f, g, h)), i); 559 } 560 } // namespace 561 562 template <class D> 563 Vec<D> MaltaUnit(MaltaTagLF /*tag*/, const D df, 564 const float* BUTTERAUGLI_RESTRICT d, const intptr_t xs) { 565 const intptr_t xs3 = 3 * xs; 566 567 const auto center = LoadU(df, d); 568 569 // x grows, y constant 570 const auto sum_yconst = Sum(LoadU(df, d - 4), LoadU(df, d - 2), center, 571 LoadU(df, d + 2), LoadU(df, d + 4)); 572 // Will return this, sum of all line kernels 573 auto retval = Mul(sum_yconst, sum_yconst); 574 { 575 // y grows, x constant 576 auto sum = Sum(LoadU(df, d - xs3 - xs), LoadU(df, d - xs - xs), center, 577 LoadU(df, d + xs + xs), LoadU(df, d + xs3 + xs)); 578 retval = MulAdd(sum, sum, retval); 579 } 580 { 581 // both grow 582 auto sum = Sum(LoadU(df, d - xs3 - 3), LoadU(df, d - xs - xs - 2), center, 583 LoadU(df, d + xs + xs + 2), LoadU(df, d + xs3 + 3)); 584 retval = MulAdd(sum, sum, retval); 585 } 586 { 587 // y grows, x shrinks 588 auto sum = Sum(LoadU(df, d - xs3 + 3), LoadU(df, d - xs - xs + 2), center, 589 LoadU(df, d + xs + xs - 2), LoadU(df, d + xs3 - 3)); 590 retval = MulAdd(sum, sum, retval); 591 } 592 { 593 // y grows -4 to 4, x shrinks 1 -> -1 594 auto sum = 595 Sum(LoadU(df, d - xs3 - xs + 1), LoadU(df, d - xs - xs + 1), center, 596 LoadU(df, d + xs + xs - 1), LoadU(df, d + xs3 + xs - 1)); 597 retval = MulAdd(sum, sum, retval); 598 } 599 { 600 // y grows -4 to 4, x grows -1 -> 1 601 auto sum = 602 Sum(LoadU(df, d - xs3 - xs - 1), LoadU(df, d - xs - xs - 1), center, 603 LoadU(df, d + xs + xs + 1), LoadU(df, d + xs3 + xs + 1)); 604 retval = MulAdd(sum, sum, retval); 605 } 606 { 607 // x grows -4 to 4, y grows -1 to 1 608 auto sum = Sum(LoadU(df, d - 4 - xs), LoadU(df, d - 2 - xs), center, 609 LoadU(df, d + 2 + xs), LoadU(df, d + 4 + xs)); 610 retval = MulAdd(sum, sum, retval); 611 } 612 { 613 // x grows -4 to 4, y shrinks 1 to -1 614 auto sum = Sum(LoadU(df, d - 4 + xs), LoadU(df, d - 2 + xs), center, 615 LoadU(df, d + 2 - xs), LoadU(df, d + 4 - xs)); 616 retval = MulAdd(sum, sum, retval); 617 } 618 { 619 /* 0_________ 620 1__*______ 621 2___*_____ 622 3_________ 623 4____0____ 624 5_________ 625 6_____*___ 626 7______*__ 627 8_________ */ 628 auto sum = Sum(LoadU(df, d - xs3 - 2), LoadU(df, d - xs - xs - 1), center, 629 LoadU(df, d + xs + xs + 1), LoadU(df, d + xs3 + 2)); 630 retval = MulAdd(sum, sum, retval); 631 } 632 { 633 /* 0_________ 634 1______*__ 635 2_____*___ 636 3_________ 637 4____0____ 638 5_________ 639 6___*_____ 640 7__*______ 641 8_________ */ 642 auto sum = Sum(LoadU(df, d - xs3 + 2), LoadU(df, d - xs - xs + 1), center, 643 LoadU(df, d + xs + xs - 1), LoadU(df, d + xs3 - 2)); 644 retval = MulAdd(sum, sum, retval); 645 } 646 { 647 /* 0_________ 648 1_________ 649 2_*_______ 650 3__*______ 651 4____0____ 652 5______*__ 653 6_______*_ 654 7_________ 655 8_________ */ 656 auto sum = Sum(LoadU(df, d - xs - xs - 3), LoadU(df, d - xs - 2), center, 657 LoadU(df, d + xs + 2), LoadU(df, d + xs + xs + 3)); 658 retval = MulAdd(sum, sum, retval); 659 } 660 { 661 /* 0_________ 662 1_________ 663 2_______*_ 664 3______*__ 665 4____0____ 666 5__*______ 667 6_*_______ 668 7_________ 669 8_________ */ 670 auto sum = Sum(LoadU(df, d - xs - xs + 3), LoadU(df, d - xs + 2), center, 671 LoadU(df, d + xs - 2), LoadU(df, d + xs + xs - 3)); 672 retval = MulAdd(sum, sum, retval); 673 } 674 { 675 /* 0_________ 676 1_________ 677 2________* 678 3______*__ 679 4____0____ 680 5__*______ 681 6*________ 682 7_________ 683 8_________ */ 684 685 auto sum = Sum(LoadU(df, d + xs + xs - 4), LoadU(df, d + xs - 2), center, 686 LoadU(df, d - xs + 2), LoadU(df, d - xs - xs + 4)); 687 retval = MulAdd(sum, sum, retval); 688 } 689 { 690 /* 0_________ 691 1_________ 692 2*________ 693 3__*______ 694 4____0____ 695 5______*__ 696 6________* 697 7_________ 698 8_________ */ 699 auto sum = Sum(LoadU(df, d - xs - xs - 4), LoadU(df, d - xs - 2), center, 700 LoadU(df, d + xs + 2), LoadU(df, d + xs + xs + 4)); 701 retval = MulAdd(sum, sum, retval); 702 } 703 { 704 /* 0__*______ 705 1_________ 706 2___*_____ 707 3_________ 708 4____0____ 709 5_________ 710 6_____*___ 711 7_________ 712 8______*__ */ 713 auto sum = 714 Sum(LoadU(df, d - xs3 - xs - 2), LoadU(df, d - xs - xs - 1), center, 715 LoadU(df, d + xs + xs + 1), LoadU(df, d + xs3 + xs + 2)); 716 retval = MulAdd(sum, sum, retval); 717 } 718 { 719 /* 0______*__ 720 1_________ 721 2_____*___ 722 3_________ 723 4____0____ 724 5_________ 725 6___*_____ 726 7_________ 727 8__*______ */ 728 auto sum = 729 Sum(LoadU(df, d - xs3 - xs + 2), LoadU(df, d - xs - xs + 1), center, 730 LoadU(df, d + xs + xs - 1), LoadU(df, d + xs3 + xs - 2)); 731 retval = MulAdd(sum, sum, retval); 732 } 733 return retval; 734 } 735 736 template <class D> 737 Vec<D> MaltaUnit(MaltaTag /*tag*/, const D df, 738 const float* BUTTERAUGLI_RESTRICT d, const intptr_t xs) { 739 const intptr_t xs3 = 3 * xs; 740 741 const auto center = LoadU(df, d); 742 743 // x grows, y constant 744 const auto sum_yconst = 745 Sum(LoadU(df, d - 4), LoadU(df, d - 3), LoadU(df, d - 2), 746 LoadU(df, d - 1), center, LoadU(df, d + 1), LoadU(df, d + 2), 747 LoadU(df, d + 3), LoadU(df, d + 4)); 748 // Will return this, sum of all line kernels 749 auto retval = Mul(sum_yconst, sum_yconst); 750 751 { 752 // y grows, x constant 753 auto sum = Sum(LoadU(df, d - xs3 - xs), LoadU(df, d - xs3), 754 LoadU(df, d - xs - xs), LoadU(df, d - xs), center, 755 LoadU(df, d + xs), LoadU(df, d + xs + xs), 756 LoadU(df, d + xs3), LoadU(df, d + xs3 + xs)); 757 retval = MulAdd(sum, sum, retval); 758 } 759 { 760 // both grow 761 auto sum = Sum(LoadU(df, d - xs3 - 3), LoadU(df, d - xs - xs - 2), 762 LoadU(df, d - xs - 1), center, LoadU(df, d + xs + 1), 763 LoadU(df, d + xs + xs + 2), LoadU(df, d + xs3 + 3)); 764 retval = MulAdd(sum, sum, retval); 765 } 766 { 767 // y grows, x shrinks 768 auto sum = Sum(LoadU(df, d - xs3 + 3), LoadU(df, d - xs - xs + 2), 769 LoadU(df, d - xs + 1), center, LoadU(df, d + xs - 1), 770 LoadU(df, d + xs + xs - 2), LoadU(df, d + xs3 - 3)); 771 retval = MulAdd(sum, sum, retval); 772 } 773 { 774 // y grows -4 to 4, x shrinks 1 -> -1 775 auto sum = Sum(LoadU(df, d - xs3 - xs + 1), LoadU(df, d - xs3 + 1), 776 LoadU(df, d - xs - xs + 1), LoadU(df, d - xs), center, 777 LoadU(df, d + xs), LoadU(df, d + xs + xs - 1), 778 LoadU(df, d + xs3 - 1), LoadU(df, d + xs3 + xs - 1)); 779 retval = MulAdd(sum, sum, retval); 780 } 781 { 782 // y grows -4 to 4, x grows -1 -> 1 783 auto sum = Sum(LoadU(df, d - xs3 - xs - 1), LoadU(df, d - xs3 - 1), 784 LoadU(df, d - xs - xs - 1), LoadU(df, d - xs), center, 785 LoadU(df, d + xs), LoadU(df, d + xs + xs + 1), 786 LoadU(df, d + xs3 + 1), LoadU(df, d + xs3 + xs + 1)); 787 retval = MulAdd(sum, sum, retval); 788 } 789 { 790 // x grows -4 to 4, y grows -1 to 1 791 auto sum = 792 Sum(LoadU(df, d - 4 - xs), LoadU(df, d - 3 - xs), LoadU(df, d - 2 - xs), 793 LoadU(df, d - 1), center, LoadU(df, d + 1), LoadU(df, d + 2 + xs), 794 LoadU(df, d + 3 + xs), LoadU(df, d + 4 + xs)); 795 retval = MulAdd(sum, sum, retval); 796 } 797 { 798 // x grows -4 to 4, y shrinks 1 to -1 799 auto sum = 800 Sum(LoadU(df, d - 4 + xs), LoadU(df, d - 3 + xs), LoadU(df, d - 2 + xs), 801 LoadU(df, d - 1), center, LoadU(df, d + 1), LoadU(df, d + 2 - xs), 802 LoadU(df, d + 3 - xs), LoadU(df, d + 4 - xs)); 803 retval = MulAdd(sum, sum, retval); 804 } 805 { 806 /* 0_________ 807 1__*______ 808 2___*_____ 809 3___*_____ 810 4____0____ 811 5_____*___ 812 6_____*___ 813 7______*__ 814 8_________ */ 815 auto sum = Sum(LoadU(df, d - xs3 - 2), LoadU(df, d - xs - xs - 1), 816 LoadU(df, d - xs - 1), center, LoadU(df, d + xs + 1), 817 LoadU(df, d + xs + xs + 1), LoadU(df, d + xs3 + 2)); 818 retval = MulAdd(sum, sum, retval); 819 } 820 { 821 /* 0_________ 822 1______*__ 823 2_____*___ 824 3_____*___ 825 4____0____ 826 5___*_____ 827 6___*_____ 828 7__*______ 829 8_________ */ 830 auto sum = Sum(LoadU(df, d - xs3 + 2), LoadU(df, d - xs - xs + 1), 831 LoadU(df, d - xs + 1), center, LoadU(df, d + xs - 1), 832 LoadU(df, d + xs + xs - 1), LoadU(df, d + xs3 - 2)); 833 retval = MulAdd(sum, sum, retval); 834 } 835 { 836 /* 0_________ 837 1_________ 838 2_*_______ 839 3__**_____ 840 4____0____ 841 5_____**__ 842 6_______*_ 843 7_________ 844 8_________ */ 845 auto sum = Sum(LoadU(df, d - xs - xs - 3), LoadU(df, d - xs - 2), 846 LoadU(df, d - xs - 1), center, LoadU(df, d + xs + 1), 847 LoadU(df, d + xs + 2), LoadU(df, d + xs + xs + 3)); 848 retval = MulAdd(sum, sum, retval); 849 } 850 { 851 /* 0_________ 852 1_________ 853 2_______*_ 854 3_____**__ 855 4____0____ 856 5__**_____ 857 6_*_______ 858 7_________ 859 8_________ */ 860 auto sum = Sum(LoadU(df, d - xs - xs + 3), LoadU(df, d - xs + 2), 861 LoadU(df, d - xs + 1), center, LoadU(df, d + xs - 1), 862 LoadU(df, d + xs - 2), LoadU(df, d + xs + xs - 3)); 863 retval = MulAdd(sum, sum, retval); 864 } 865 { 866 /* 0_________ 867 1_________ 868 2_________ 869 3______*** 870 4___*0*___ 871 5***______ 872 6_________ 873 7_________ 874 8_________ */ 875 876 auto sum = 877 Sum(LoadU(df, d + xs - 4), LoadU(df, d + xs - 3), LoadU(df, d + xs - 2), 878 LoadU(df, d - 1), center, LoadU(df, d + 1), LoadU(df, d - xs + 2), 879 LoadU(df, d - xs + 3), LoadU(df, d - xs + 4)); 880 retval = MulAdd(sum, sum, retval); 881 } 882 { 883 /* 0_________ 884 1_________ 885 2_________ 886 3***______ 887 4___*0*___ 888 5______*** 889 6_________ 890 7_________ 891 8_________ */ 892 auto sum = 893 Sum(LoadU(df, d - xs - 4), LoadU(df, d - xs - 3), LoadU(df, d - xs - 2), 894 LoadU(df, d - 1), center, LoadU(df, d + 1), LoadU(df, d + xs + 2), 895 LoadU(df, d + xs + 3), LoadU(df, d + xs + 4)); 896 retval = MulAdd(sum, sum, retval); 897 } 898 { 899 /* 0___*_____ 900 1___*_____ 901 2___*_____ 902 3____*____ 903 4____0____ 904 5____*____ 905 6_____*___ 906 7_____*___ 907 8_____*___ */ 908 auto sum = Sum(LoadU(df, d - xs3 - xs - 1), LoadU(df, d - xs3 - 1), 909 LoadU(df, d - xs - xs - 1), LoadU(df, d - xs), center, 910 LoadU(df, d + xs), LoadU(df, d + xs + xs + 1), 911 LoadU(df, d + xs3 + 1), LoadU(df, d + xs3 + xs + 1)); 912 retval = MulAdd(sum, sum, retval); 913 } 914 { 915 /* 0_____*___ 916 1_____*___ 917 2____ *___ 918 3____*____ 919 4____0____ 920 5____*____ 921 6___*_____ 922 7___*_____ 923 8___*_____ */ 924 auto sum = Sum(LoadU(df, d - xs3 - xs + 1), LoadU(df, d - xs3 + 1), 925 LoadU(df, d - xs - xs + 1), LoadU(df, d - xs), center, 926 LoadU(df, d + xs), LoadU(df, d + xs + xs - 1), 927 LoadU(df, d + xs3 - 1), LoadU(df, d + xs3 + xs - 1)); 928 retval = MulAdd(sum, sum, retval); 929 } 930 return retval; 931 } 932 933 // Returns MaltaUnit. Avoids bounds-checks when x0 and y0 are known 934 // to be far enough from the image borders. "diffs" is a packed image. 935 template <class Tag> 936 static BUTTERAUGLI_INLINE float PaddedMaltaUnit(const ImageF& diffs, 937 const size_t x0, 938 const size_t y0) { 939 const float* BUTTERAUGLI_RESTRICT d = diffs.ConstRow(y0) + x0; 940 const HWY_CAPPED(float, 1) df; 941 if ((x0 >= 4 && y0 >= 4 && x0 < (diffs.xsize() - 4) && 942 y0 < (diffs.ysize() - 4))) { 943 return GetLane(MaltaUnit(Tag(), df, d, diffs.PixelsPerRow())); 944 } 945 946 float borderimage[12 * 9]; // round up to 4 947 for (int dy = 0; dy < 9; ++dy) { 948 int y = y0 + dy - 4; 949 if (y < 0 || static_cast<size_t>(y) >= diffs.ysize()) { 950 for (int dx = 0; dx < 12; ++dx) { 951 borderimage[dy * 12 + dx] = 0.0f; 952 } 953 continue; 954 } 955 956 const float* row_diffs = diffs.ConstRow(y); 957 for (int dx = 0; dx < 9; ++dx) { 958 int x = x0 + dx - 4; 959 if (x < 0 || static_cast<size_t>(x) >= diffs.xsize()) { 960 borderimage[dy * 12 + dx] = 0.0f; 961 } else { 962 borderimage[dy * 12 + dx] = row_diffs[x]; 963 } 964 } 965 std::fill(borderimage + dy * 12 + 9, borderimage + dy * 12 + 12, 0.0f); 966 } 967 return GetLane(MaltaUnit(Tag(), df, &borderimage[4 * 12 + 4], 12)); 968 } 969 970 template <class Tag> 971 static void MaltaDiffMapT(const Tag tag, const ImageF& lum0, const ImageF& lum1, 972 const double w_0gt1, const double w_0lt1, 973 const double norm1, const double len, 974 const double mulli, ImageF* HWY_RESTRICT diffs, 975 ImageF* HWY_RESTRICT block_diff_ac) { 976 JXL_DASSERT(SameSize(lum0, lum1) && SameSize(lum0, *diffs)); 977 const size_t xsize_ = lum0.xsize(); 978 const size_t ysize_ = lum0.ysize(); 979 980 const float kWeight0 = 0.5; 981 const float kWeight1 = 0.33; 982 983 const double w_pre0gt1 = mulli * std::sqrt(kWeight0 * w_0gt1) / (len * 2 + 1); 984 const double w_pre0lt1 = mulli * std::sqrt(kWeight1 * w_0lt1) / (len * 2 + 1); 985 const float norm2_0gt1 = w_pre0gt1 * norm1; 986 const float norm2_0lt1 = w_pre0lt1 * norm1; 987 988 for (size_t y = 0; y < ysize_; ++y) { 989 const float* HWY_RESTRICT row0 = lum0.ConstRow(y); 990 const float* HWY_RESTRICT row1 = lum1.ConstRow(y); 991 float* HWY_RESTRICT row_diffs = diffs->Row(y); 992 for (size_t x = 0; x < xsize_; ++x) { 993 const float absval = 0.5f * (std::abs(row0[x]) + std::abs(row1[x])); 994 const float diff = row0[x] - row1[x]; 995 const float scaler = norm2_0gt1 / (static_cast<float>(norm1) + absval); 996 997 // Primary symmetric quadratic objective. 998 row_diffs[x] = scaler * diff; 999 1000 const float scaler2 = norm2_0lt1 / (static_cast<float>(norm1) + absval); 1001 const double fabs0 = std::fabs(row0[x]); 1002 1003 // Secondary half-open quadratic objectives. 1004 const double too_small = 0.55 * fabs0; 1005 const double too_big = 1.05 * fabs0; 1006 1007 if (row0[x] < 0) { 1008 if (row1[x] > -too_small) { 1009 double impact = scaler2 * (row1[x] + too_small); 1010 row_diffs[x] -= impact; 1011 } else if (row1[x] < -too_big) { 1012 double impact = scaler2 * (-row1[x] - too_big); 1013 row_diffs[x] += impact; 1014 } 1015 } else { 1016 if (row1[x] < too_small) { 1017 double impact = scaler2 * (too_small - row1[x]); 1018 row_diffs[x] += impact; 1019 } else if (row1[x] > too_big) { 1020 double impact = scaler2 * (row1[x] - too_big); 1021 row_diffs[x] -= impact; 1022 } 1023 } 1024 } 1025 } 1026 1027 size_t y0 = 0; 1028 // Top 1029 for (; y0 < 4; ++y0) { 1030 float* BUTTERAUGLI_RESTRICT row_diff = block_diff_ac->Row(y0); 1031 for (size_t x0 = 0; x0 < xsize_; ++x0) { 1032 row_diff[x0] += PaddedMaltaUnit<Tag>(*diffs, x0, y0); 1033 } 1034 } 1035 1036 const HWY_FULL(float) df; 1037 const size_t aligned_x = std::max(static_cast<size_t>(4), Lanes(df)); 1038 const intptr_t stride = diffs->PixelsPerRow(); 1039 1040 // Middle 1041 for (; y0 < ysize_ - 4; ++y0) { 1042 const float* BUTTERAUGLI_RESTRICT row_in = diffs->ConstRow(y0); 1043 float* BUTTERAUGLI_RESTRICT row_diff = block_diff_ac->Row(y0); 1044 size_t x0 = 0; 1045 for (; x0 < aligned_x; ++x0) { 1046 row_diff[x0] += PaddedMaltaUnit<Tag>(*diffs, x0, y0); 1047 } 1048 for (; x0 + Lanes(df) + 4 <= xsize_; x0 += Lanes(df)) { 1049 auto diff = Load(df, row_diff + x0); 1050 diff = Add(diff, MaltaUnit(Tag(), df, row_in + x0, stride)); 1051 Store(diff, df, row_diff + x0); 1052 } 1053 1054 for (; x0 < xsize_; ++x0) { 1055 row_diff[x0] += PaddedMaltaUnit<Tag>(*diffs, x0, y0); 1056 } 1057 } 1058 1059 // Bottom 1060 for (; y0 < ysize_; ++y0) { 1061 float* BUTTERAUGLI_RESTRICT row_diff = block_diff_ac->Row(y0); 1062 for (size_t x0 = 0; x0 < xsize_; ++x0) { 1063 row_diff[x0] += PaddedMaltaUnit<Tag>(*diffs, x0, y0); 1064 } 1065 } 1066 } 1067 1068 // Need non-template wrapper functions for HWY_EXPORT. 1069 void MaltaDiffMap(const ImageF& lum0, const ImageF& lum1, const double w_0gt1, 1070 const double w_0lt1, const double norm1, 1071 ImageF* HWY_RESTRICT diffs, 1072 ImageF* HWY_RESTRICT block_diff_ac) { 1073 const double len = 3.75; 1074 static const double mulli = 0.39905817637; 1075 MaltaDiffMapT(MaltaTag(), lum0, lum1, w_0gt1, w_0lt1, norm1, len, mulli, 1076 diffs, block_diff_ac); 1077 } 1078 1079 void MaltaDiffMapLF(const ImageF& lum0, const ImageF& lum1, const double w_0gt1, 1080 const double w_0lt1, const double norm1, 1081 ImageF* HWY_RESTRICT diffs, 1082 ImageF* HWY_RESTRICT block_diff_ac) { 1083 const double len = 3.75; 1084 static const double mulli = 0.611612573796; 1085 MaltaDiffMapT(MaltaTagLF(), lum0, lum1, w_0gt1, w_0lt1, norm1, len, mulli, 1086 diffs, block_diff_ac); 1087 } 1088 1089 void CombineChannelsForMasking(const ImageF* hf, const ImageF* uhf, 1090 ImageF* out) { 1091 // Only X and Y components are involved in masking. B's influence 1092 // is considered less important in the high frequency area, and we 1093 // don't model masking from lower frequency signals. 1094 static const float muls[3] = { 1095 2.5f, 1096 0.4f, 1097 0.4f, 1098 }; 1099 // Silly and unoptimized approach here. TODO(jyrki): rework this. 1100 for (size_t y = 0; y < hf[0].ysize(); ++y) { 1101 const float* BUTTERAUGLI_RESTRICT row_y_hf = hf[1].Row(y); 1102 const float* BUTTERAUGLI_RESTRICT row_y_uhf = uhf[1].Row(y); 1103 const float* BUTTERAUGLI_RESTRICT row_x_hf = hf[0].Row(y); 1104 const float* BUTTERAUGLI_RESTRICT row_x_uhf = uhf[0].Row(y); 1105 float* BUTTERAUGLI_RESTRICT row = out->Row(y); 1106 for (size_t x = 0; x < hf[0].xsize(); ++x) { 1107 float xdiff = (row_x_uhf[x] + row_x_hf[x]) * muls[0]; 1108 float ydiff = row_y_uhf[x] * muls[1] + row_y_hf[x] * muls[2]; 1109 row[x] = xdiff * xdiff + ydiff * ydiff; 1110 row[x] = std::sqrt(row[x]); 1111 } 1112 } 1113 } 1114 1115 void DiffPrecompute(const ImageF& xyb, float mul, float bias_arg, ImageF* out) { 1116 const size_t xsize = xyb.xsize(); 1117 const size_t ysize = xyb.ysize(); 1118 const float bias = mul * bias_arg; 1119 const float sqrt_bias = std::sqrt(bias); 1120 for (size_t y = 0; y < ysize; ++y) { 1121 const float* BUTTERAUGLI_RESTRICT row_in = xyb.Row(y); 1122 float* BUTTERAUGLI_RESTRICT row_out = out->Row(y); 1123 for (size_t x = 0; x < xsize; ++x) { 1124 // kBias makes sqrt behave more linearly. 1125 row_out[x] = std::sqrt(mul * std::abs(row_in[x]) + bias) - sqrt_bias; 1126 } 1127 } 1128 } 1129 1130 // std::log(80.0) / std::log(255.0); 1131 constexpr float kIntensityTargetNormalizationHack = 0.79079917404f; 1132 static const float kInternalGoodQualityThreshold = 1133 17.83f * kIntensityTargetNormalizationHack; 1134 static const float kGlobalScale = 1.0 / kInternalGoodQualityThreshold; 1135 1136 void StoreMin3(const float v, float& min0, float& min1, float& min2) { 1137 if (v < min2) { 1138 if (v < min0) { 1139 min2 = min1; 1140 min1 = min0; 1141 min0 = v; 1142 } else if (v < min1) { 1143 min2 = min1; 1144 min1 = v; 1145 } else { 1146 min2 = v; 1147 } 1148 } 1149 } 1150 1151 // Look for smooth areas near the area of degradation. 1152 // If the areas area generally smooth, don't do masking. 1153 void FuzzyErosion(const ImageF& from, ImageF* to) { 1154 const size_t xsize = from.xsize(); 1155 const size_t ysize = from.ysize(); 1156 static const int kStep = 3; 1157 for (size_t y = 0; y < ysize; ++y) { 1158 for (size_t x = 0; x < xsize; ++x) { 1159 float min0 = from.Row(y)[x]; 1160 float min1 = 2 * min0; 1161 float min2 = min1; 1162 if (x >= kStep) { 1163 float v = from.Row(y)[x - kStep]; 1164 StoreMin3(v, min0, min1, min2); 1165 if (y >= kStep) { 1166 float v = from.Row(y - kStep)[x - kStep]; 1167 StoreMin3(v, min0, min1, min2); 1168 } 1169 if (y < ysize - kStep) { 1170 float v = from.Row(y + kStep)[x - kStep]; 1171 StoreMin3(v, min0, min1, min2); 1172 } 1173 } 1174 if (x < xsize - kStep) { 1175 float v = from.Row(y)[x + kStep]; 1176 StoreMin3(v, min0, min1, min2); 1177 if (y >= kStep) { 1178 float v = from.Row(y - kStep)[x + kStep]; 1179 StoreMin3(v, min0, min1, min2); 1180 } 1181 if (y < ysize - kStep) { 1182 float v = from.Row(y + kStep)[x + kStep]; 1183 StoreMin3(v, min0, min1, min2); 1184 } 1185 } 1186 if (y >= kStep) { 1187 float v = from.Row(y - kStep)[x]; 1188 StoreMin3(v, min0, min1, min2); 1189 } 1190 if (y < ysize - kStep) { 1191 float v = from.Row(y + kStep)[x]; 1192 StoreMin3(v, min0, min1, min2); 1193 } 1194 to->Row(y)[x] = (0.45f * min0 + 0.3f * min1 + 0.25f * min2); 1195 } 1196 } 1197 } 1198 1199 // Compute values of local frequency and dc masking based on the activity 1200 // in the two images. img_diff_ac may be null. 1201 Status Mask(const ImageF& mask0, const ImageF& mask1, 1202 const ButteraugliParams& params, BlurTemp* blur_temp, 1203 ImageF* BUTTERAUGLI_RESTRICT mask, 1204 ImageF* BUTTERAUGLI_RESTRICT diff_ac) { 1205 const size_t xsize = mask0.xsize(); 1206 const size_t ysize = mask0.ysize(); 1207 JXL_ASSIGN_OR_RETURN(*mask, ImageF::Create(xsize, ysize)); 1208 static const float kMul = 6.19424080439; 1209 static const float kBias = 12.61050594197; 1210 static const float kRadius = 2.7; 1211 JXL_ASSIGN_OR_RETURN(ImageF diff0, ImageF::Create(xsize, ysize)); 1212 JXL_ASSIGN_OR_RETURN(ImageF diff1, ImageF::Create(xsize, ysize)); 1213 JXL_ASSIGN_OR_RETURN(ImageF blurred0, ImageF::Create(xsize, ysize)); 1214 JXL_ASSIGN_OR_RETURN(ImageF blurred1, ImageF::Create(xsize, ysize)); 1215 DiffPrecompute(mask0, kMul, kBias, &diff0); 1216 DiffPrecompute(mask1, kMul, kBias, &diff1); 1217 JXL_RETURN_IF_ERROR(Blur(diff0, kRadius, params, blur_temp, &blurred0)); 1218 FuzzyErosion(blurred0, &diff0); 1219 JXL_RETURN_IF_ERROR(Blur(diff1, kRadius, params, blur_temp, &blurred1)); 1220 for (size_t y = 0; y < ysize; ++y) { 1221 for (size_t x = 0; x < xsize; ++x) { 1222 mask->Row(y)[x] = diff0.Row(y)[x]; 1223 if (diff_ac != nullptr) { 1224 static const float kMaskToErrorMul = 10.0; 1225 float diff = blurred0.Row(y)[x] - blurred1.Row(y)[x]; 1226 diff_ac->Row(y)[x] += kMaskToErrorMul * diff * diff; 1227 } 1228 } 1229 } 1230 return true; 1231 } 1232 1233 // `diff_ac` may be null. 1234 Status MaskPsychoImage(const PsychoImage& pi0, const PsychoImage& pi1, 1235 const size_t xsize, const size_t ysize, 1236 const ButteraugliParams& params, BlurTemp* blur_temp, 1237 ImageF* BUTTERAUGLI_RESTRICT mask, 1238 ImageF* BUTTERAUGLI_RESTRICT diff_ac) { 1239 JXL_ASSIGN_OR_RETURN(ImageF mask0, ImageF::Create(xsize, ysize)); 1240 JXL_ASSIGN_OR_RETURN(ImageF mask1, ImageF::Create(xsize, ysize)); 1241 CombineChannelsForMasking(&pi0.hf[0], &pi0.uhf[0], &mask0); 1242 CombineChannelsForMasking(&pi1.hf[0], &pi1.uhf[0], &mask1); 1243 JXL_RETURN_IF_ERROR(Mask(mask0, mask1, params, blur_temp, mask, diff_ac)); 1244 return true; 1245 } 1246 1247 double MaskY(double delta) { 1248 static const double offset = 0.829591754942; 1249 static const double scaler = 0.451936922203; 1250 static const double mul = 2.5485944793; 1251 const double c = mul / ((scaler * delta) + offset); 1252 const double retval = kGlobalScale * (1.0 + c); 1253 return retval * retval; 1254 } 1255 1256 double MaskDcY(double delta) { 1257 static const double offset = 0.20025578522; 1258 static const double scaler = 3.87449418804; 1259 static const double mul = 0.505054525019; 1260 const double c = mul / ((scaler * delta) + offset); 1261 const double retval = kGlobalScale * (1.0 + c); 1262 return retval * retval; 1263 } 1264 1265 inline float MaskColor(const float color[3], const float mask) { 1266 return color[0] * mask + color[1] * mask + color[2] * mask; 1267 } 1268 1269 // Diffmap := sqrt of sum{diff images by multiplied by X and Y/B masks} 1270 void CombineChannelsToDiffmap(const ImageF& mask, const Image3F& block_diff_dc, 1271 const Image3F& block_diff_ac, float xmul, 1272 ImageF* result) { 1273 JXL_CHECK(SameSize(mask, *result)); 1274 size_t xsize = mask.xsize(); 1275 size_t ysize = mask.ysize(); 1276 for (size_t y = 0; y < ysize; ++y) { 1277 float* BUTTERAUGLI_RESTRICT row_out = result->Row(y); 1278 for (size_t x = 0; x < xsize; ++x) { 1279 float val = mask.Row(y)[x]; 1280 float maskval = MaskY(val); 1281 float dc_maskval = MaskDcY(val); 1282 float diff_dc[3]; 1283 float diff_ac[3]; 1284 for (int i = 0; i < 3; ++i) { 1285 diff_dc[i] = block_diff_dc.PlaneRow(i, y)[x]; 1286 diff_ac[i] = block_diff_ac.PlaneRow(i, y)[x]; 1287 } 1288 diff_ac[0] *= xmul; 1289 diff_dc[0] *= xmul; 1290 row_out[x] = std::sqrt(MaskColor(diff_dc, dc_maskval) + 1291 MaskColor(diff_ac, maskval)); 1292 } 1293 } 1294 } 1295 1296 // Adds weighted L2 difference between i0 and i1 to diffmap. 1297 static void L2Diff(const ImageF& i0, const ImageF& i1, const float w, 1298 ImageF* BUTTERAUGLI_RESTRICT diffmap) { 1299 if (w == 0) return; 1300 1301 const HWY_FULL(float) d; 1302 const auto weight = Set(d, w); 1303 1304 for (size_t y = 0; y < i0.ysize(); ++y) { 1305 const float* BUTTERAUGLI_RESTRICT row0 = i0.ConstRow(y); 1306 const float* BUTTERAUGLI_RESTRICT row1 = i1.ConstRow(y); 1307 float* BUTTERAUGLI_RESTRICT row_diff = diffmap->Row(y); 1308 1309 for (size_t x = 0; x < i0.xsize(); x += Lanes(d)) { 1310 const auto diff = Sub(Load(d, row0 + x), Load(d, row1 + x)); 1311 const auto diff2 = Mul(diff, diff); 1312 const auto prev = Load(d, row_diff + x); 1313 Store(MulAdd(diff2, weight, prev), d, row_diff + x); 1314 } 1315 } 1316 } 1317 1318 // Initializes diffmap to the weighted L2 difference between i0 and i1. 1319 static void SetL2Diff(const ImageF& i0, const ImageF& i1, const float w, 1320 ImageF* BUTTERAUGLI_RESTRICT diffmap) { 1321 if (w == 0) return; 1322 1323 const HWY_FULL(float) d; 1324 const auto weight = Set(d, w); 1325 1326 for (size_t y = 0; y < i0.ysize(); ++y) { 1327 const float* BUTTERAUGLI_RESTRICT row0 = i0.ConstRow(y); 1328 const float* BUTTERAUGLI_RESTRICT row1 = i1.ConstRow(y); 1329 float* BUTTERAUGLI_RESTRICT row_diff = diffmap->Row(y); 1330 1331 for (size_t x = 0; x < i0.xsize(); x += Lanes(d)) { 1332 const auto diff = Sub(Load(d, row0 + x), Load(d, row1 + x)); 1333 const auto diff2 = Mul(diff, diff); 1334 Store(Mul(diff2, weight), d, row_diff + x); 1335 } 1336 } 1337 } 1338 1339 // i0 is the original image. 1340 // i1 is the deformed copy. 1341 static void L2DiffAsymmetric(const ImageF& i0, const ImageF& i1, float w_0gt1, 1342 float w_0lt1, 1343 ImageF* BUTTERAUGLI_RESTRICT diffmap) { 1344 if (w_0gt1 == 0 && w_0lt1 == 0) { 1345 return; 1346 } 1347 1348 const HWY_FULL(float) d; 1349 const auto vw_0gt1 = Set(d, w_0gt1 * 0.8); 1350 const auto vw_0lt1 = Set(d, w_0lt1 * 0.8); 1351 1352 for (size_t y = 0; y < i0.ysize(); ++y) { 1353 const float* BUTTERAUGLI_RESTRICT row0 = i0.Row(y); 1354 const float* BUTTERAUGLI_RESTRICT row1 = i1.Row(y); 1355 float* BUTTERAUGLI_RESTRICT row_diff = diffmap->Row(y); 1356 1357 for (size_t x = 0; x < i0.xsize(); x += Lanes(d)) { 1358 const auto val0 = Load(d, row0 + x); 1359 const auto val1 = Load(d, row1 + x); 1360 1361 // Primary symmetric quadratic objective. 1362 const auto diff = Sub(val0, val1); 1363 auto total = MulAdd(Mul(diff, diff), vw_0gt1, Load(d, row_diff + x)); 1364 1365 // Secondary half-open quadratic objectives. 1366 const auto fabs0 = Abs(val0); 1367 const auto too_small = Mul(Set(d, 0.4), fabs0); 1368 const auto too_big = fabs0; 1369 1370 const auto if_neg = IfThenElse( 1371 Gt(val1, Neg(too_small)), Add(val1, too_small), 1372 IfThenElseZero(Lt(val1, Neg(too_big)), Sub(Neg(val1), too_big))); 1373 const auto if_pos = 1374 IfThenElse(Lt(val1, too_small), Sub(too_small, val1), 1375 IfThenElseZero(Gt(val1, too_big), Sub(val1, too_big))); 1376 const auto v = IfThenElse(Lt(val0, Zero(d)), if_neg, if_pos); 1377 total = MulAdd(vw_0lt1, Mul(v, v), total); 1378 Store(total, d, row_diff + x); 1379 } 1380 } 1381 } 1382 1383 // A simple HDR compatible gamma function. 1384 template <class DF, class V> 1385 V Gamma(const DF df, V v) { 1386 // ln(2) constant folded in because we want std::log but have FastLog2f. 1387 const auto kRetMul = Set(df, 19.245013259874995f * 0.693147180559945f); 1388 const auto kRetAdd = Set(df, -23.16046239805755); 1389 // This should happen rarely, but may lead to a NaN in log, which is 1390 // undesirable. Since negative photons don't exist we solve the NaNs by 1391 // clamping here. 1392 v = ZeroIfNegative(v); 1393 1394 const auto biased = Add(v, Set(df, 9.9710635769299145)); 1395 const auto log = FastLog2f(df, biased); 1396 // We could fold this into a custom Log2 polynomial, but there would be 1397 // relatively little gain. 1398 return MulAdd(kRetMul, log, kRetAdd); 1399 } 1400 1401 template <bool Clamp, class DF, class V> 1402 BUTTERAUGLI_INLINE void OpsinAbsorbance(const DF df, const V& in0, const V& in1, 1403 const V& in2, V* JXL_RESTRICT out0, 1404 V* JXL_RESTRICT out1, 1405 V* JXL_RESTRICT out2) { 1406 // https://en.wikipedia.org/wiki/Photopsin absorbance modeling. 1407 static const double mixi0 = 0.29956550340058319; 1408 static const double mixi1 = 0.63373087833825936; 1409 static const double mixi2 = 0.077705617820981968; 1410 static const double mixi3 = 1.7557483643287353; 1411 static const double mixi4 = 0.22158691104574774; 1412 static const double mixi5 = 0.69391388044116142; 1413 static const double mixi6 = 0.0987313588422; 1414 static const double mixi7 = 1.7557483643287353; 1415 static const double mixi8 = 0.02; 1416 static const double mixi9 = 0.02; 1417 static const double mixi10 = 0.20480129041026129; 1418 static const double mixi11 = 12.226454707163354; 1419 1420 const V mix0 = Set(df, mixi0); 1421 const V mix1 = Set(df, mixi1); 1422 const V mix2 = Set(df, mixi2); 1423 const V mix3 = Set(df, mixi3); 1424 const V mix4 = Set(df, mixi4); 1425 const V mix5 = Set(df, mixi5); 1426 const V mix6 = Set(df, mixi6); 1427 const V mix7 = Set(df, mixi7); 1428 const V mix8 = Set(df, mixi8); 1429 const V mix9 = Set(df, mixi9); 1430 const V mix10 = Set(df, mixi10); 1431 const V mix11 = Set(df, mixi11); 1432 1433 *out0 = MulAdd(mix0, in0, MulAdd(mix1, in1, MulAdd(mix2, in2, mix3))); 1434 *out1 = MulAdd(mix4, in0, MulAdd(mix5, in1, MulAdd(mix6, in2, mix7))); 1435 *out2 = MulAdd(mix8, in0, MulAdd(mix9, in1, MulAdd(mix10, in2, mix11))); 1436 1437 if (Clamp) { 1438 *out0 = Max(*out0, mix3); 1439 *out1 = Max(*out1, mix7); 1440 *out2 = Max(*out2, mix11); 1441 } 1442 } 1443 1444 // `blurred` is a temporary image used inside this function and not returned. 1445 Status OpsinDynamicsImage(const Image3F& rgb, const ButteraugliParams& params, 1446 Image3F* blurred, BlurTemp* blur_temp, Image3F* xyb) { 1447 const double kSigma = 1.2; 1448 JXL_RETURN_IF_ERROR( 1449 Blur(rgb.Plane(0), kSigma, params, blur_temp, &blurred->Plane(0))); 1450 JXL_RETURN_IF_ERROR( 1451 Blur(rgb.Plane(1), kSigma, params, blur_temp, &blurred->Plane(1))); 1452 JXL_RETURN_IF_ERROR( 1453 Blur(rgb.Plane(2), kSigma, params, blur_temp, &blurred->Plane(2))); 1454 const HWY_FULL(float) df; 1455 const auto intensity_target_multiplier = Set(df, params.intensity_target); 1456 for (size_t y = 0; y < rgb.ysize(); ++y) { 1457 const float* row_r = rgb.ConstPlaneRow(0, y); 1458 const float* row_g = rgb.ConstPlaneRow(1, y); 1459 const float* row_b = rgb.ConstPlaneRow(2, y); 1460 const float* row_blurred_r = blurred->ConstPlaneRow(0, y); 1461 const float* row_blurred_g = blurred->ConstPlaneRow(1, y); 1462 const float* row_blurred_b = blurred->ConstPlaneRow(2, y); 1463 float* row_out_x = xyb->PlaneRow(0, y); 1464 float* row_out_y = xyb->PlaneRow(1, y); 1465 float* row_out_b = xyb->PlaneRow(2, y); 1466 const auto min = Set(df, 1e-4f); 1467 for (size_t x = 0; x < rgb.xsize(); x += Lanes(df)) { 1468 auto sensitivity0 = Undefined(df); 1469 auto sensitivity1 = Undefined(df); 1470 auto sensitivity2 = Undefined(df); 1471 { 1472 // Calculate sensitivity based on the smoothed image gamma derivative. 1473 auto pre_mixed0 = Undefined(df); 1474 auto pre_mixed1 = Undefined(df); 1475 auto pre_mixed2 = Undefined(df); 1476 OpsinAbsorbance<true>( 1477 df, Mul(Load(df, row_blurred_r + x), intensity_target_multiplier), 1478 Mul(Load(df, row_blurred_g + x), intensity_target_multiplier), 1479 Mul(Load(df, row_blurred_b + x), intensity_target_multiplier), 1480 &pre_mixed0, &pre_mixed1, &pre_mixed2); 1481 pre_mixed0 = Max(pre_mixed0, min); 1482 pre_mixed1 = Max(pre_mixed1, min); 1483 pre_mixed2 = Max(pre_mixed2, min); 1484 sensitivity0 = Div(Gamma(df, pre_mixed0), pre_mixed0); 1485 sensitivity1 = Div(Gamma(df, pre_mixed1), pre_mixed1); 1486 sensitivity2 = Div(Gamma(df, pre_mixed2), pre_mixed2); 1487 sensitivity0 = Max(sensitivity0, min); 1488 sensitivity1 = Max(sensitivity1, min); 1489 sensitivity2 = Max(sensitivity2, min); 1490 } 1491 auto cur_mixed0 = Undefined(df); 1492 auto cur_mixed1 = Undefined(df); 1493 auto cur_mixed2 = Undefined(df); 1494 OpsinAbsorbance<false>( 1495 df, Mul(Load(df, row_r + x), intensity_target_multiplier), 1496 Mul(Load(df, row_g + x), intensity_target_multiplier), 1497 Mul(Load(df, row_b + x), intensity_target_multiplier), &cur_mixed0, 1498 &cur_mixed1, &cur_mixed2); 1499 cur_mixed0 = Mul(cur_mixed0, sensitivity0); 1500 cur_mixed1 = Mul(cur_mixed1, sensitivity1); 1501 cur_mixed2 = Mul(cur_mixed2, sensitivity2); 1502 // This is a kludge. The negative values should be zeroed away before 1503 // blurring. Ideally there would be no negative values in the first place. 1504 const auto min01 = Set(df, 1.7557483643287353f); 1505 const auto min2 = Set(df, 12.226454707163354f); 1506 cur_mixed0 = Max(cur_mixed0, min01); 1507 cur_mixed1 = Max(cur_mixed1, min01); 1508 cur_mixed2 = Max(cur_mixed2, min2); 1509 1510 Store(Sub(cur_mixed0, cur_mixed1), df, row_out_x + x); 1511 Store(Add(cur_mixed0, cur_mixed1), df, row_out_y + x); 1512 Store(cur_mixed2, df, row_out_b + x); 1513 } 1514 } 1515 return true; 1516 } 1517 1518 Status ButteraugliDiffmapInPlace(Image3F& image0, Image3F& image1, 1519 const ButteraugliParams& params, 1520 ImageF& diffmap) { 1521 // image0 and image1 are in linear sRGB color space 1522 const size_t xsize = image0.xsize(); 1523 const size_t ysize = image0.ysize(); 1524 BlurTemp blur_temp; 1525 { 1526 // Convert image0 and image1 to XYB in-place 1527 JXL_ASSIGN_OR_RETURN(Image3F temp, Image3F::Create(xsize, ysize)); 1528 JXL_RETURN_IF_ERROR( 1529 OpsinDynamicsImage(image0, params, &temp, &blur_temp, &image0)); 1530 JXL_RETURN_IF_ERROR( 1531 OpsinDynamicsImage(image1, params, &temp, &blur_temp, &image1)); 1532 } 1533 // image0 and image1 are in XYB color space 1534 JXL_ASSIGN_OR_RETURN(ImageF block_diff_dc, ImageF::Create(xsize, ysize)); 1535 ZeroFillImage(&block_diff_dc); 1536 { 1537 // separate out LF components from image0 and image1 and compute the dc 1538 // diff image from them 1539 JXL_ASSIGN_OR_RETURN(Image3F lf0, Image3F::Create(xsize, ysize)); 1540 JXL_ASSIGN_OR_RETURN(Image3F lf1, Image3F::Create(xsize, ysize)); 1541 JXL_RETURN_IF_ERROR( 1542 SeparateLFAndMF(params, image0, &lf0, &image0, &blur_temp)); 1543 JXL_RETURN_IF_ERROR( 1544 SeparateLFAndMF(params, image1, &lf1, &image1, &blur_temp)); 1545 for (size_t c = 0; c < 3; ++c) { 1546 L2Diff(lf0.Plane(c), lf1.Plane(c), wmul[6 + c], &block_diff_dc); 1547 } 1548 } 1549 // image0 and image1 are MF residuals (before blurring) in XYB color space 1550 ImageF hf0[2]; 1551 ImageF hf1[2]; 1552 JXL_RETURN_IF_ERROR(SeparateMFAndHF(params, &image0, &hf0[0], &blur_temp)); 1553 JXL_RETURN_IF_ERROR(SeparateMFAndHF(params, &image1, &hf1[0], &blur_temp)); 1554 // image0 and image1 are MF-images in XYB color space 1555 1556 JXL_ASSIGN_OR_RETURN(ImageF block_diff_ac, ImageF::Create(xsize, ysize)); 1557 ZeroFillImage(&block_diff_ac); 1558 // start accumulating ac diff image from MF images 1559 { 1560 JXL_ASSIGN_OR_RETURN(ImageF diffs, ImageF::Create(xsize, ysize)); 1561 MaltaDiffMapLF(image0.Plane(1), image1.Plane(1), wMfMalta, wMfMalta, 1562 norm1Mf, &diffs, &block_diff_ac); 1563 MaltaDiffMapLF(image0.Plane(0), image1.Plane(0), wMfMaltaX, wMfMaltaX, 1564 norm1MfX, &diffs, &block_diff_ac); 1565 } 1566 for (size_t c = 0; c < 3; ++c) { 1567 L2Diff(image0.Plane(c), image1.Plane(c), wmul[3 + c], &block_diff_ac); 1568 } 1569 // we will not need the MF-images and more, so we deallocate them to reduce 1570 // peak memory usage 1571 image0 = Image3F(); 1572 image1 = Image3F(); 1573 1574 ImageF uhf0[2]; 1575 ImageF uhf1[2]; 1576 JXL_RETURN_IF_ERROR(SeparateHFAndUHF(params, &hf0[0], &uhf0[0], &blur_temp)); 1577 JXL_RETURN_IF_ERROR(SeparateHFAndUHF(params, &hf1[0], &uhf1[0], &blur_temp)); 1578 1579 // continue accumulating ac diff image from HF and UHF images 1580 const float hf_asymmetry = params.hf_asymmetry; 1581 { 1582 JXL_ASSIGN_OR_RETURN(ImageF diffs, ImageF::Create(xsize, ysize)); 1583 MaltaDiffMap(uhf0[1], uhf1[1], wUhfMalta * hf_asymmetry, 1584 wUhfMalta / hf_asymmetry, norm1Uhf, &diffs, &block_diff_ac); 1585 MaltaDiffMap(uhf0[0], uhf1[0], wUhfMaltaX * hf_asymmetry, 1586 wUhfMaltaX / hf_asymmetry, norm1UhfX, &diffs, &block_diff_ac); 1587 MaltaDiffMapLF(hf0[1], hf1[1], wHfMalta * std::sqrt(hf_asymmetry), 1588 wHfMalta / std::sqrt(hf_asymmetry), norm1Hf, &diffs, 1589 &block_diff_ac); 1590 MaltaDiffMapLF(hf0[0], hf1[0], wHfMaltaX * std::sqrt(hf_asymmetry), 1591 wHfMaltaX / std::sqrt(hf_asymmetry), norm1HfX, &diffs, 1592 &block_diff_ac); 1593 } 1594 for (size_t c = 0; c < 2; ++c) { 1595 L2DiffAsymmetric(hf0[c], hf1[c], wmul[c] * hf_asymmetry, 1596 wmul[c] / hf_asymmetry, &block_diff_ac); 1597 } 1598 1599 // compute mask image from HF and UHF X and Y images 1600 JXL_ASSIGN_OR_RETURN(ImageF mask, ImageF::Create(xsize, ysize)); 1601 { 1602 JXL_ASSIGN_OR_RETURN(ImageF mask0, ImageF::Create(xsize, ysize)); 1603 JXL_ASSIGN_OR_RETURN(ImageF mask1, ImageF::Create(xsize, ysize)); 1604 CombineChannelsForMasking(&hf0[0], &uhf0[0], &mask0); 1605 CombineChannelsForMasking(&hf1[0], &uhf1[0], &mask1); 1606 DeallocateHFAndUHF(&hf1[0], &uhf1[0]); 1607 DeallocateHFAndUHF(&hf0[0], &uhf0[0]); 1608 JXL_RETURN_IF_ERROR( 1609 Mask(mask0, mask1, params, &blur_temp, &mask, &block_diff_ac)); 1610 } 1611 1612 // compute final diffmap from mask image and ac and dc diff images 1613 JXL_ASSIGN_OR_RETURN(diffmap, ImageF::Create(xsize, ysize)); 1614 for (size_t y = 0; y < ysize; ++y) { 1615 const float* row_dc = block_diff_dc.Row(y); 1616 const float* row_ac = block_diff_ac.Row(y); 1617 float* row_out = diffmap.Row(y); 1618 for (size_t x = 0; x < xsize; ++x) { 1619 const float val = mask.Row(y)[x]; 1620 row_out[x] = sqrt(row_dc[x] * MaskDcY(val) + row_ac[x] * MaskY(val)); 1621 } 1622 } 1623 return true; 1624 } 1625 1626 // NOLINTNEXTLINE(google-readability-namespace-comments) 1627 } // namespace HWY_NAMESPACE 1628 } // namespace jxl 1629 HWY_AFTER_NAMESPACE(); 1630 1631 #if HWY_ONCE 1632 namespace jxl { 1633 1634 HWY_EXPORT(SeparateFrequencies); // Local function. 1635 HWY_EXPORT(MaskPsychoImage); // Local function. 1636 HWY_EXPORT(L2DiffAsymmetric); // Local function. 1637 HWY_EXPORT(L2Diff); // Local function. 1638 HWY_EXPORT(SetL2Diff); // Local function. 1639 HWY_EXPORT(CombineChannelsToDiffmap); // Local function. 1640 HWY_EXPORT(MaltaDiffMap); // Local function. 1641 HWY_EXPORT(MaltaDiffMapLF); // Local function. 1642 HWY_EXPORT(OpsinDynamicsImage); // Local function. 1643 HWY_EXPORT(ButteraugliDiffmapInPlace); // Local function. 1644 1645 #if BUTTERAUGLI_ENABLE_CHECKS 1646 1647 static inline bool IsNan(const float x) { 1648 uint32_t bits; 1649 memcpy(&bits, &x, sizeof(bits)); 1650 const uint32_t bitmask_exp = 0x7F800000; 1651 return (bits & bitmask_exp) == bitmask_exp && (bits & 0x7FFFFF); 1652 } 1653 1654 static inline bool IsNan(const double x) { 1655 uint64_t bits; 1656 memcpy(&bits, &x, sizeof(bits)); 1657 return (0x7ff0000000000001ULL <= bits && bits <= 0x7fffffffffffffffULL) || 1658 (0xfff0000000000001ULL <= bits && bits <= 0xffffffffffffffffULL); 1659 } 1660 1661 static inline void CheckImage(const ImageF& image, const char* name) { 1662 for (size_t y = 0; y < image.ysize(); ++y) { 1663 const float* BUTTERAUGLI_RESTRICT row = image.Row(y); 1664 for (size_t x = 0; x < image.xsize(); ++x) { 1665 if (IsNan(row[x])) { 1666 printf("NAN: Image %s @ %" PRIuS ",%" PRIuS " (of %" PRIuS ",%" PRIuS 1667 ")\n", 1668 name, x, y, image.xsize(), image.ysize()); 1669 exit(1); 1670 } 1671 } 1672 } 1673 } 1674 1675 #define CHECK_NAN(x, str) \ 1676 do { \ 1677 if (IsNan(x)) { \ 1678 printf("%d: %s\n", __LINE__, str); \ 1679 abort(); \ 1680 } \ 1681 } while (0) 1682 1683 #define CHECK_IMAGE(image, name) CheckImage(image, name) 1684 1685 #else // BUTTERAUGLI_ENABLE_CHECKS 1686 1687 #define CHECK_NAN(x, str) 1688 #define CHECK_IMAGE(image, name) 1689 1690 #endif // BUTTERAUGLI_ENABLE_CHECKS 1691 1692 // Calculate a 2x2 subsampled image for purposes of recursive butteraugli at 1693 // multiresolution. 1694 static StatusOr<Image3F> SubSample2x(const Image3F& in) { 1695 size_t xs = (in.xsize() + 1) / 2; 1696 size_t ys = (in.ysize() + 1) / 2; 1697 JXL_ASSIGN_OR_RETURN(Image3F retval, Image3F::Create(xs, ys)); 1698 for (size_t c = 0; c < 3; ++c) { 1699 for (size_t y = 0; y < ys; ++y) { 1700 for (size_t x = 0; x < xs; ++x) { 1701 retval.PlaneRow(c, y)[x] = 0; 1702 } 1703 } 1704 } 1705 for (size_t c = 0; c < 3; ++c) { 1706 for (size_t y = 0; y < in.ysize(); ++y) { 1707 for (size_t x = 0; x < in.xsize(); ++x) { 1708 retval.PlaneRow(c, y / 2)[x / 2] += 0.25f * in.PlaneRow(c, y)[x]; 1709 } 1710 } 1711 if ((in.xsize() & 1) != 0) { 1712 for (size_t y = 0; y < retval.ysize(); ++y) { 1713 size_t last_column = retval.xsize() - 1; 1714 retval.PlaneRow(c, y)[last_column] *= 2.0f; 1715 } 1716 } 1717 if ((in.ysize() & 1) != 0) { 1718 for (size_t x = 0; x < retval.xsize(); ++x) { 1719 size_t last_row = retval.ysize() - 1; 1720 retval.PlaneRow(c, last_row)[x] *= 2.0f; 1721 } 1722 } 1723 } 1724 return retval; 1725 } 1726 1727 // Supersample src by 2x and add it to dest. 1728 static void AddSupersampled2x(const ImageF& src, float w, ImageF& dest) { 1729 for (size_t y = 0; y < dest.ysize(); ++y) { 1730 for (size_t x = 0; x < dest.xsize(); ++x) { 1731 // There will be less errors from the more averaged images. 1732 // We take it into account to some extent using a scaler. 1733 static const double kHeuristicMixingValue = 0.3; 1734 dest.Row(y)[x] *= 1.0 - kHeuristicMixingValue * w; 1735 dest.Row(y)[x] += w * src.Row(y / 2)[x / 2]; 1736 } 1737 } 1738 } 1739 1740 Image3F* ButteraugliComparator::Temp() const { 1741 bool was_in_use = temp_in_use_.test_and_set(std::memory_order_acq_rel); 1742 JXL_ASSERT(!was_in_use); 1743 (void)was_in_use; 1744 return &temp_; 1745 } 1746 1747 void ButteraugliComparator::ReleaseTemp() const { temp_in_use_.clear(); } 1748 1749 ButteraugliComparator::ButteraugliComparator(size_t xsize, size_t ysize, 1750 const ButteraugliParams& params) 1751 : xsize_(xsize), ysize_(ysize), params_(params) {} 1752 1753 StatusOr<std::unique_ptr<ButteraugliComparator>> ButteraugliComparator::Make( 1754 const Image3F& rgb0, const ButteraugliParams& params) { 1755 size_t xsize = rgb0.xsize(); 1756 size_t ysize = rgb0.ysize(); 1757 std::unique_ptr<ButteraugliComparator> result = 1758 std::unique_ptr<ButteraugliComparator>( 1759 new ButteraugliComparator(xsize, ysize, params)); 1760 JXL_ASSIGN_OR_RETURN(result->temp_, Image3F::Create(xsize, ysize)); 1761 1762 if (xsize < 8 || ysize < 8) { 1763 return result; 1764 } 1765 1766 JXL_ASSIGN_OR_RETURN(Image3F xyb0, Image3F::Create(xsize, ysize)); 1767 JXL_RETURN_IF_ERROR(HWY_DYNAMIC_DISPATCH(OpsinDynamicsImage)( 1768 rgb0, params, result->Temp(), &result->blur_temp_, &xyb0)); 1769 result->ReleaseTemp(); 1770 JXL_RETURN_IF_ERROR(HWY_DYNAMIC_DISPATCH(SeparateFrequencies)( 1771 xsize, ysize, params, &result->blur_temp_, xyb0, result->pi0_)); 1772 1773 // Awful recursive construction of samples of different resolution. 1774 // This is an after-thought and possibly somewhat parallel in 1775 // functionality with the PsychoImage multi-resolution approach. 1776 JXL_ASSIGN_OR_RETURN(Image3F subsampledRgb0, SubSample2x(rgb0)); 1777 StatusOr<std::unique_ptr<ButteraugliComparator>> sub = 1778 ButteraugliComparator::Make(subsampledRgb0, params); 1779 if (!sub.ok()) return sub.status(); 1780 result->sub_ = std::move(sub).value(); 1781 1782 return result; 1783 } 1784 1785 Status ButteraugliComparator::Mask(ImageF* BUTTERAUGLI_RESTRICT mask) const { 1786 return HWY_DYNAMIC_DISPATCH(MaskPsychoImage)( 1787 pi0_, pi0_, xsize_, ysize_, params_, &blur_temp_, mask, nullptr); 1788 } 1789 1790 Status ButteraugliComparator::Diffmap(const Image3F& rgb1, 1791 ImageF& result) const { 1792 if (xsize_ < 8 || ysize_ < 8) { 1793 ZeroFillImage(&result); 1794 return true; 1795 } 1796 JXL_ASSIGN_OR_RETURN(Image3F xyb1, Image3F::Create(xsize_, ysize_)); 1797 JXL_RETURN_IF_ERROR(HWY_DYNAMIC_DISPATCH(OpsinDynamicsImage)( 1798 rgb1, params_, Temp(), &blur_temp_, &xyb1)); 1799 ReleaseTemp(); 1800 JXL_RETURN_IF_ERROR(DiffmapOpsinDynamicsImage(xyb1, result)); 1801 if (sub_) { 1802 if (sub_->xsize_ < 8 || sub_->ysize_ < 8) { 1803 return true; 1804 } 1805 JXL_ASSIGN_OR_RETURN(Image3F sub_xyb, 1806 Image3F::Create(sub_->xsize_, sub_->ysize_)); 1807 JXL_ASSIGN_OR_RETURN(Image3F subsampledRgb1, SubSample2x(rgb1)); 1808 JXL_RETURN_IF_ERROR(HWY_DYNAMIC_DISPATCH(OpsinDynamicsImage)( 1809 subsampledRgb1, params_, sub_->Temp(), &sub_->blur_temp_, &sub_xyb)); 1810 sub_->ReleaseTemp(); 1811 ImageF subresult; 1812 JXL_RETURN_IF_ERROR(sub_->DiffmapOpsinDynamicsImage(sub_xyb, subresult)); 1813 AddSupersampled2x(subresult, 0.5, result); 1814 } 1815 return true; 1816 } 1817 1818 Status ButteraugliComparator::DiffmapOpsinDynamicsImage(const Image3F& xyb1, 1819 ImageF& result) const { 1820 if (xsize_ < 8 || ysize_ < 8) { 1821 ZeroFillImage(&result); 1822 return true; 1823 } 1824 PsychoImage pi1; 1825 JXL_RETURN_IF_ERROR(HWY_DYNAMIC_DISPATCH(SeparateFrequencies)( 1826 xsize_, ysize_, params_, &blur_temp_, xyb1, pi1)); 1827 JXL_ASSIGN_OR_RETURN(result, ImageF::Create(xsize_, ysize_)); 1828 return DiffmapPsychoImage(pi1, result); 1829 } 1830 1831 namespace { 1832 1833 void MaltaDiffMap(const ImageF& lum0, const ImageF& lum1, const double w_0gt1, 1834 const double w_0lt1, const double norm1, 1835 ImageF* HWY_RESTRICT diffs, 1836 Image3F* HWY_RESTRICT block_diff_ac, size_t c) { 1837 HWY_DYNAMIC_DISPATCH(MaltaDiffMap) 1838 (lum0, lum1, w_0gt1, w_0lt1, norm1, diffs, &block_diff_ac->Plane(c)); 1839 } 1840 1841 void MaltaDiffMapLF(const ImageF& lum0, const ImageF& lum1, const double w_0gt1, 1842 const double w_0lt1, const double norm1, 1843 ImageF* HWY_RESTRICT diffs, 1844 Image3F* HWY_RESTRICT block_diff_ac, size_t c) { 1845 HWY_DYNAMIC_DISPATCH(MaltaDiffMapLF) 1846 (lum0, lum1, w_0gt1, w_0lt1, norm1, diffs, &block_diff_ac->Plane(c)); 1847 } 1848 1849 } // namespace 1850 1851 Status ButteraugliComparator::DiffmapPsychoImage(const PsychoImage& pi1, 1852 ImageF& diffmap) const { 1853 if (xsize_ < 8 || ysize_ < 8) { 1854 ZeroFillImage(&diffmap); 1855 return true; 1856 } 1857 1858 const float hf_asymmetry_ = params_.hf_asymmetry; 1859 const float xmul_ = params_.xmul; 1860 1861 JXL_ASSIGN_OR_RETURN(ImageF diffs, ImageF::Create(xsize_, ysize_)); 1862 JXL_ASSIGN_OR_RETURN(Image3F block_diff_ac, Image3F::Create(xsize_, ysize_)); 1863 ZeroFillImage(&block_diff_ac); 1864 MaltaDiffMap(pi0_.uhf[1], pi1.uhf[1], wUhfMalta * hf_asymmetry_, 1865 wUhfMalta / hf_asymmetry_, norm1Uhf, &diffs, &block_diff_ac, 1); 1866 MaltaDiffMap(pi0_.uhf[0], pi1.uhf[0], wUhfMaltaX * hf_asymmetry_, 1867 wUhfMaltaX / hf_asymmetry_, norm1UhfX, &diffs, &block_diff_ac, 1868 0); 1869 MaltaDiffMapLF(pi0_.hf[1], pi1.hf[1], wHfMalta * std::sqrt(hf_asymmetry_), 1870 wHfMalta / std::sqrt(hf_asymmetry_), norm1Hf, &diffs, 1871 &block_diff_ac, 1); 1872 MaltaDiffMapLF(pi0_.hf[0], pi1.hf[0], wHfMaltaX * std::sqrt(hf_asymmetry_), 1873 wHfMaltaX / std::sqrt(hf_asymmetry_), norm1HfX, &diffs, 1874 &block_diff_ac, 0); 1875 MaltaDiffMapLF(pi0_.mf.Plane(1), pi1.mf.Plane(1), wMfMalta, wMfMalta, norm1Mf, 1876 &diffs, &block_diff_ac, 1); 1877 MaltaDiffMapLF(pi0_.mf.Plane(0), pi1.mf.Plane(0), wMfMaltaX, wMfMaltaX, 1878 norm1MfX, &diffs, &block_diff_ac, 0); 1879 1880 JXL_ASSIGN_OR_RETURN(Image3F block_diff_dc, Image3F::Create(xsize_, ysize_)); 1881 for (size_t c = 0; c < 3; ++c) { 1882 if (c < 2) { // No blue channel error accumulated at HF. 1883 HWY_DYNAMIC_DISPATCH(L2DiffAsymmetric) 1884 (pi0_.hf[c], pi1.hf[c], wmul[c] * hf_asymmetry_, wmul[c] / hf_asymmetry_, 1885 &block_diff_ac.Plane(c)); 1886 } 1887 HWY_DYNAMIC_DISPATCH(L2Diff) 1888 (pi0_.mf.Plane(c), pi1.mf.Plane(c), wmul[3 + c], &block_diff_ac.Plane(c)); 1889 HWY_DYNAMIC_DISPATCH(SetL2Diff) 1890 (pi0_.lf.Plane(c), pi1.lf.Plane(c), wmul[6 + c], &block_diff_dc.Plane(c)); 1891 } 1892 1893 ImageF mask; 1894 JXL_RETURN_IF_ERROR(HWY_DYNAMIC_DISPATCH(MaskPsychoImage)( 1895 pi0_, pi1, xsize_, ysize_, params_, &blur_temp_, &mask, 1896 &block_diff_ac.Plane(1))); 1897 1898 HWY_DYNAMIC_DISPATCH(CombineChannelsToDiffmap) 1899 (mask, block_diff_dc, block_diff_ac, xmul_, &diffmap); 1900 return true; 1901 } 1902 1903 double ButteraugliScoreFromDiffmap(const ImageF& diffmap, 1904 const ButteraugliParams* params) { 1905 float retval = 0.0f; 1906 for (size_t y = 0; y < diffmap.ysize(); ++y) { 1907 const float* BUTTERAUGLI_RESTRICT row = diffmap.ConstRow(y); 1908 for (size_t x = 0; x < diffmap.xsize(); ++x) { 1909 retval = std::max(retval, row[x]); 1910 } 1911 } 1912 return retval; 1913 } 1914 1915 Status ButteraugliDiffmap(const Image3F& rgb0, const Image3F& rgb1, 1916 double hf_asymmetry, double xmul, ImageF& diffmap) { 1917 ButteraugliParams params; 1918 params.hf_asymmetry = hf_asymmetry; 1919 params.xmul = xmul; 1920 return ButteraugliDiffmap(rgb0, rgb1, params, diffmap); 1921 } 1922 1923 template <size_t kMax> 1924 bool ButteraugliDiffmapSmall(const Image3F& rgb0, const Image3F& rgb1, 1925 const ButteraugliParams& params, ImageF& diffmap) { 1926 const size_t xsize = rgb0.xsize(); 1927 const size_t ysize = rgb0.ysize(); 1928 // Butteraugli values for small (where xsize or ysize is smaller 1929 // than 8 pixels) images are non-sensical, but most likely it is 1930 // less disruptive to try to compute something than just give up. 1931 // Temporarily extend the borders of the image to fit 8 x 8 size. 1932 size_t xborder = xsize < kMax ? (kMax - xsize) / 2 : 0; 1933 size_t yborder = ysize < kMax ? (kMax - ysize) / 2 : 0; 1934 size_t xscaled = std::max<size_t>(kMax, xsize); 1935 size_t yscaled = std::max<size_t>(kMax, ysize); 1936 JXL_ASSIGN_OR_RETURN(Image3F scaled0, Image3F::Create(xscaled, yscaled)); 1937 JXL_ASSIGN_OR_RETURN(Image3F scaled1, Image3F::Create(xscaled, yscaled)); 1938 for (int i = 0; i < 3; ++i) { 1939 for (size_t y = 0; y < yscaled; ++y) { 1940 for (size_t x = 0; x < xscaled; ++x) { 1941 size_t x2 = std::min<size_t>(xsize - 1, x > xborder ? x - xborder : 0); 1942 size_t y2 = std::min<size_t>(ysize - 1, y > yborder ? y - yborder : 0); 1943 scaled0.PlaneRow(i, y)[x] = rgb0.PlaneRow(i, y2)[x2]; 1944 scaled1.PlaneRow(i, y)[x] = rgb1.PlaneRow(i, y2)[x2]; 1945 } 1946 } 1947 } 1948 ImageF diffmap_scaled; 1949 const bool ok = ButteraugliDiffmap(scaled0, scaled1, params, diffmap_scaled); 1950 JXL_ASSIGN_OR_RETURN(diffmap, ImageF::Create(xsize, ysize)); 1951 for (size_t y = 0; y < ysize; ++y) { 1952 for (size_t x = 0; x < xsize; ++x) { 1953 diffmap.Row(y)[x] = diffmap_scaled.Row(y + yborder)[x + xborder]; 1954 } 1955 } 1956 return ok; 1957 } 1958 1959 Status ButteraugliDiffmap(const Image3F& rgb0, const Image3F& rgb1, 1960 const ButteraugliParams& params, ImageF& diffmap) { 1961 const size_t xsize = rgb0.xsize(); 1962 const size_t ysize = rgb0.ysize(); 1963 if (xsize < 1 || ysize < 1) { 1964 return JXL_FAILURE("Zero-sized image"); 1965 } 1966 if (!SameSize(rgb0, rgb1)) { 1967 return JXL_FAILURE("Size mismatch"); 1968 } 1969 static const int kMax = 8; 1970 if (xsize < kMax || ysize < kMax) { 1971 return ButteraugliDiffmapSmall<kMax>(rgb0, rgb1, params, diffmap); 1972 } 1973 JXL_ASSIGN_OR_RETURN(std::unique_ptr<ButteraugliComparator> butteraugli, 1974 ButteraugliComparator::Make(rgb0, params)); 1975 JXL_RETURN_IF_ERROR(butteraugli->Diffmap(rgb1, diffmap)); 1976 return true; 1977 } 1978 1979 bool ButteraugliInterface(const Image3F& rgb0, const Image3F& rgb1, 1980 float hf_asymmetry, float xmul, ImageF& diffmap, 1981 double& diffvalue) { 1982 ButteraugliParams params; 1983 params.hf_asymmetry = hf_asymmetry; 1984 params.xmul = xmul; 1985 return ButteraugliInterface(rgb0, rgb1, params, diffmap, diffvalue); 1986 } 1987 1988 bool ButteraugliInterface(const Image3F& rgb0, const Image3F& rgb1, 1989 const ButteraugliParams& params, ImageF& diffmap, 1990 double& diffvalue) { 1991 if (!ButteraugliDiffmap(rgb0, rgb1, params, diffmap)) { 1992 return false; 1993 } 1994 diffvalue = ButteraugliScoreFromDiffmap(diffmap, ¶ms); 1995 return true; 1996 } 1997 1998 Status ButteraugliInterfaceInPlace(Image3F&& rgb0, Image3F&& rgb1, 1999 const ButteraugliParams& params, 2000 ImageF& diffmap, double& diffvalue) { 2001 const size_t xsize = rgb0.xsize(); 2002 const size_t ysize = rgb0.ysize(); 2003 if (xsize < 1 || ysize < 1) { 2004 return JXL_FAILURE("Zero-sized image"); 2005 } 2006 if (!SameSize(rgb0, rgb1)) { 2007 return JXL_FAILURE("Size mismatch"); 2008 } 2009 static const int kMax = 8; 2010 if (xsize < kMax || ysize < kMax) { 2011 bool ok = ButteraugliDiffmapSmall<kMax>(rgb0, rgb1, params, diffmap); 2012 diffvalue = ButteraugliScoreFromDiffmap(diffmap, ¶ms); 2013 return ok; 2014 } 2015 ImageF subdiffmap; 2016 if (xsize >= 15 && ysize >= 15) { 2017 JXL_ASSIGN_OR_RETURN(Image3F rgb0_sub, SubSample2x(rgb0)); 2018 JXL_ASSIGN_OR_RETURN(Image3F rgb1_sub, SubSample2x(rgb1)); 2019 JXL_RETURN_IF_ERROR(HWY_DYNAMIC_DISPATCH(ButteraugliDiffmapInPlace)( 2020 rgb0_sub, rgb1_sub, params, subdiffmap)); 2021 } 2022 JXL_RETURN_IF_ERROR(HWY_DYNAMIC_DISPATCH(ButteraugliDiffmapInPlace)( 2023 rgb0, rgb1, params, diffmap)); 2024 if (xsize >= 15 && ysize >= 15) { 2025 AddSupersampled2x(subdiffmap, 0.5, diffmap); 2026 } 2027 diffvalue = ButteraugliScoreFromDiffmap(diffmap, ¶ms); 2028 return true; 2029 } 2030 2031 double ButteraugliFuzzyClass(double score) { 2032 static const double fuzzy_width_up = 4.8; 2033 static const double fuzzy_width_down = 4.8; 2034 static const double m0 = 2.0; 2035 static const double scaler = 0.7777; 2036 double val; 2037 if (score < 1.0) { 2038 // val in [scaler .. 2.0] 2039 val = m0 / (1.0 + exp((score - 1.0) * fuzzy_width_down)); 2040 val -= 1.0; // from [1 .. 2] to [0 .. 1] 2041 val *= 2.0 - scaler; // from [0 .. 1] to [0 .. 2.0 - scaler] 2042 val += scaler; // from [0 .. 2.0 - scaler] to [scaler .. 2.0] 2043 } else { 2044 // val in [0 .. scaler] 2045 val = m0 / (1.0 + exp((score - 1.0) * fuzzy_width_up)); 2046 val *= scaler; 2047 } 2048 return val; 2049 } 2050 2051 // #define PRINT_OUT_NORMALIZATION 2052 2053 double ButteraugliFuzzyInverse(double seek) { 2054 double pos = 0; 2055 // NOLINTNEXTLINE(clang-analyzer-security.FloatLoopCounter) 2056 for (double range = 1.0; range >= 1e-10; range *= 0.5) { 2057 double cur = ButteraugliFuzzyClass(pos); 2058 if (cur < seek) { 2059 pos -= range; 2060 } else { 2061 pos += range; 2062 } 2063 } 2064 #ifdef PRINT_OUT_NORMALIZATION 2065 if (seek == 1.0) { 2066 fprintf(stderr, "Fuzzy inverse %g\n", pos); 2067 } 2068 #endif 2069 return pos; 2070 } 2071 2072 #ifdef PRINT_OUT_NORMALIZATION 2073 static double print_out_normalization = ButteraugliFuzzyInverse(1.0); 2074 #endif 2075 2076 namespace { 2077 2078 void ScoreToRgb(double score, double good_threshold, double bad_threshold, 2079 float rgb[3]) { 2080 double heatmap[12][3] = { 2081 {0, 0, 0}, {0, 0, 1}, 2082 {0, 1, 1}, {0, 1, 0}, // Good level 2083 {1, 1, 0}, {1, 0, 0}, // Bad level 2084 {1, 0, 1}, {0.5, 0.5, 1.0}, 2085 {1.0, 0.5, 0.5}, // Pastel colors for the very bad quality range. 2086 {1.0, 1.0, 0.5}, {1, 1, 1}, 2087 {1, 1, 1}, // Last color repeated to have a solid range of white. 2088 }; 2089 if (score < good_threshold) { 2090 score = (score / good_threshold) * 0.3; 2091 } else if (score < bad_threshold) { 2092 score = 0.3 + 2093 (score - good_threshold) / (bad_threshold - good_threshold) * 0.15; 2094 } else { 2095 score = 0.45 + (score - bad_threshold) / (bad_threshold * 12) * 0.5; 2096 } 2097 static const int kTableSize = sizeof(heatmap) / sizeof(heatmap[0]); 2098 score = std::min<double>(std::max<double>(score * (kTableSize - 1), 0.0), 2099 kTableSize - 2); 2100 int ix = static_cast<int>(score); 2101 ix = std::min(std::max(0, ix), kTableSize - 2); // Handle NaN 2102 double mix = score - ix; 2103 for (int i = 0; i < 3; ++i) { 2104 double v = mix * heatmap[ix + 1][i] + (1 - mix) * heatmap[ix][i]; 2105 rgb[i] = pow(v, 0.5); 2106 } 2107 } 2108 2109 } // namespace 2110 2111 StatusOr<Image3F> CreateHeatMapImage(const ImageF& distmap, 2112 double good_threshold, 2113 double bad_threshold) { 2114 JXL_ASSIGN_OR_RETURN(Image3F heatmap, 2115 Image3F::Create(distmap.xsize(), distmap.ysize())); 2116 for (size_t y = 0; y < distmap.ysize(); ++y) { 2117 const float* BUTTERAUGLI_RESTRICT row_distmap = distmap.ConstRow(y); 2118 float* BUTTERAUGLI_RESTRICT row_h0 = heatmap.PlaneRow(0, y); 2119 float* BUTTERAUGLI_RESTRICT row_h1 = heatmap.PlaneRow(1, y); 2120 float* BUTTERAUGLI_RESTRICT row_h2 = heatmap.PlaneRow(2, y); 2121 for (size_t x = 0; x < distmap.xsize(); ++x) { 2122 const float d = row_distmap[x]; 2123 float rgb[3]; 2124 ScoreToRgb(d, good_threshold, bad_threshold, rgb); 2125 row_h0[x] = rgb[0]; 2126 row_h1[x] = rgb[1]; 2127 row_h2[x] = rgb[2]; 2128 } 2129 } 2130 return heatmap; 2131 } 2132 2133 } // namespace jxl 2134 #endif // HWY_ONCE