dec_xyb.cc (12736B)
1 // Copyright (c) the JPEG XL Project Authors. All rights reserved. 2 // 3 // Use of this source code is governed by a BSD-style 4 // license that can be found in the LICENSE file. 5 6 #include "lib/jxl/dec_xyb.h" 7 8 #include <string.h> 9 10 #undef HWY_TARGET_INCLUDE 11 #define HWY_TARGET_INCLUDE "lib/jxl/dec_xyb.cc" 12 #include <hwy/foreach_target.h> 13 #include <hwy/highway.h> 14 15 #include "lib/jxl/base/compiler_specific.h" 16 #include "lib/jxl/base/matrix_ops.h" 17 #include "lib/jxl/base/status.h" 18 #include "lib/jxl/cms/jxl_cms_internal.h" 19 #include "lib/jxl/cms/opsin_params.h" 20 #include "lib/jxl/color_encoding_internal.h" 21 #include "lib/jxl/dec_group_border.h" 22 #include "lib/jxl/dec_xyb-inl.h" 23 #include "lib/jxl/fields.h" 24 #include "lib/jxl/image.h" 25 #include "lib/jxl/opsin_params.h" 26 #include "lib/jxl/quantizer.h" 27 #include "lib/jxl/sanitizers.h" 28 HWY_BEFORE_NAMESPACE(); 29 namespace jxl { 30 namespace HWY_NAMESPACE { 31 32 // These templates are not found via ADL. 33 using hwy::HWY_NAMESPACE::MulAdd; 34 35 void OpsinToLinearInplace(Image3F* JXL_RESTRICT inout, ThreadPool* pool, 36 const OpsinParams& opsin_params) { 37 JXL_CHECK_IMAGE_INITIALIZED(*inout, Rect(*inout)); 38 39 const size_t xsize = inout->xsize(); // not padded 40 JXL_CHECK(RunOnPool( 41 pool, 0, inout->ysize(), ThreadPool::NoInit, 42 [&](const uint32_t task, size_t /* thread */) { 43 const size_t y = task; 44 45 // Faster than adding via ByteOffset at end of loop. 46 float* JXL_RESTRICT row0 = inout->PlaneRow(0, y); 47 float* JXL_RESTRICT row1 = inout->PlaneRow(1, y); 48 float* JXL_RESTRICT row2 = inout->PlaneRow(2, y); 49 50 const HWY_FULL(float) d; 51 52 for (size_t x = 0; x < xsize; x += Lanes(d)) { 53 const auto in_opsin_x = Load(d, row0 + x); 54 const auto in_opsin_y = Load(d, row1 + x); 55 const auto in_opsin_b = Load(d, row2 + x); 56 auto linear_r = Undefined(d); 57 auto linear_g = Undefined(d); 58 auto linear_b = Undefined(d); 59 XybToRgb(d, in_opsin_x, in_opsin_y, in_opsin_b, opsin_params, 60 &linear_r, &linear_g, &linear_b); 61 62 Store(linear_r, d, row0 + x); 63 Store(linear_g, d, row1 + x); 64 Store(linear_b, d, row2 + x); 65 } 66 }, 67 "OpsinToLinear")); 68 } 69 70 // Same, but not in-place. 71 void OpsinToLinear(const Image3F& opsin, const Rect& rect, ThreadPool* pool, 72 Image3F* JXL_RESTRICT linear, 73 const OpsinParams& opsin_params) { 74 JXL_ASSERT(SameSize(rect, *linear)); 75 JXL_CHECK_IMAGE_INITIALIZED(opsin, rect); 76 77 JXL_CHECK(RunOnPool( 78 pool, 0, static_cast<int>(rect.ysize()), ThreadPool::NoInit, 79 [&](const uint32_t task, size_t /*thread*/) { 80 const size_t y = static_cast<size_t>(task); 81 82 // Faster than adding via ByteOffset at end of loop. 83 const float* JXL_RESTRICT row_opsin_0 = rect.ConstPlaneRow(opsin, 0, y); 84 const float* JXL_RESTRICT row_opsin_1 = rect.ConstPlaneRow(opsin, 1, y); 85 const float* JXL_RESTRICT row_opsin_2 = rect.ConstPlaneRow(opsin, 2, y); 86 float* JXL_RESTRICT row_linear_0 = linear->PlaneRow(0, y); 87 float* JXL_RESTRICT row_linear_1 = linear->PlaneRow(1, y); 88 float* JXL_RESTRICT row_linear_2 = linear->PlaneRow(2, y); 89 90 const HWY_FULL(float) d; 91 92 for (size_t x = 0; x < rect.xsize(); x += Lanes(d)) { 93 const auto in_opsin_x = Load(d, row_opsin_0 + x); 94 const auto in_opsin_y = Load(d, row_opsin_1 + x); 95 const auto in_opsin_b = Load(d, row_opsin_2 + x); 96 auto linear_r = Undefined(d); 97 auto linear_g = Undefined(d); 98 auto linear_b = Undefined(d); 99 XybToRgb(d, in_opsin_x, in_opsin_y, in_opsin_b, opsin_params, 100 &linear_r, &linear_g, &linear_b); 101 102 Store(linear_r, d, row_linear_0 + x); 103 Store(linear_g, d, row_linear_1 + x); 104 Store(linear_b, d, row_linear_2 + x); 105 } 106 }, 107 "OpsinToLinear(Rect)")); 108 JXL_CHECK_IMAGE_INITIALIZED(*linear, rect); 109 } 110 111 // Transform YCbCr to RGB. 112 // Could be performed in-place (i.e. Y, Cb and Cr could alias R, B and B). 113 void YcbcrToRgb(const Image3F& ycbcr, Image3F* rgb, const Rect& rect) { 114 JXL_CHECK_IMAGE_INITIALIZED(ycbcr, rect); 115 const HWY_CAPPED(float, kBlockDim) df; 116 const size_t S = Lanes(df); // Step. 117 118 const size_t xsize = rect.xsize(); 119 const size_t ysize = rect.ysize(); 120 if ((xsize == 0) || (ysize == 0)) return; 121 122 // Full-range BT.601 as defined by JFIF Clause 7: 123 // https://www.itu.int/rec/T-REC-T.871-201105-I/en 124 const auto c128 = Set(df, 128.0f / 255); 125 const auto crcr = Set(df, 1.402f); 126 const auto cgcb = Set(df, -0.114f * 1.772f / 0.587f); 127 const auto cgcr = Set(df, -0.299f * 1.402f / 0.587f); 128 const auto cbcb = Set(df, 1.772f); 129 130 for (size_t y = 0; y < ysize; y++) { 131 const float* y_row = rect.ConstPlaneRow(ycbcr, 1, y); 132 const float* cb_row = rect.ConstPlaneRow(ycbcr, 0, y); 133 const float* cr_row = rect.ConstPlaneRow(ycbcr, 2, y); 134 float* r_row = rect.PlaneRow(rgb, 0, y); 135 float* g_row = rect.PlaneRow(rgb, 1, y); 136 float* b_row = rect.PlaneRow(rgb, 2, y); 137 for (size_t x = 0; x < xsize; x += S) { 138 const auto y_vec = Add(Load(df, y_row + x), c128); 139 const auto cb_vec = Load(df, cb_row + x); 140 const auto cr_vec = Load(df, cr_row + x); 141 const auto r_vec = MulAdd(crcr, cr_vec, y_vec); 142 const auto g_vec = MulAdd(cgcr, cr_vec, MulAdd(cgcb, cb_vec, y_vec)); 143 const auto b_vec = MulAdd(cbcb, cb_vec, y_vec); 144 Store(r_vec, df, r_row + x); 145 Store(g_vec, df, g_row + x); 146 Store(b_vec, df, b_row + x); 147 } 148 } 149 JXL_CHECK_IMAGE_INITIALIZED(*rgb, rect); 150 } 151 152 // NOLINTNEXTLINE(google-readability-namespace-comments) 153 } // namespace HWY_NAMESPACE 154 } // namespace jxl 155 HWY_AFTER_NAMESPACE(); 156 157 #if HWY_ONCE 158 namespace jxl { 159 160 HWY_EXPORT(OpsinToLinearInplace); 161 void OpsinToLinearInplace(Image3F* JXL_RESTRICT inout, ThreadPool* pool, 162 const OpsinParams& opsin_params) { 163 HWY_DYNAMIC_DISPATCH(OpsinToLinearInplace)(inout, pool, opsin_params); 164 } 165 166 HWY_EXPORT(OpsinToLinear); 167 void OpsinToLinear(const Image3F& opsin, const Rect& rect, ThreadPool* pool, 168 Image3F* JXL_RESTRICT linear, 169 const OpsinParams& opsin_params) { 170 HWY_DYNAMIC_DISPATCH(OpsinToLinear)(opsin, rect, pool, linear, opsin_params); 171 } 172 173 HWY_EXPORT(YcbcrToRgb); 174 void YcbcrToRgb(const Image3F& ycbcr, Image3F* rgb, const Rect& rect) { 175 HWY_DYNAMIC_DISPATCH(YcbcrToRgb)(ycbcr, rgb, rect); 176 } 177 178 HWY_EXPORT(HasFastXYBTosRGB8); 179 bool HasFastXYBTosRGB8() { return HWY_DYNAMIC_DISPATCH(HasFastXYBTosRGB8)(); } 180 181 HWY_EXPORT(FastXYBTosRGB8); 182 void FastXYBTosRGB8(const float* input[4], uint8_t* output, bool is_rgba, 183 size_t xsize) { 184 HWY_DYNAMIC_DISPATCH(FastXYBTosRGB8)(input, output, is_rgba, xsize); 185 } 186 187 void OpsinParams::Init(float intensity_target) { 188 InitSIMDInverseMatrix(GetOpsinAbsorbanceInverseMatrix(), inverse_opsin_matrix, 189 intensity_target); 190 memcpy(opsin_biases, jxl::cms::kNegOpsinAbsorbanceBiasRGB.data(), 191 sizeof(jxl::cms::kNegOpsinAbsorbanceBiasRGB)); 192 memcpy(quant_biases, kDefaultQuantBias, sizeof(kDefaultQuantBias)); 193 for (size_t c = 0; c < 4; c++) { 194 opsin_biases_cbrt[c] = cbrtf(opsin_biases[c]); 195 } 196 } 197 198 bool CanOutputToColorEncoding(const ColorEncoding& c_desired) { 199 if (!c_desired.HaveFields()) { 200 return false; 201 } 202 // TODO(veluca): keep in sync with dec_reconstruct.cc 203 const auto& tf = c_desired.Tf(); 204 if (!tf.IsPQ() && !tf.IsSRGB() && !tf.have_gamma && !tf.IsLinear() && 205 !tf.IsHLG() && !tf.IsDCI() && !tf.Is709()) { 206 return false; 207 } 208 if (c_desired.IsGray() && c_desired.GetWhitePointType() != WhitePoint::kD65) { 209 // TODO(veluca): figure out what should happen here. 210 return false; 211 } 212 return true; 213 } 214 215 Status OutputEncodingInfo::SetFromMetadata(const CodecMetadata& metadata) { 216 orig_color_encoding = metadata.m.color_encoding; 217 orig_intensity_target = metadata.m.IntensityTarget(); 218 desired_intensity_target = orig_intensity_target; 219 const auto& im = metadata.transform_data.opsin_inverse_matrix; 220 memcpy(orig_inverse_matrix, im.inverse_matrix, sizeof(orig_inverse_matrix)); 221 default_transform = im.all_default; 222 xyb_encoded = metadata.m.xyb_encoded; 223 std::copy(std::begin(im.opsin_biases), std::end(im.opsin_biases), 224 opsin_params.opsin_biases); 225 for (int i = 0; i < 3; ++i) { 226 opsin_params.opsin_biases_cbrt[i] = cbrtf(opsin_params.opsin_biases[i]); 227 } 228 opsin_params.opsin_biases_cbrt[3] = opsin_params.opsin_biases[3] = 1; 229 std::copy(std::begin(im.quant_biases), std::end(im.quant_biases), 230 opsin_params.quant_biases); 231 bool orig_ok = CanOutputToColorEncoding(orig_color_encoding); 232 bool orig_grey = orig_color_encoding.IsGray(); 233 return SetColorEncoding(!xyb_encoded || orig_ok 234 ? orig_color_encoding 235 : ColorEncoding::LinearSRGB(orig_grey)); 236 } 237 238 Status OutputEncodingInfo::MaybeSetColorEncoding( 239 const ColorEncoding& c_desired) { 240 if (c_desired.GetColorSpace() == ColorSpace::kXYB && 241 ((color_encoding.GetColorSpace() == ColorSpace::kRGB && 242 color_encoding.GetPrimariesType() != Primaries::kSRGB) || 243 color_encoding.Tf().IsPQ())) { 244 return false; 245 } 246 if (!xyb_encoded && !CanOutputToColorEncoding(c_desired)) { 247 return false; 248 } 249 return SetColorEncoding(c_desired); 250 } 251 252 Status OutputEncodingInfo::SetColorEncoding(const ColorEncoding& c_desired) { 253 color_encoding = c_desired; 254 linear_color_encoding = color_encoding; 255 linear_color_encoding.Tf().SetTransferFunction(TransferFunction::kLinear); 256 color_encoding_is_original = orig_color_encoding.SameColorEncoding(c_desired); 257 258 // Compute the opsin inverse matrix and luminances based on primaries and 259 // white point. 260 float inverse_matrix[9]; 261 bool inverse_matrix_is_default = default_transform; 262 memcpy(inverse_matrix, orig_inverse_matrix, sizeof(inverse_matrix)); 263 constexpr float kSRGBLuminances[3] = {0.2126, 0.7152, 0.0722}; 264 memcpy(luminances, kSRGBLuminances, sizeof(luminances)); 265 if ((c_desired.GetPrimariesType() != Primaries::kSRGB || 266 c_desired.GetWhitePointType() != WhitePoint::kD65) && 267 !c_desired.IsGray()) { 268 float srgb_to_xyzd50[9]; 269 const auto& srgb = ColorEncoding::SRGB(/*is_gray=*/false); 270 PrimariesCIExy p = srgb.GetPrimaries(); 271 CIExy w = srgb.GetWhitePoint(); 272 JXL_CHECK(PrimariesToXYZD50(p.r.x, p.r.y, p.g.x, p.g.y, p.b.x, p.b.y, w.x, 273 w.y, srgb_to_xyzd50)); 274 float original_to_xyz[3][3]; 275 p = c_desired.GetPrimaries(); 276 w = c_desired.GetWhitePoint(); 277 if (!PrimariesToXYZ(p.r.x, p.r.y, p.g.x, p.g.y, p.b.x, p.b.y, w.x, w.y, 278 &original_to_xyz[0][0])) { 279 return JXL_FAILURE("PrimariesToXYZ failed"); 280 } 281 memcpy(luminances, original_to_xyz[1], sizeof luminances); 282 if (xyb_encoded) { 283 float adapt_to_d50[9]; 284 if (!AdaptToXYZD50(c_desired.GetWhitePoint().x, 285 c_desired.GetWhitePoint().y, adapt_to_d50)) { 286 return JXL_FAILURE("AdaptToXYZD50 failed"); 287 } 288 float xyzd50_to_original[9]; 289 Mul3x3Matrix(adapt_to_d50, &original_to_xyz[0][0], xyzd50_to_original); 290 JXL_RETURN_IF_ERROR(Inv3x3Matrix(xyzd50_to_original)); 291 float srgb_to_original[9]; 292 Mul3x3Matrix(xyzd50_to_original, srgb_to_xyzd50, srgb_to_original); 293 Mul3x3Matrix(srgb_to_original, orig_inverse_matrix, inverse_matrix); 294 inverse_matrix_is_default = false; 295 } 296 } 297 298 if (c_desired.IsGray()) { 299 float tmp_inv_matrix[9]; 300 memcpy(tmp_inv_matrix, inverse_matrix, sizeof(inverse_matrix)); 301 float srgb_to_luma[9]; 302 memcpy(&srgb_to_luma[0], luminances, sizeof(luminances)); 303 memcpy(&srgb_to_luma[3], luminances, sizeof(luminances)); 304 memcpy(&srgb_to_luma[6], luminances, sizeof(luminances)); 305 Mul3x3Matrix(srgb_to_luma, tmp_inv_matrix, inverse_matrix); 306 } 307 308 // The internal XYB color space uses absolute luminance, so we scale back the 309 // opsin inverse matrix to relative luminance where 1.0 corresponds to the 310 // original intensity target. 311 if (xyb_encoded) { 312 InitSIMDInverseMatrix(inverse_matrix, opsin_params.inverse_opsin_matrix, 313 orig_intensity_target); 314 all_default_opsin = (std::abs(orig_intensity_target - 255.0) <= 0.1f && 315 inverse_matrix_is_default); 316 } 317 318 // Set the inverse gamma based on color space transfer function. 319 const auto& tf = c_desired.Tf(); 320 inverse_gamma = (tf.have_gamma ? tf.GetGamma() 321 : tf.IsDCI() ? 1.0f / 2.6f 322 : 1.0); 323 return true; 324 } 325 326 } // namespace jxl 327 #endif // HWY_ONCE