libjxl

FORK: libjxl patches used on blog
git clone https://git.neptards.moe/blog/libjxl.git
Log | Files | Refs | Submodules | README | LICENSE

dec_xyb.cc (12736B)


      1 // Copyright (c) the JPEG XL Project Authors. All rights reserved.
      2 //
      3 // Use of this source code is governed by a BSD-style
      4 // license that can be found in the LICENSE file.
      5 
      6 #include "lib/jxl/dec_xyb.h"
      7 
      8 #include <string.h>
      9 
     10 #undef HWY_TARGET_INCLUDE
     11 #define HWY_TARGET_INCLUDE "lib/jxl/dec_xyb.cc"
     12 #include <hwy/foreach_target.h>
     13 #include <hwy/highway.h>
     14 
     15 #include "lib/jxl/base/compiler_specific.h"
     16 #include "lib/jxl/base/matrix_ops.h"
     17 #include "lib/jxl/base/status.h"
     18 #include "lib/jxl/cms/jxl_cms_internal.h"
     19 #include "lib/jxl/cms/opsin_params.h"
     20 #include "lib/jxl/color_encoding_internal.h"
     21 #include "lib/jxl/dec_group_border.h"
     22 #include "lib/jxl/dec_xyb-inl.h"
     23 #include "lib/jxl/fields.h"
     24 #include "lib/jxl/image.h"
     25 #include "lib/jxl/opsin_params.h"
     26 #include "lib/jxl/quantizer.h"
     27 #include "lib/jxl/sanitizers.h"
     28 HWY_BEFORE_NAMESPACE();
     29 namespace jxl {
     30 namespace HWY_NAMESPACE {
     31 
     32 // These templates are not found via ADL.
     33 using hwy::HWY_NAMESPACE::MulAdd;
     34 
     35 void OpsinToLinearInplace(Image3F* JXL_RESTRICT inout, ThreadPool* pool,
     36                           const OpsinParams& opsin_params) {
     37   JXL_CHECK_IMAGE_INITIALIZED(*inout, Rect(*inout));
     38 
     39   const size_t xsize = inout->xsize();  // not padded
     40   JXL_CHECK(RunOnPool(
     41       pool, 0, inout->ysize(), ThreadPool::NoInit,
     42       [&](const uint32_t task, size_t /* thread */) {
     43         const size_t y = task;
     44 
     45         // Faster than adding via ByteOffset at end of loop.
     46         float* JXL_RESTRICT row0 = inout->PlaneRow(0, y);
     47         float* JXL_RESTRICT row1 = inout->PlaneRow(1, y);
     48         float* JXL_RESTRICT row2 = inout->PlaneRow(2, y);
     49 
     50         const HWY_FULL(float) d;
     51 
     52         for (size_t x = 0; x < xsize; x += Lanes(d)) {
     53           const auto in_opsin_x = Load(d, row0 + x);
     54           const auto in_opsin_y = Load(d, row1 + x);
     55           const auto in_opsin_b = Load(d, row2 + x);
     56           auto linear_r = Undefined(d);
     57           auto linear_g = Undefined(d);
     58           auto linear_b = Undefined(d);
     59           XybToRgb(d, in_opsin_x, in_opsin_y, in_opsin_b, opsin_params,
     60                    &linear_r, &linear_g, &linear_b);
     61 
     62           Store(linear_r, d, row0 + x);
     63           Store(linear_g, d, row1 + x);
     64           Store(linear_b, d, row2 + x);
     65         }
     66       },
     67       "OpsinToLinear"));
     68 }
     69 
     70 // Same, but not in-place.
     71 void OpsinToLinear(const Image3F& opsin, const Rect& rect, ThreadPool* pool,
     72                    Image3F* JXL_RESTRICT linear,
     73                    const OpsinParams& opsin_params) {
     74   JXL_ASSERT(SameSize(rect, *linear));
     75   JXL_CHECK_IMAGE_INITIALIZED(opsin, rect);
     76 
     77   JXL_CHECK(RunOnPool(
     78       pool, 0, static_cast<int>(rect.ysize()), ThreadPool::NoInit,
     79       [&](const uint32_t task, size_t /*thread*/) {
     80         const size_t y = static_cast<size_t>(task);
     81 
     82         // Faster than adding via ByteOffset at end of loop.
     83         const float* JXL_RESTRICT row_opsin_0 = rect.ConstPlaneRow(opsin, 0, y);
     84         const float* JXL_RESTRICT row_opsin_1 = rect.ConstPlaneRow(opsin, 1, y);
     85         const float* JXL_RESTRICT row_opsin_2 = rect.ConstPlaneRow(opsin, 2, y);
     86         float* JXL_RESTRICT row_linear_0 = linear->PlaneRow(0, y);
     87         float* JXL_RESTRICT row_linear_1 = linear->PlaneRow(1, y);
     88         float* JXL_RESTRICT row_linear_2 = linear->PlaneRow(2, y);
     89 
     90         const HWY_FULL(float) d;
     91 
     92         for (size_t x = 0; x < rect.xsize(); x += Lanes(d)) {
     93           const auto in_opsin_x = Load(d, row_opsin_0 + x);
     94           const auto in_opsin_y = Load(d, row_opsin_1 + x);
     95           const auto in_opsin_b = Load(d, row_opsin_2 + x);
     96           auto linear_r = Undefined(d);
     97           auto linear_g = Undefined(d);
     98           auto linear_b = Undefined(d);
     99           XybToRgb(d, in_opsin_x, in_opsin_y, in_opsin_b, opsin_params,
    100                    &linear_r, &linear_g, &linear_b);
    101 
    102           Store(linear_r, d, row_linear_0 + x);
    103           Store(linear_g, d, row_linear_1 + x);
    104           Store(linear_b, d, row_linear_2 + x);
    105         }
    106       },
    107       "OpsinToLinear(Rect)"));
    108   JXL_CHECK_IMAGE_INITIALIZED(*linear, rect);
    109 }
    110 
    111 // Transform YCbCr to RGB.
    112 // Could be performed in-place (i.e. Y, Cb and Cr could alias R, B and B).
    113 void YcbcrToRgb(const Image3F& ycbcr, Image3F* rgb, const Rect& rect) {
    114   JXL_CHECK_IMAGE_INITIALIZED(ycbcr, rect);
    115   const HWY_CAPPED(float, kBlockDim) df;
    116   const size_t S = Lanes(df);  // Step.
    117 
    118   const size_t xsize = rect.xsize();
    119   const size_t ysize = rect.ysize();
    120   if ((xsize == 0) || (ysize == 0)) return;
    121 
    122   // Full-range BT.601 as defined by JFIF Clause 7:
    123   // https://www.itu.int/rec/T-REC-T.871-201105-I/en
    124   const auto c128 = Set(df, 128.0f / 255);
    125   const auto crcr = Set(df, 1.402f);
    126   const auto cgcb = Set(df, -0.114f * 1.772f / 0.587f);
    127   const auto cgcr = Set(df, -0.299f * 1.402f / 0.587f);
    128   const auto cbcb = Set(df, 1.772f);
    129 
    130   for (size_t y = 0; y < ysize; y++) {
    131     const float* y_row = rect.ConstPlaneRow(ycbcr, 1, y);
    132     const float* cb_row = rect.ConstPlaneRow(ycbcr, 0, y);
    133     const float* cr_row = rect.ConstPlaneRow(ycbcr, 2, y);
    134     float* r_row = rect.PlaneRow(rgb, 0, y);
    135     float* g_row = rect.PlaneRow(rgb, 1, y);
    136     float* b_row = rect.PlaneRow(rgb, 2, y);
    137     for (size_t x = 0; x < xsize; x += S) {
    138       const auto y_vec = Add(Load(df, y_row + x), c128);
    139       const auto cb_vec = Load(df, cb_row + x);
    140       const auto cr_vec = Load(df, cr_row + x);
    141       const auto r_vec = MulAdd(crcr, cr_vec, y_vec);
    142       const auto g_vec = MulAdd(cgcr, cr_vec, MulAdd(cgcb, cb_vec, y_vec));
    143       const auto b_vec = MulAdd(cbcb, cb_vec, y_vec);
    144       Store(r_vec, df, r_row + x);
    145       Store(g_vec, df, g_row + x);
    146       Store(b_vec, df, b_row + x);
    147     }
    148   }
    149   JXL_CHECK_IMAGE_INITIALIZED(*rgb, rect);
    150 }
    151 
    152 // NOLINTNEXTLINE(google-readability-namespace-comments)
    153 }  // namespace HWY_NAMESPACE
    154 }  // namespace jxl
    155 HWY_AFTER_NAMESPACE();
    156 
    157 #if HWY_ONCE
    158 namespace jxl {
    159 
    160 HWY_EXPORT(OpsinToLinearInplace);
    161 void OpsinToLinearInplace(Image3F* JXL_RESTRICT inout, ThreadPool* pool,
    162                           const OpsinParams& opsin_params) {
    163   HWY_DYNAMIC_DISPATCH(OpsinToLinearInplace)(inout, pool, opsin_params);
    164 }
    165 
    166 HWY_EXPORT(OpsinToLinear);
    167 void OpsinToLinear(const Image3F& opsin, const Rect& rect, ThreadPool* pool,
    168                    Image3F* JXL_RESTRICT linear,
    169                    const OpsinParams& opsin_params) {
    170   HWY_DYNAMIC_DISPATCH(OpsinToLinear)(opsin, rect, pool, linear, opsin_params);
    171 }
    172 
    173 HWY_EXPORT(YcbcrToRgb);
    174 void YcbcrToRgb(const Image3F& ycbcr, Image3F* rgb, const Rect& rect) {
    175   HWY_DYNAMIC_DISPATCH(YcbcrToRgb)(ycbcr, rgb, rect);
    176 }
    177 
    178 HWY_EXPORT(HasFastXYBTosRGB8);
    179 bool HasFastXYBTosRGB8() { return HWY_DYNAMIC_DISPATCH(HasFastXYBTosRGB8)(); }
    180 
    181 HWY_EXPORT(FastXYBTosRGB8);
    182 void FastXYBTosRGB8(const float* input[4], uint8_t* output, bool is_rgba,
    183                     size_t xsize) {
    184   HWY_DYNAMIC_DISPATCH(FastXYBTosRGB8)(input, output, is_rgba, xsize);
    185 }
    186 
    187 void OpsinParams::Init(float intensity_target) {
    188   InitSIMDInverseMatrix(GetOpsinAbsorbanceInverseMatrix(), inverse_opsin_matrix,
    189                         intensity_target);
    190   memcpy(opsin_biases, jxl::cms::kNegOpsinAbsorbanceBiasRGB.data(),
    191          sizeof(jxl::cms::kNegOpsinAbsorbanceBiasRGB));
    192   memcpy(quant_biases, kDefaultQuantBias, sizeof(kDefaultQuantBias));
    193   for (size_t c = 0; c < 4; c++) {
    194     opsin_biases_cbrt[c] = cbrtf(opsin_biases[c]);
    195   }
    196 }
    197 
    198 bool CanOutputToColorEncoding(const ColorEncoding& c_desired) {
    199   if (!c_desired.HaveFields()) {
    200     return false;
    201   }
    202   // TODO(veluca): keep in sync with dec_reconstruct.cc
    203   const auto& tf = c_desired.Tf();
    204   if (!tf.IsPQ() && !tf.IsSRGB() && !tf.have_gamma && !tf.IsLinear() &&
    205       !tf.IsHLG() && !tf.IsDCI() && !tf.Is709()) {
    206     return false;
    207   }
    208   if (c_desired.IsGray() && c_desired.GetWhitePointType() != WhitePoint::kD65) {
    209     // TODO(veluca): figure out what should happen here.
    210     return false;
    211   }
    212   return true;
    213 }
    214 
    215 Status OutputEncodingInfo::SetFromMetadata(const CodecMetadata& metadata) {
    216   orig_color_encoding = metadata.m.color_encoding;
    217   orig_intensity_target = metadata.m.IntensityTarget();
    218   desired_intensity_target = orig_intensity_target;
    219   const auto& im = metadata.transform_data.opsin_inverse_matrix;
    220   memcpy(orig_inverse_matrix, im.inverse_matrix, sizeof(orig_inverse_matrix));
    221   default_transform = im.all_default;
    222   xyb_encoded = metadata.m.xyb_encoded;
    223   std::copy(std::begin(im.opsin_biases), std::end(im.opsin_biases),
    224             opsin_params.opsin_biases);
    225   for (int i = 0; i < 3; ++i) {
    226     opsin_params.opsin_biases_cbrt[i] = cbrtf(opsin_params.opsin_biases[i]);
    227   }
    228   opsin_params.opsin_biases_cbrt[3] = opsin_params.opsin_biases[3] = 1;
    229   std::copy(std::begin(im.quant_biases), std::end(im.quant_biases),
    230             opsin_params.quant_biases);
    231   bool orig_ok = CanOutputToColorEncoding(orig_color_encoding);
    232   bool orig_grey = orig_color_encoding.IsGray();
    233   return SetColorEncoding(!xyb_encoded || orig_ok
    234                               ? orig_color_encoding
    235                               : ColorEncoding::LinearSRGB(orig_grey));
    236 }
    237 
    238 Status OutputEncodingInfo::MaybeSetColorEncoding(
    239     const ColorEncoding& c_desired) {
    240   if (c_desired.GetColorSpace() == ColorSpace::kXYB &&
    241       ((color_encoding.GetColorSpace() == ColorSpace::kRGB &&
    242         color_encoding.GetPrimariesType() != Primaries::kSRGB) ||
    243        color_encoding.Tf().IsPQ())) {
    244     return false;
    245   }
    246   if (!xyb_encoded && !CanOutputToColorEncoding(c_desired)) {
    247     return false;
    248   }
    249   return SetColorEncoding(c_desired);
    250 }
    251 
    252 Status OutputEncodingInfo::SetColorEncoding(const ColorEncoding& c_desired) {
    253   color_encoding = c_desired;
    254   linear_color_encoding = color_encoding;
    255   linear_color_encoding.Tf().SetTransferFunction(TransferFunction::kLinear);
    256   color_encoding_is_original = orig_color_encoding.SameColorEncoding(c_desired);
    257 
    258   // Compute the opsin inverse matrix and luminances based on primaries and
    259   // white point.
    260   float inverse_matrix[9];
    261   bool inverse_matrix_is_default = default_transform;
    262   memcpy(inverse_matrix, orig_inverse_matrix, sizeof(inverse_matrix));
    263   constexpr float kSRGBLuminances[3] = {0.2126, 0.7152, 0.0722};
    264   memcpy(luminances, kSRGBLuminances, sizeof(luminances));
    265   if ((c_desired.GetPrimariesType() != Primaries::kSRGB ||
    266        c_desired.GetWhitePointType() != WhitePoint::kD65) &&
    267       !c_desired.IsGray()) {
    268     float srgb_to_xyzd50[9];
    269     const auto& srgb = ColorEncoding::SRGB(/*is_gray=*/false);
    270     PrimariesCIExy p = srgb.GetPrimaries();
    271     CIExy w = srgb.GetWhitePoint();
    272     JXL_CHECK(PrimariesToXYZD50(p.r.x, p.r.y, p.g.x, p.g.y, p.b.x, p.b.y, w.x,
    273                                 w.y, srgb_to_xyzd50));
    274     float original_to_xyz[3][3];
    275     p = c_desired.GetPrimaries();
    276     w = c_desired.GetWhitePoint();
    277     if (!PrimariesToXYZ(p.r.x, p.r.y, p.g.x, p.g.y, p.b.x, p.b.y, w.x, w.y,
    278                         &original_to_xyz[0][0])) {
    279       return JXL_FAILURE("PrimariesToXYZ failed");
    280     }
    281     memcpy(luminances, original_to_xyz[1], sizeof luminances);
    282     if (xyb_encoded) {
    283       float adapt_to_d50[9];
    284       if (!AdaptToXYZD50(c_desired.GetWhitePoint().x,
    285                          c_desired.GetWhitePoint().y, adapt_to_d50)) {
    286         return JXL_FAILURE("AdaptToXYZD50 failed");
    287       }
    288       float xyzd50_to_original[9];
    289       Mul3x3Matrix(adapt_to_d50, &original_to_xyz[0][0], xyzd50_to_original);
    290       JXL_RETURN_IF_ERROR(Inv3x3Matrix(xyzd50_to_original));
    291       float srgb_to_original[9];
    292       Mul3x3Matrix(xyzd50_to_original, srgb_to_xyzd50, srgb_to_original);
    293       Mul3x3Matrix(srgb_to_original, orig_inverse_matrix, inverse_matrix);
    294       inverse_matrix_is_default = false;
    295     }
    296   }
    297 
    298   if (c_desired.IsGray()) {
    299     float tmp_inv_matrix[9];
    300     memcpy(tmp_inv_matrix, inverse_matrix, sizeof(inverse_matrix));
    301     float srgb_to_luma[9];
    302     memcpy(&srgb_to_luma[0], luminances, sizeof(luminances));
    303     memcpy(&srgb_to_luma[3], luminances, sizeof(luminances));
    304     memcpy(&srgb_to_luma[6], luminances, sizeof(luminances));
    305     Mul3x3Matrix(srgb_to_luma, tmp_inv_matrix, inverse_matrix);
    306   }
    307 
    308   // The internal XYB color space uses absolute luminance, so we scale back the
    309   // opsin inverse matrix to relative luminance where 1.0 corresponds to the
    310   // original intensity target.
    311   if (xyb_encoded) {
    312     InitSIMDInverseMatrix(inverse_matrix, opsin_params.inverse_opsin_matrix,
    313                           orig_intensity_target);
    314     all_default_opsin = (std::abs(orig_intensity_target - 255.0) <= 0.1f &&
    315                          inverse_matrix_is_default);
    316   }
    317 
    318   // Set the inverse gamma based on color space transfer function.
    319   const auto& tf = c_desired.Tf();
    320   inverse_gamma = (tf.have_gamma ? tf.GetGamma()
    321                    : tf.IsDCI()  ? 1.0f / 2.6f
    322                                  : 1.0);
    323   return true;
    324 }
    325 
    326 }  // namespace jxl
    327 #endif  // HWY_ONCE