libjxl

FORK: libjxl patches used on blog
git clone https://git.neptards.moe/blog/libjxl.git
Log | Files | Refs | Submodules | README | LICENSE

image_metadata.h (16018B)


      1 // Copyright (c) the JPEG XL Project Authors. All rights reserved.
      2 //
      3 // Use of this source code is governed by a BSD-style
      4 // license that can be found in the LICENSE file.
      5 
      6 // Main codestream header bundles, the metadata that applies to all frames.
      7 // Enums must align with the C API definitions in codestream_header.h.
      8 
      9 #ifndef LIB_JXL_IMAGE_METADATA_H_
     10 #define LIB_JXL_IMAGE_METADATA_H_
     11 
     12 #include <jxl/codestream_header.h>
     13 #include <stddef.h>
     14 #include <stdint.h>
     15 
     16 #include <string>
     17 #include <vector>
     18 
     19 #include "lib/jxl/base/compiler_specific.h"
     20 #include "lib/jxl/base/status.h"
     21 #include "lib/jxl/color_encoding_internal.h"
     22 #include "lib/jxl/dec_bit_reader.h"
     23 #include "lib/jxl/field_encodings.h"
     24 #include "lib/jxl/fields.h"
     25 #include "lib/jxl/headers.h"
     26 
     27 namespace jxl {
     28 
     29 struct AuxOut;
     30 
// EXIF orientation of the image. This field overrides any field present in
// actual EXIF metadata. The value tells which transformation the decoder must
// apply after decoding to display the image with the correct orientation.
//
// Every enumerator takes its value from the corresponding JXL_ORIENT_*
// constant of the C API (<jxl/codestream_header.h>), so the two definitions
// have to stay aligned.
enum class Orientation : uint32_t {
  // Values 1..8 match the EXIF definitions.
  kIdentity = JXL_ORIENT_IDENTITY,
  kFlipHorizontal = JXL_ORIENT_FLIP_HORIZONTAL,
  kRotate180 = JXL_ORIENT_ROTATE_180,
  kFlipVertical = JXL_ORIENT_FLIP_VERTICAL,
  kTranspose = JXL_ORIENT_TRANSPOSE,
  // Note the naming difference: kRotate90 is the clockwise C API constant...
  kRotate90 = JXL_ORIENT_ROTATE_90_CW,
  kAntiTranspose = JXL_ORIENT_ANTI_TRANSPOSE,
  // ...and kRotate270 is the counter-clockwise one.
  kRotate270 = JXL_ORIENT_ROTATE_90_CCW,
};
// Don't need an EnumBits because Orientation is not read via Enum().
     46 
// Type of an extra (non-color) channel. Every enumerator takes its value from
// the corresponding JXL_CHANNEL_* constant of the C API
// (<jxl/codestream_header.h>), so the two definitions have to stay aligned.
enum class ExtraChannel : uint32_t {
  // First two enumerators (most common) are cheaper to encode
  kAlpha = JXL_CHANNEL_ALPHA,
  kDepth = JXL_CHANNEL_DEPTH,

  kSpotColor = JXL_CHANNEL_SPOT_COLOR,
  kSelectionMask = JXL_CHANNEL_SELECTION_MASK,
  kBlack = JXL_CHANNEL_BLACK,  // for CMYK
  kCFA = JXL_CHANNEL_CFA,      // Bayer channel
  kThermal = JXL_CHANNEL_THERMAL,
  // Reserved values. They are not part of the EnumBits(ExtraChannel) mask
  // defined in this header.
  kReserved0 = JXL_CHANNEL_RESERVED0,
  kReserved1 = JXL_CHANNEL_RESERVED1,
  kReserved2 = JXL_CHANNEL_RESERVED2,
  kReserved3 = JXL_CHANNEL_RESERVED3,
  kReserved4 = JXL_CHANNEL_RESERVED4,
  kReserved5 = JXL_CHANNEL_RESERVED5,
  kReserved6 = JXL_CHANNEL_RESERVED6,
  kReserved7 = JXL_CHANNEL_RESERVED7,
  // disambiguated via name string, raise warning if unsupported
  kUnknown = JXL_CHANNEL_UNKNOWN,
  // like kUnknown but can silently be ignored
  kOptional = JXL_CHANNEL_OPTIONAL
};
     70 static inline const char* EnumName(ExtraChannel /*unused*/) {
     71   return "ExtraChannel";
     72 }
     73 static inline constexpr uint64_t EnumBits(ExtraChannel /*unused*/) {
     74   using EC = ExtraChannel;
     75   return MakeBit(EC::kAlpha) | MakeBit(EC::kDepth) | MakeBit(EC::kSpotColor) |
     76          MakeBit(EC::kSelectionMask) | MakeBit(EC::kBlack) | MakeBit(EC::kCFA) |
     77          MakeBit(EC::kThermal) | MakeBit(EC::kUnknown) | MakeBit(EC::kOptional);
     78 }
     79 
// Sample format (integer vs. floating point, and bit counts) of the original
// image samples. Used in ImageMetadata and ExtraChannelInfo.
struct BitDepth : public Fields {
  BitDepth();
  JXL_FIELDS_NAME(BitDepth)

  // Overrides Fields::VisitFields; implemented outside this header.
  // NOTE(review): presumably this is where the range rules documented on the
  // members below are enforced - confirm in the implementation.
  Status VisitFields(Visitor* JXL_RESTRICT visitor) override;

  // Textual description of this object (implemented outside this header).
  std::string DebugString() const;

  // Whether the original (uncompressed) samples are floating point or
  // unsigned integer.
  bool floating_point_sample;

  // Bit depth of the original (uncompressed) image samples. Must be in the
  // range [1, 32].
  uint32_t bits_per_sample;

  // Floating point exponent bits of the original (uncompressed) image samples,
  // only used if floating_point_sample is true.
  // If used, the samples are floating point with:
  // - 1 sign bit
  // - exponent_bits_per_sample exponent bits
  // - (bits_per_sample - exponent_bits_per_sample - 1) mantissa bits
  // If used, exponent_bits_per_sample must be in the range
  // [2, 8] and the number of mantissa bits must be in the range [2, 23].
  // NOTE: exponent_bits_per_sample is 8 for single precision (binary32)
  // floating point, 5 for half precision (binary16), 7 for fp24.
  uint32_t exponent_bits_per_sample;
};
    109 
// Describes one extra channel.
struct ExtraChannelInfo : public Fields {
  ExtraChannelInfo();
  JXL_FIELDS_NAME(ExtraChannelInfo)

  // Overrides Fields::VisitFields; implemented outside this header.
  Status VisitFields(Visitor* JXL_RESTRICT visitor) override;

  // Textual description of this object (implemented outside this header).
  std::string DebugString() const;

  // Declared `mutable`, so it can be written even through a const object.
  // NOTE(review): presumably records whether every field holds its default
  // value - confirm in VisitFields.
  mutable bool all_default;

  ExtraChannel type;   // what kind of extra channel this is
  BitDepth bit_depth;  // sample format of this channel
  uint32_t dim_shift;  // downsampled by 2^dim_shift on each axis

  std::string name;  // UTF-8

  // Conditional:
  // NOTE(review): presumably each of these is only meaningful for the matching
  // `type` (alpha, spot color, CFA respectively) - confirm in VisitFields.
  bool alpha_associated;  // i.e. premultiplied
  float spot_color[4];    // spot color in linear RGBA
  uint32_t cfa_channel;
};
    132 
// Bundle holding an inverse opsin matrix plus two sets of bias values. It is
// embedded in CustomTransformData (declared later in this header).
// NOTE(review): presumably these are the parameters of the XYB inverse
// transform - confirm against the code that consumes them.
struct OpsinInverseMatrix : public Fields {
  OpsinInverseMatrix();
  JXL_FIELDS_NAME(OpsinInverseMatrix)

  // Overrides Fields::VisitFields; implemented outside this header.
  Status VisitFields(Visitor* JXL_RESTRICT visitor) override;

  // Declared `mutable`, so it can be written even through a const object.
  // NOTE(review): presumably records whether every field holds its default
  // value - confirm in VisitFields.
  mutable bool all_default;

  // 9 coefficients; presumably a 3x3 matrix - TODO confirm element order.
  float inverse_matrix[9];
  float opsin_biases[3];
  float quant_biases[4];
};
    145 
// Information useful for mapping HDR images to lower dynamic range displays.
struct ToneMapping : public Fields {
  ToneMapping();
  JXL_FIELDS_NAME(ToneMapping)

  // Overrides Fields::VisitFields; implemented outside this header.
  Status VisitFields(Visitor* JXL_RESTRICT visitor) override;

  // Declared `mutable`, so it can be written even through a const object.
  // NOTE(review): presumably records whether every field holds its default
  // value - confirm in VisitFields.
  mutable bool all_default;

  // Upper bound on the intensity level present in the image. For unsigned
  // integer pixel encodings, this is the brightness of the largest
  // representable value. The image does not necessarily contain a pixel
  // actually this bright. An encoder is allowed to set 255 for SDR images
  // without computing a histogram.
  float intensity_target;  // [nits]

  // Lower bound on the intensity level present in the image. This may be
  // loose, i.e. lower than the actual darkest pixel. When tone mapping, a
  // decoder will map [min_nits, intensity_target] to the display range.
  float min_nits;

  // Selects how linear_below is interpreted; see the comment on linear_below.
  bool relative_to_max_display;
  // The tone mapping will leave unchanged (linear mapping) any pixels whose
  // brightness is strictly below this. The interpretation depends on
  // relative_to_max_display. If true, this is a ratio [0, 1] of the maximum
  // display brightness [nits], otherwise an absolute brightness [nits].
  float linear_below;
};
    174 
// Contains weights to customize some transforms - in particular, XYB and
// upsampling.
struct CustomTransformData : public Fields {
  CustomTransformData();
  JXL_FIELDS_NAME(CustomTransformData)

  // Overrides Fields::VisitFields; implemented outside this header.
  Status VisitFields(Visitor* JXL_RESTRICT visitor) override;

  // Must be set before calling VisitFields. Must equal xyb_encoded of
  // ImageMetadata, should be set by ImageMetadata during VisitFields.
  bool nonserialized_xyb_encoded = false;

  // Declared `mutable`, so it can be written even through a const object.
  // NOTE(review): presumably records whether every field holds its default
  // value - confirm in VisitFields.
  mutable bool all_default;

  OpsinInverseMatrix opsin_inverse_matrix;

  // NOTE(review): presumably a bit mask selecting which of the upsampling
  // weight arrays below carry custom values - confirm in VisitFields.
  uint32_t custom_weights_mask;
  // Weight tables; judging by the names, for 2x, 4x and 8x upsampling.
  float upsampling2_weights[15];
  float upsampling4_weights[55];
  float upsampling8_weights[210];
};
    196 
    197 // Properties of the original image bundle. This enables Encode(Decode()) to
    198 // re-create an equivalent image without user input.
    199 struct ImageMetadata : public Fields {
    200   ImageMetadata();
    201   JXL_FIELDS_NAME(ImageMetadata)
    202 
    203   Status VisitFields(Visitor* JXL_RESTRICT visitor) override;
    204 
    205   // Returns bit depth of the JPEG XL compressed alpha channel, or 0 if no alpha
    206   // channel present. In the theoretical case that there are multiple alpha
    207   // channels, returns the bit depth of the first.
    208   uint32_t GetAlphaBits() const {
    209     const ExtraChannelInfo* alpha = Find(ExtraChannel::kAlpha);
    210     if (alpha == nullptr) return 0;
    211     JXL_ASSERT(alpha->bit_depth.bits_per_sample != 0);
    212     return alpha->bit_depth.bits_per_sample;
    213   }
    214 
    215   // Sets bit depth of alpha channel, adding extra channel if needed, or
    216   // removing all alpha channels if bits is 0.
    217   // Assumes integer alpha channel and not designed to support multiple
    218   // alpha channels (it's possible to use those features by manipulating
    219   // extra_channel_info directly).
    220   //
    221   // Callers must insert the actual channel image at the same index before any
    222   // further modifications to extra_channel_info.
    223   void SetAlphaBits(uint32_t bits, bool alpha_is_premultiplied = false);
    224 
    225   bool HasAlpha() const { return GetAlphaBits() != 0; }
    226 
    227   // Sets the original bit depth fields to indicate unsigned integer of the
    228   // given bit depth.
    229   // TODO(lode): move function to BitDepth
    230   void SetUintSamples(uint32_t bits) {
    231     bit_depth.bits_per_sample = bits;
    232     bit_depth.exponent_bits_per_sample = 0;
    233     bit_depth.floating_point_sample = false;
    234     // RCT / Squeeze may add one bit each, and this is about int16_t,
    235     // so uint13 should still be OK but limiting it to 12 seems safer.
    236     // TODO(jon): figure out a better way to set this header field.
    237     // (in particular, if modular mode is not used it doesn't matter,
    238     // and if transforms are restricted, up to 15-bit could be done)
    239     if (bits > 12) modular_16_bit_buffer_sufficient = false;
    240   }
    241   // Sets the original bit depth fields to indicate single precision floating
    242   // point.
    243   // TODO(lode): move function to BitDepth
    244   void SetFloat32Samples() {
    245     bit_depth.bits_per_sample = 32;
    246     bit_depth.exponent_bits_per_sample = 8;
    247     bit_depth.floating_point_sample = true;
    248     modular_16_bit_buffer_sufficient = false;
    249   }
    250 
    251   void SetFloat16Samples() {
    252     bit_depth.bits_per_sample = 16;
    253     bit_depth.exponent_bits_per_sample = 5;
    254     bit_depth.floating_point_sample = true;
    255     modular_16_bit_buffer_sufficient = false;
    256   }
    257 
    258   void SetIntensityTarget(float intensity_target) {
    259     tone_mapping.intensity_target = intensity_target;
    260   }
    261   float IntensityTarget() const {
    262     JXL_ASSERT(tone_mapping.intensity_target != 0);
    263     return tone_mapping.intensity_target;
    264   }
    265 
    266   // Returns first ExtraChannelInfo of the given type, or nullptr if none.
    267   const ExtraChannelInfo* Find(ExtraChannel type) const {
    268     for (const ExtraChannelInfo& eci : extra_channel_info) {
    269       if (eci.type == type) return &eci;
    270     }
    271     return nullptr;
    272   }
    273 
    274   // Returns first ExtraChannelInfo of the given type, or nullptr if none.
    275   ExtraChannelInfo* Find(ExtraChannel type) {
    276     for (ExtraChannelInfo& eci : extra_channel_info) {
    277       if (eci.type == type) return &eci;
    278     }
    279     return nullptr;
    280   }
    281 
    282   Orientation GetOrientation() const {
    283     return static_cast<Orientation>(orientation);
    284   }
    285 
    286   bool ExtraFieldsDefault() const;
    287 
    288   std::string DebugString() const;
    289 
    290   mutable bool all_default;
    291 
    292   BitDepth bit_depth;
    293   bool modular_16_bit_buffer_sufficient;  // otherwise 32 is.
    294 
    295   // Whether the colors values of the pixels of frames are encoded in the
    296   // codestream using the absolute XYB color space, or the using values that
    297   // follow the color space defined by the ColorEncoding or ICC profile. This
    298   // determines when or whether a CMS (Color Management System) is needed to get
    299   // the pixels in a desired color space. In one case, the pixels have one known
    300   // color space and a CMS is needed to convert them to the original image's
    301   // color space, in the other case the pixels have the color space of the
    302   // original image and a CMS is required if a different display space, or a
    303   // single known consistent color space for multiple decoded images, is
    304   // desired. In all cases, the color space of all frames from a single image is
    305   // the same, both VarDCT and modular frames.
    306   //
    307   // If true: then frames can be decoded to XYB (which can also be converted to
    308   // linear and non-linear sRGB with the built in conversion without CMS). The
    309   // attached ColorEncoding or ICC profile has no effect on the meaning of the
    310   // pixel's color values, but instead indicates what the color profile of the
    311   // original image was, and what color profile one should convert to when
    312   // decoding to integers to prevent clipping and precision loss. To do that
    313   // conversion requires a CMS.
    314   //
    315   // If false: then the color values of decoded frames are in the space defined
    316   // by the attached ColorEncoding or ICC profile. To instead get the pixels in
    317   // a chosen known color space, such as sRGB, requires a CMS, since the
    318   // attached ColorEncoding or ICC profile could be any arbitrary color space.
    319   // This mode is typically used for lossless images encoded as integers.
    320   // Frames can also use YCbCr encoding, some frames may and some may not, but
    321   // this is not a different color space but a certain encoding of the RGB
    322   // values.
    323   //
    324   // Note: if !xyb_encoded, but the attached color profile indicates XYB (which
    325   // can happen either if it's a ColorEncoding with color_space_ ==
    326   // ColorSpace::kXYB, or if it's an ICC Profile that has been crafted to
    327   // represent XYB), then the frames still may not use ColorEncoding kXYB, they
    328   // must still use kNone (or kYCbCr, which would mean applying the YCbCr
    329   // transform to the 3-channel XYB data), since with !xyb_encoded, the 3
    330   // channels are stored as-is, no matter what meaning the color profile assigns
    331   // to them. To use ColorSpace::kXYB, xyb_encoded must be true.
    332   //
    333   // This value is defined in image metadata because this is the global
    334   // codestream header. This value does not affect the image itself, so is not
    335   // image metadata per se, it only affects the encoding, and what color space
    336   // the decoder can receive the pixels in without needing a CMS.
    337   bool xyb_encoded;
    338 
    339   ColorEncoding color_encoding;
    340 
    341   // These values are initialized to defaults such that the 'extra_fields'
    342   // condition in VisitFields uses correctly initialized values.
    343   uint32_t orientation = 1;
    344   bool have_preview = false;
    345   bool have_animation = false;
    346   bool have_intrinsic_size = false;
    347 
    348   // If present, the stored image has the dimensions of the first SizeHeader,
    349   // but decoders are advised to resample or display per `intrinsic_size`.
    350   SizeHeader intrinsic_size;  // only if have_intrinsic_size
    351 
    352   ToneMapping tone_mapping;
    353 
    354   // When reading: deserialized. When writing: automatically set from vector.
    355   uint32_t num_extra_channels;
    356   std::vector<ExtraChannelInfo> extra_channel_info;
    357 
    358   // Only present if m.have_preview.
    359   PreviewHeader preview_size;
    360   // Only present if m.have_animation.
    361   AnimationHeader animation;
    362 
    363   uint64_t extensions;
    364 
    365   // Option to stop parsing after basic info, and treat as if the later
    366   // fields do not participate. Use to parse only basic image information
    367   // excluding the final larger or variable sized data.
    368   bool nonserialized_only_parse_basic_info = false;
    369 };
    370 
// Reads image metadata from `reader` into `*metadata` and returns a Status.
// NOTE(review): only the declaration is visible in this header; what happens
// to `*metadata` on failure should be confirmed in the implementation.
Status ReadImageMetadata(BitReader* JXL_RESTRICT reader,
                         ImageMetadata* JXL_RESTRICT metadata);
    373 
// Writes `metadata` to `writer` and returns a Status.
// NOTE(review): only the declaration is visible in this header; `layer` and
// `aux_out` are presumably used for encoder statistics bookkeeping - confirm
// in the implementation, including whether `aux_out` may be null.
Status WriteImageMetadata(const ImageMetadata& metadata,
                          BitWriter* JXL_RESTRICT writer, size_t layer,
                          AuxOut* aux_out);
    377 
    378 // All metadata applicable to the entire codestream (dimensions, extra channels,
    379 // ...)
    380 struct CodecMetadata {
    381   // TODO(lode): use the preview and animation fields too, in place of the
    382   // nonserialized_ ones in ImageMetadata.
    383   ImageMetadata m;
    384   // The size of the codestream: this is the nominal size applicable to all
    385   // frames, although some frames can have a different effective size through
    386   // crop, dc_level or representing a the preview.
    387   SizeHeader size;
    388   // Often default.
    389   CustomTransformData transform_data;
    390 
    391   size_t xsize() const { return size.xsize(); }
    392   size_t ysize() const { return size.ysize(); }
    393   size_t oriented_xsize(bool keep_orientation) const {
    394     if (static_cast<uint32_t>(m.GetOrientation()) > 4 && !keep_orientation) {
    395       return ysize();
    396     } else {
    397       return xsize();
    398     }
    399   }
    400   size_t oriented_preview_xsize(bool keep_orientation) const {
    401     if (static_cast<uint32_t>(m.GetOrientation()) > 4 && !keep_orientation) {
    402       return m.preview_size.ysize();
    403     } else {
    404       return m.preview_size.xsize();
    405     }
    406   }
    407   size_t oriented_ysize(bool keep_orientation) const {
    408     if (static_cast<uint32_t>(m.GetOrientation()) > 4 && !keep_orientation) {
    409       return xsize();
    410     } else {
    411       return ysize();
    412     }
    413   }
    414   size_t oriented_preview_ysize(bool keep_orientation) const {
    415     if (static_cast<uint32_t>(m.GetOrientation()) > 4 && !keep_orientation) {
    416       return m.preview_size.xsize();
    417     } else {
    418       return m.preview_size.ysize();
    419     }
    420   }
    421 
    422   std::string DebugString() const;
    423 };
    424 
    425 }  // namespace jxl
    426 
    427 #endif  // LIB_JXL_IMAGE_METADATA_H_