image_metadata.h (16018B)
1 // Copyright (c) the JPEG XL Project Authors. All rights reserved. 2 // 3 // Use of this source code is governed by a BSD-style 4 // license that can be found in the LICENSE file. 5 6 // Main codestream header bundles, the metadata that applies to all frames. 7 // Enums must align with the C API definitions in codestream_header.h. 8 9 #ifndef LIB_JXL_IMAGE_METADATA_H_ 10 #define LIB_JXL_IMAGE_METADATA_H_ 11 12 #include <jxl/codestream_header.h> 13 #include <stddef.h> 14 #include <stdint.h> 15 16 #include <string> 17 #include <vector> 18 19 #include "lib/jxl/base/compiler_specific.h" 20 #include "lib/jxl/base/status.h" 21 #include "lib/jxl/color_encoding_internal.h" 22 #include "lib/jxl/dec_bit_reader.h" 23 #include "lib/jxl/field_encodings.h" 24 #include "lib/jxl/fields.h" 25 #include "lib/jxl/headers.h" 26 27 namespace jxl { 28 29 struct AuxOut; 30 31 // EXIF orientation of the image. This field overrides any field present in 32 // actual EXIF metadata. The value tells which transformation the decoder must 33 // apply after decoding to display the image with the correct orientation. 34 enum class Orientation : uint32_t { 35 // Values 1..8 match the EXIF definitions. 36 kIdentity = JXL_ORIENT_IDENTITY, 37 kFlipHorizontal = JXL_ORIENT_FLIP_HORIZONTAL, 38 kRotate180 = JXL_ORIENT_ROTATE_180, 39 kFlipVertical = JXL_ORIENT_FLIP_VERTICAL, 40 kTranspose = JXL_ORIENT_TRANSPOSE, 41 kRotate90 = JXL_ORIENT_ROTATE_90_CW, 42 kAntiTranspose = JXL_ORIENT_ANTI_TRANSPOSE, 43 kRotate270 = JXL_ORIENT_ROTATE_90_CCW, 44 }; 45 // Don't need an EnumBits because Orientation is not read via Enum(). 46 47 enum class ExtraChannel : uint32_t { 48 // First two enumerators (most common) are cheaper to encode 49 kAlpha = JXL_CHANNEL_ALPHA, 50 kDepth = JXL_CHANNEL_DEPTH, 51 52 kSpotColor = JXL_CHANNEL_SPOT_COLOR, 53 kSelectionMask = JXL_CHANNEL_SELECTION_MASK, 54 kBlack = JXL_CHANNEL_BLACK, // for CMYK 55 kCFA = JXL_CHANNEL_CFA, // Bayer channel 56 kThermal = JXL_CHANNEL_THERMAL, 57 kReserved0 = JXL_CHANNEL_RESERVED0, 58 kReserved1 = JXL_CHANNEL_RESERVED1, 59 kReserved2 = JXL_CHANNEL_RESERVED2, 60 kReserved3 = JXL_CHANNEL_RESERVED3, 61 kReserved4 = JXL_CHANNEL_RESERVED4, 62 kReserved5 = JXL_CHANNEL_RESERVED5, 63 kReserved6 = JXL_CHANNEL_RESERVED6, 64 kReserved7 = JXL_CHANNEL_RESERVED7, 65 // disambiguated via name string, raise warning if unsupported 66 kUnknown = JXL_CHANNEL_UNKNOWN, 67 // like kUnknown but can silently be ignored 68 kOptional = JXL_CHANNEL_OPTIONAL 69 }; 70 static inline const char* EnumName(ExtraChannel /*unused*/) { 71 return "ExtraChannel"; 72 } 73 static inline constexpr uint64_t EnumBits(ExtraChannel /*unused*/) { 74 using EC = ExtraChannel; 75 return MakeBit(EC::kAlpha) | MakeBit(EC::kDepth) | MakeBit(EC::kSpotColor) | 76 MakeBit(EC::kSelectionMask) | MakeBit(EC::kBlack) | MakeBit(EC::kCFA) | 77 MakeBit(EC::kThermal) | MakeBit(EC::kUnknown) | MakeBit(EC::kOptional); 78 } 79 80 // Used in ImageMetadata and ExtraChannelInfo. 81 struct BitDepth : public Fields { 82 BitDepth(); 83 JXL_FIELDS_NAME(BitDepth) 84 85 Status VisitFields(Visitor* JXL_RESTRICT visitor) override; 86 87 std::string DebugString() const; 88 89 // Whether the original (uncompressed) samples are floating point or 90 // unsigned integer. 91 bool floating_point_sample; 92 93 // Bit depth of the original (uncompressed) image samples. Must be in the 94 // range [1, 32]. 95 uint32_t bits_per_sample; 96 97 // Floating point exponent bits of the original (uncompressed) image samples, 98 // only used if floating_point_sample is true. 99 // If used, the samples are floating point with: 100 // - 1 sign bit 101 // - exponent_bits_per_sample exponent bits 102 // - (bits_per_sample - exponent_bits_per_sample - 1) mantissa bits 103 // If used, exponent_bits_per_sample must be in the range 104 // [2, 8] and amount of mantissa bits must be in the range [2, 23]. 105 // NOTE: exponent_bits_per_sample is 8 for single precision binary32 106 // point, 5 for half precision binary16, 7 for fp24. 107 uint32_t exponent_bits_per_sample; 108 }; 109 110 // Describes one extra channel. 111 struct ExtraChannelInfo : public Fields { 112 ExtraChannelInfo(); 113 JXL_FIELDS_NAME(ExtraChannelInfo) 114 115 Status VisitFields(Visitor* JXL_RESTRICT visitor) override; 116 117 std::string DebugString() const; 118 119 mutable bool all_default; 120 121 ExtraChannel type; 122 BitDepth bit_depth; 123 uint32_t dim_shift; // downsampled by 2^dim_shift on each axis 124 125 std::string name; // UTF-8 126 127 // Conditional: 128 bool alpha_associated; // i.e. premultiplied 129 float spot_color[4]; // spot color in linear RGBA 130 uint32_t cfa_channel; 131 }; 132 133 struct OpsinInverseMatrix : public Fields { 134 OpsinInverseMatrix(); 135 JXL_FIELDS_NAME(OpsinInverseMatrix) 136 137 Status VisitFields(Visitor* JXL_RESTRICT visitor) override; 138 139 mutable bool all_default; 140 141 float inverse_matrix[9]; 142 float opsin_biases[3]; 143 float quant_biases[4]; 144 }; 145 146 // Information useful for mapping HDR images to lower dynamic range displays. 147 struct ToneMapping : public Fields { 148 ToneMapping(); 149 JXL_FIELDS_NAME(ToneMapping) 150 151 Status VisitFields(Visitor* JXL_RESTRICT visitor) override; 152 153 mutable bool all_default; 154 155 // Upper bound on the intensity level present in the image. For unsigned 156 // integer pixel encodings, this is the brightness of the largest 157 // representable value. The image does not necessarily contain a pixel 158 // actually this bright. An encoder is allowed to set 255 for SDR images 159 // without computing a histogram. 160 float intensity_target; // [nits] 161 162 // Lower bound on the intensity level present in the image. This may be 163 // loose, i.e. lower than the actual darkest pixel. When tone mapping, a 164 // decoder will map [min_nits, intensity_target] to the display range. 165 float min_nits; 166 167 bool relative_to_max_display; // see below 168 // The tone mapping will leave unchanged (linear mapping) any pixels whose 169 // brightness is strictly below this. The interpretation depends on 170 // relative_to_max_display. If true, this is a ratio [0, 1] of the maximum 171 // display brightness [nits], otherwise an absolute brightness [nits]. 172 float linear_below; 173 }; 174 175 // Contains weights to customize some transforms - in particular, XYB and 176 // upsampling. 177 struct CustomTransformData : public Fields { 178 CustomTransformData(); 179 JXL_FIELDS_NAME(CustomTransformData) 180 181 Status VisitFields(Visitor* JXL_RESTRICT visitor) override; 182 183 // Must be set before calling VisitFields. Must equal xyb_encoded of 184 // ImageMetadata, should be set by ImageMetadata during VisitFields. 185 bool nonserialized_xyb_encoded = false; 186 187 mutable bool all_default; 188 189 OpsinInverseMatrix opsin_inverse_matrix; 190 191 uint32_t custom_weights_mask; 192 float upsampling2_weights[15]; 193 float upsampling4_weights[55]; 194 float upsampling8_weights[210]; 195 }; 196 197 // Properties of the original image bundle. This enables Encode(Decode()) to 198 // re-create an equivalent image without user input. 199 struct ImageMetadata : public Fields { 200 ImageMetadata(); 201 JXL_FIELDS_NAME(ImageMetadata) 202 203 Status VisitFields(Visitor* JXL_RESTRICT visitor) override; 204 205 // Returns bit depth of the JPEG XL compressed alpha channel, or 0 if no alpha 206 // channel present. In the theoretical case that there are multiple alpha 207 // channels, returns the bit depth of the first. 208 uint32_t GetAlphaBits() const { 209 const ExtraChannelInfo* alpha = Find(ExtraChannel::kAlpha); 210 if (alpha == nullptr) return 0; 211 JXL_ASSERT(alpha->bit_depth.bits_per_sample != 0); 212 return alpha->bit_depth.bits_per_sample; 213 } 214 215 // Sets bit depth of alpha channel, adding extra channel if needed, or 216 // removing all alpha channels if bits is 0. 217 // Assumes integer alpha channel and not designed to support multiple 218 // alpha channels (it's possible to use those features by manipulating 219 // extra_channel_info directly). 220 // 221 // Callers must insert the actual channel image at the same index before any 222 // further modifications to extra_channel_info. 223 void SetAlphaBits(uint32_t bits, bool alpha_is_premultiplied = false); 224 225 bool HasAlpha() const { return GetAlphaBits() != 0; } 226 227 // Sets the original bit depth fields to indicate unsigned integer of the 228 // given bit depth. 229 // TODO(lode): move function to BitDepth 230 void SetUintSamples(uint32_t bits) { 231 bit_depth.bits_per_sample = bits; 232 bit_depth.exponent_bits_per_sample = 0; 233 bit_depth.floating_point_sample = false; 234 // RCT / Squeeze may add one bit each, and this is about int16_t, 235 // so uint13 should still be OK but limiting it to 12 seems safer. 236 // TODO(jon): figure out a better way to set this header field. 237 // (in particular, if modular mode is not used it doesn't matter, 238 // and if transforms are restricted, up to 15-bit could be done) 239 if (bits > 12) modular_16_bit_buffer_sufficient = false; 240 } 241 // Sets the original bit depth fields to indicate single precision floating 242 // point. 243 // TODO(lode): move function to BitDepth 244 void SetFloat32Samples() { 245 bit_depth.bits_per_sample = 32; 246 bit_depth.exponent_bits_per_sample = 8; 247 bit_depth.floating_point_sample = true; 248 modular_16_bit_buffer_sufficient = false; 249 } 250 251 void SetFloat16Samples() { 252 bit_depth.bits_per_sample = 16; 253 bit_depth.exponent_bits_per_sample = 5; 254 bit_depth.floating_point_sample = true; 255 modular_16_bit_buffer_sufficient = false; 256 } 257 258 void SetIntensityTarget(float intensity_target) { 259 tone_mapping.intensity_target = intensity_target; 260 } 261 float IntensityTarget() const { 262 JXL_ASSERT(tone_mapping.intensity_target != 0); 263 return tone_mapping.intensity_target; 264 } 265 266 // Returns first ExtraChannelInfo of the given type, or nullptr if none. 267 const ExtraChannelInfo* Find(ExtraChannel type) const { 268 for (const ExtraChannelInfo& eci : extra_channel_info) { 269 if (eci.type == type) return &eci; 270 } 271 return nullptr; 272 } 273 274 // Returns first ExtraChannelInfo of the given type, or nullptr if none. 275 ExtraChannelInfo* Find(ExtraChannel type) { 276 for (ExtraChannelInfo& eci : extra_channel_info) { 277 if (eci.type == type) return &eci; 278 } 279 return nullptr; 280 } 281 282 Orientation GetOrientation() const { 283 return static_cast<Orientation>(orientation); 284 } 285 286 bool ExtraFieldsDefault() const; 287 288 std::string DebugString() const; 289 290 mutable bool all_default; 291 292 BitDepth bit_depth; 293 bool modular_16_bit_buffer_sufficient; // otherwise 32 is. 294 295 // Whether the colors values of the pixels of frames are encoded in the 296 // codestream using the absolute XYB color space, or the using values that 297 // follow the color space defined by the ColorEncoding or ICC profile. This 298 // determines when or whether a CMS (Color Management System) is needed to get 299 // the pixels in a desired color space. In one case, the pixels have one known 300 // color space and a CMS is needed to convert them to the original image's 301 // color space, in the other case the pixels have the color space of the 302 // original image and a CMS is required if a different display space, or a 303 // single known consistent color space for multiple decoded images, is 304 // desired. In all cases, the color space of all frames from a single image is 305 // the same, both VarDCT and modular frames. 306 // 307 // If true: then frames can be decoded to XYB (which can also be converted to 308 // linear and non-linear sRGB with the built in conversion without CMS). The 309 // attached ColorEncoding or ICC profile has no effect on the meaning of the 310 // pixel's color values, but instead indicates what the color profile of the 311 // original image was, and what color profile one should convert to when 312 // decoding to integers to prevent clipping and precision loss. To do that 313 // conversion requires a CMS. 314 // 315 // If false: then the color values of decoded frames are in the space defined 316 // by the attached ColorEncoding or ICC profile. To instead get the pixels in 317 // a chosen known color space, such as sRGB, requires a CMS, since the 318 // attached ColorEncoding or ICC profile could be any arbitrary color space. 319 // This mode is typically used for lossless images encoded as integers. 320 // Frames can also use YCbCr encoding, some frames may and some may not, but 321 // this is not a different color space but a certain encoding of the RGB 322 // values. 323 // 324 // Note: if !xyb_encoded, but the attached color profile indicates XYB (which 325 // can happen either if it's a ColorEncoding with color_space_ == 326 // ColorSpace::kXYB, or if it's an ICC Profile that has been crafted to 327 // represent XYB), then the frames still may not use ColorEncoding kXYB, they 328 // must still use kNone (or kYCbCr, which would mean applying the YCbCr 329 // transform to the 3-channel XYB data), since with !xyb_encoded, the 3 330 // channels are stored as-is, no matter what meaning the color profile assigns 331 // to them. To use ColorSpace::kXYB, xyb_encoded must be true. 332 // 333 // This value is defined in image metadata because this is the global 334 // codestream header. This value does not affect the image itself, so is not 335 // image metadata per se, it only affects the encoding, and what color space 336 // the decoder can receive the pixels in without needing a CMS. 337 bool xyb_encoded; 338 339 ColorEncoding color_encoding; 340 341 // These values are initialized to defaults such that the 'extra_fields' 342 // condition in VisitFields uses correctly initialized values. 343 uint32_t orientation = 1; 344 bool have_preview = false; 345 bool have_animation = false; 346 bool have_intrinsic_size = false; 347 348 // If present, the stored image has the dimensions of the first SizeHeader, 349 // but decoders are advised to resample or display per `intrinsic_size`. 350 SizeHeader intrinsic_size; // only if have_intrinsic_size 351 352 ToneMapping tone_mapping; 353 354 // When reading: deserialized. When writing: automatically set from vector. 355 uint32_t num_extra_channels; 356 std::vector<ExtraChannelInfo> extra_channel_info; 357 358 // Only present if m.have_preview. 359 PreviewHeader preview_size; 360 // Only present if m.have_animation. 361 AnimationHeader animation; 362 363 uint64_t extensions; 364 365 // Option to stop parsing after basic info, and treat as if the later 366 // fields do not participate. Use to parse only basic image information 367 // excluding the final larger or variable sized data. 368 bool nonserialized_only_parse_basic_info = false; 369 }; 370 371 Status ReadImageMetadata(BitReader* JXL_RESTRICT reader, 372 ImageMetadata* JXL_RESTRICT metadata); 373 374 Status WriteImageMetadata(const ImageMetadata& metadata, 375 BitWriter* JXL_RESTRICT writer, size_t layer, 376 AuxOut* aux_out); 377 378 // All metadata applicable to the entire codestream (dimensions, extra channels, 379 // ...) 380 struct CodecMetadata { 381 // TODO(lode): use the preview and animation fields too, in place of the 382 // nonserialized_ ones in ImageMetadata. 383 ImageMetadata m; 384 // The size of the codestream: this is the nominal size applicable to all 385 // frames, although some frames can have a different effective size through 386 // crop, dc_level or representing a the preview. 387 SizeHeader size; 388 // Often default. 389 CustomTransformData transform_data; 390 391 size_t xsize() const { return size.xsize(); } 392 size_t ysize() const { return size.ysize(); } 393 size_t oriented_xsize(bool keep_orientation) const { 394 if (static_cast<uint32_t>(m.GetOrientation()) > 4 && !keep_orientation) { 395 return ysize(); 396 } else { 397 return xsize(); 398 } 399 } 400 size_t oriented_preview_xsize(bool keep_orientation) const { 401 if (static_cast<uint32_t>(m.GetOrientation()) > 4 && !keep_orientation) { 402 return m.preview_size.ysize(); 403 } else { 404 return m.preview_size.xsize(); 405 } 406 } 407 size_t oriented_ysize(bool keep_orientation) const { 408 if (static_cast<uint32_t>(m.GetOrientation()) > 4 && !keep_orientation) { 409 return xsize(); 410 } else { 411 return ysize(); 412 } 413 } 414 size_t oriented_preview_ysize(bool keep_orientation) const { 415 if (static_cast<uint32_t>(m.GetOrientation()) > 4 && !keep_orientation) { 416 return m.preview_size.xsize(); 417 } else { 418 return m.preview_size.ysize(); 419 } 420 } 421 422 std::string DebugString() const; 423 }; 424 425 } // namespace jxl 426 427 #endif // LIB_JXL_IMAGE_METADATA_H_