iamf.h (21054B)
1 /* 2 * Immersive Audio Model and Formats helper functions and defines 3 * 4 * This file is part of FFmpeg. 5 * 6 * FFmpeg is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * FFmpeg is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with FFmpeg; if not, write to the Free Software 18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 19 */ 20 21 #ifndef AVUTIL_IAMF_H 22 #define AVUTIL_IAMF_H 23 24 /** 25 * @file 26 * Immersive Audio Model and Formats API header 27 * @see <a href="https://aomediacodec.github.io/iamf/">Immersive Audio Model and Formats</a> 28 */ 29 30 #include <stdint.h> 31 #include <stddef.h> 32 33 #include "attributes.h" 34 #include "avassert.h" 35 #include "channel_layout.h" 36 #include "dict.h" 37 #include "rational.h" 38 39 /** 40 * @defgroup lavu_iamf_params Parameter Definition 41 * @{ 42 * Parameters as defined in section 3.6.1 and 3.8 of IAMF. 43 * @} 44 * @defgroup lavu_iamf_audio Audio Element 45 * @{ 46 * Audio Elements as defined in section 3.6 of IAMF. 47 * @} 48 * @defgroup lavu_iamf_mix Mix Presentation 49 * @{ 50 * Mix Presentations as defined in section 3.7 of IAMF. 51 * @} 52 * 53 * @} 54 * @addtogroup lavu_iamf_params 55 * @{ 56 */ 57 enum AVIAMFAnimationType { 58 AV_IAMF_ANIMATION_TYPE_STEP, 59 AV_IAMF_ANIMATION_TYPE_LINEAR, 60 AV_IAMF_ANIMATION_TYPE_BEZIER, 61 }; 62 63 /** 64 * Mix Gain Parameter Data as defined in section 3.8.1 of IAMF. 65 * 66 * @note This struct's size is not a part of the public ABI. 67 */ 68 typedef struct AVIAMFMixGain { 69 const AVClass *av_class; 70 71 /** 72 * Duration for the given subblock, in units of 73 * 1 / @ref AVIAMFParamDefinition.parameter_rate "parameter_rate". 74 * It must not be 0. 75 */ 76 unsigned int subblock_duration; 77 /** 78 * The type of animation applied to the parameter values. 79 */ 80 enum AVIAMFAnimationType animation_type; 81 /** 82 * Parameter value that is applied at the start of the subblock. 83 * Applies to all defined Animation Types. 84 * 85 * Valid range of values is -128.0 to 128.0 86 */ 87 AVRational start_point_value; 88 /** 89 * Parameter value that is applied at the end of the subblock. 90 * Applies only to AV_IAMF_ANIMATION_TYPE_LINEAR and 91 * AV_IAMF_ANIMATION_TYPE_BEZIER Animation Types. 92 * 93 * Valid range of values is -128.0 to 128.0 94 */ 95 AVRational end_point_value; 96 /** 97 * Parameter value of the middle control point of a quadratic Bezier 98 * curve, i.e., its y-axis value. 99 * Applies only to AV_IAMF_ANIMATION_TYPE_BEZIER Animation Type. 100 * 101 * Valid range of values is -128.0 to 128.0 102 */ 103 AVRational control_point_value; 104 /** 105 * Parameter value of the time of the middle control point of a 106 * quadratic Bezier curve, i.e., its x-axis value. 107 * Applies only to AV_IAMF_ANIMATION_TYPE_BEZIER Animation Type. 108 * 109 * Valid range of values is 0.0 to 1.0 110 */ 111 AVRational control_point_relative_time; 112 } AVIAMFMixGain; 113 114 /** 115 * Demixing Info Parameter Data as defined in section 3.8.2 of IAMF. 116 * 117 * @note This struct's size is not a part of the public ABI. 118 */ 119 typedef struct AVIAMFDemixingInfo { 120 const AVClass *av_class; 121 122 /** 123 * Duration for the given subblock, in units of 124 * 1 / @ref AVIAMFParamDefinition.parameter_rate "parameter_rate". 125 * It must not be 0. 126 */ 127 unsigned int subblock_duration; 128 /** 129 * Pre-defined combination of demixing parameters. 130 */ 131 unsigned int dmixp_mode; 132 } AVIAMFDemixingInfo; 133 134 /** 135 * Recon Gain Info Parameter Data as defined in section 3.8.3 of IAMF. 136 * 137 * @note This struct's size is not a part of the public ABI. 138 */ 139 typedef struct AVIAMFReconGain { 140 const AVClass *av_class; 141 142 /** 143 * Duration for the given subblock, in units of 144 * 1 / @ref AVIAMFParamDefinition.parameter_rate "parameter_rate". 145 * It must not be 0. 146 */ 147 unsigned int subblock_duration; 148 149 /** 150 * Array of gain values to be applied to each channel for each layer 151 * defined in the Audio Element referencing the parent Parameter Definition. 152 * Values for layers where the AV_IAMF_LAYER_FLAG_RECON_GAIN flag is not set 153 * are undefined. 154 * 155 * Channel order is: FL, C, FR, SL, SR, TFL, TFR, BL, BR, TBL, TBR, LFE 156 */ 157 uint8_t recon_gain[6][12]; 158 } AVIAMFReconGain; 159 160 enum AVIAMFParamDefinitionType { 161 /** 162 * Subblocks are of struct type AVIAMFMixGain 163 */ 164 AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN, 165 /** 166 * Subblocks are of struct type AVIAMFDemixingInfo 167 */ 168 AV_IAMF_PARAMETER_DEFINITION_DEMIXING, 169 /** 170 * Subblocks are of struct type AVIAMFReconGain 171 */ 172 AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN, 173 }; 174 175 /** 176 * Parameters as defined in section 3.6.1 of IAMF. 177 * 178 * The struct is allocated by av_iamf_param_definition_alloc() along with an 179 * array of subblocks, its type depending on the value of type. 180 * This array is placed subblocks_offset bytes after the start of this struct. 181 * 182 * @note This struct's size is not a part of the public ABI. 183 */ 184 typedef struct AVIAMFParamDefinition { 185 const AVClass *av_class; 186 187 /** 188 * Offset in bytes from the start of this struct, at which the subblocks 189 * array is located. 190 */ 191 size_t subblocks_offset; 192 /** 193 * Size in bytes of each element in the subblocks array. 194 */ 195 size_t subblock_size; 196 /** 197 * Number of subblocks in the array. 198 */ 199 unsigned int nb_subblocks; 200 201 /** 202 * Parameters type. Determines the type of the subblock elements. 203 */ 204 enum AVIAMFParamDefinitionType type; 205 206 /** 207 * Identifier for the paremeter substream. 208 */ 209 unsigned int parameter_id; 210 /** 211 * Sample rate for the paremeter substream. It must not be 0. 212 */ 213 unsigned int parameter_rate; 214 215 /** 216 * The accumulated duration of all blocks in this parameter definition, 217 * in units of 1 / @ref parameter_rate. 218 * 219 * May be 0, in which case all duration values should be specified in 220 * another parameter definition referencing the same parameter_id. 221 */ 222 unsigned int duration; 223 /** 224 * The duration of every subblock in the case where all subblocks, with 225 * the optional exception of the last subblock, have equal durations. 226 * 227 * Must be 0 if subblocks have different durations. 228 */ 229 unsigned int constant_subblock_duration; 230 } AVIAMFParamDefinition; 231 232 const AVClass *av_iamf_param_definition_get_class(void); 233 234 /** 235 * Allocates memory for AVIAMFParamDefinition, plus an array of {@code nb_subblocks} 236 * amount of subblocks of the given type and initializes the variables. Can be 237 * freed with a normal av_free() call. 238 * 239 * @param size if non-NULL, the size in bytes of the resulting data array is written here. 240 */ 241 AVIAMFParamDefinition *av_iamf_param_definition_alloc(enum AVIAMFParamDefinitionType type, 242 unsigned int nb_subblocks, size_t *size); 243 244 /** 245 * Get the subblock at the specified {@code idx}. Must be between 0 and nb_subblocks - 1. 246 * 247 * The @ref AVIAMFParamDefinition.type "param definition type" defines 248 * the struct type of the returned pointer. 249 */ 250 static av_always_inline void* 251 av_iamf_param_definition_get_subblock(const AVIAMFParamDefinition *par, unsigned int idx) 252 { 253 av_assert0(idx < par->nb_subblocks); 254 return (void *)((uint8_t *)par + par->subblocks_offset + idx * par->subblock_size); 255 } 256 257 /** 258 * @} 259 * @addtogroup lavu_iamf_audio 260 * @{ 261 */ 262 263 enum AVIAMFAmbisonicsMode { 264 AV_IAMF_AMBISONICS_MODE_MONO, 265 AV_IAMF_AMBISONICS_MODE_PROJECTION, 266 }; 267 268 /** 269 * Recon gain information for the layer is present in AVIAMFReconGain 270 */ 271 #define AV_IAMF_LAYER_FLAG_RECON_GAIN (1 << 0) 272 273 /** 274 * A layer defining a Channel Layout in the Audio Element. 275 * 276 * When @ref AVIAMFAudioElement.audio_element_type "the parent's Audio Element type" 277 * is AV_IAMF_AUDIO_ELEMENT_TYPE_CHANNEL, this corresponds to an Scalable Channel 278 * Layout layer as defined in section 3.6.2 of IAMF. 279 * For AV_IAMF_AUDIO_ELEMENT_TYPE_SCENE, it is an Ambisonics channel 280 * layout as defined in section 3.6.3 of IAMF. 281 * 282 * @note The struct should be allocated with av_iamf_audio_element_add_layer() 283 * and its size is not a part of the public ABI. 284 */ 285 typedef struct AVIAMFLayer { 286 const AVClass *av_class; 287 288 AVChannelLayout ch_layout; 289 290 /** 291 * A bitmask which may contain a combination of AV_IAMF_LAYER_FLAG_* flags. 292 */ 293 unsigned int flags; 294 /** 295 * Output gain channel flags as defined in section 3.6.2 of IAMF. 296 * 297 * This field is defined only if @ref AVIAMFAudioElement.audio_element_type 298 * "the parent's Audio Element type" is AV_IAMF_AUDIO_ELEMENT_TYPE_CHANNEL, 299 * must be 0 otherwise. 300 */ 301 unsigned int output_gain_flags; 302 /** 303 * Output gain as defined in section 3.6.2 of IAMF. 304 * 305 * Must be 0 if @ref output_gain_flags is 0. 306 */ 307 AVRational output_gain; 308 /** 309 * Ambisonics mode as defined in section 3.6.3 of IAMF. 310 * 311 * This field is defined only if @ref AVIAMFAudioElement.audio_element_type 312 * "the parent's Audio Element type" is AV_IAMF_AUDIO_ELEMENT_TYPE_SCENE. 313 * 314 * If AV_IAMF_AMBISONICS_MODE_MONO, channel_mapping is defined implicitly 315 * (Ambisonic Order) or explicitly (Custom Order with ambi channels) in 316 * @ref ch_layout. 317 * If AV_IAMF_AMBISONICS_MODE_PROJECTION, @ref demixing_matrix must be set. 318 */ 319 enum AVIAMFAmbisonicsMode ambisonics_mode; 320 321 /** 322 * Demixing matrix as defined in section 3.6.3 of IAMF. 323 * 324 * The length of the array is ch_layout.nb_channels multiplied by the sum of 325 * the amount of streams in the group plus the amount of streams in the group 326 * that are stereo. 327 * 328 * May be set only if @ref ambisonics_mode == AV_IAMF_AMBISONICS_MODE_PROJECTION, 329 * must be NULL otherwise. 330 */ 331 AVRational *demixing_matrix; 332 } AVIAMFLayer; 333 334 335 enum AVIAMFAudioElementType { 336 AV_IAMF_AUDIO_ELEMENT_TYPE_CHANNEL, 337 AV_IAMF_AUDIO_ELEMENT_TYPE_SCENE, 338 }; 339 340 /** 341 * Information on how to combine one or more audio streams, as defined in 342 * section 3.6 of IAMF. 343 * 344 * @note The struct should be allocated with av_iamf_audio_element_alloc() 345 * and its size is not a part of the public ABI. 346 */ 347 typedef struct AVIAMFAudioElement { 348 const AVClass *av_class; 349 350 AVIAMFLayer **layers; 351 /** 352 * Number of layers, or channel groups, in the Audio Element. 353 * There may be 6 layers at most, and for @ref audio_element_type 354 * AV_IAMF_AUDIO_ELEMENT_TYPE_SCENE, there may be exactly 1. 355 * 356 * Set by av_iamf_audio_element_add_layer(), must not be 357 * modified by any other code. 358 */ 359 unsigned int nb_layers; 360 361 /** 362 * Demixing information used to reconstruct a scalable channel audio 363 * representation. 364 * The @ref AVIAMFParamDefinition.type "type" must be 365 * AV_IAMF_PARAMETER_DEFINITION_DEMIXING. 366 */ 367 AVIAMFParamDefinition *demixing_info; 368 /** 369 * Recon gain information used to reconstruct a scalable channel audio 370 * representation. 371 * The @ref AVIAMFParamDefinition.type "type" must be 372 * AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN. 373 */ 374 AVIAMFParamDefinition *recon_gain_info; 375 376 /** 377 * Audio element type as defined in section 3.6 of IAMF. 378 */ 379 enum AVIAMFAudioElementType audio_element_type; 380 381 /** 382 * Default weight value as defined in section 3.6 of IAMF. 383 */ 384 unsigned int default_w; 385 } AVIAMFAudioElement; 386 387 const AVClass *av_iamf_audio_element_get_class(void); 388 389 /** 390 * Allocates a AVIAMFAudioElement, and initializes its fields with default values. 391 * No layers are allocated. Must be freed with av_iamf_audio_element_free(). 392 * 393 * @see av_iamf_audio_element_add_layer() 394 */ 395 AVIAMFAudioElement *av_iamf_audio_element_alloc(void); 396 397 /** 398 * Allocate a layer and add it to a given AVIAMFAudioElement. 399 * It is freed by av_iamf_audio_element_free() alongside the rest of the parent 400 * AVIAMFAudioElement. 401 * 402 * @return a pointer to the allocated layer. 403 */ 404 AVIAMFLayer *av_iamf_audio_element_add_layer(AVIAMFAudioElement *audio_element); 405 406 /** 407 * Free an AVIAMFAudioElement and all its contents. 408 * 409 * @param audio_element pointer to pointer to an allocated AVIAMFAudioElement. 410 * upon return, *audio_element will be set to NULL. 411 */ 412 void av_iamf_audio_element_free(AVIAMFAudioElement **audio_element); 413 414 /** 415 * @} 416 * @addtogroup lavu_iamf_mix 417 * @{ 418 */ 419 420 enum AVIAMFHeadphonesMode { 421 /** 422 * The referenced Audio Element shall be rendered to stereo loudspeakers. 423 */ 424 AV_IAMF_HEADPHONES_MODE_STEREO, 425 /** 426 * The referenced Audio Element shall be rendered with a binaural renderer. 427 */ 428 AV_IAMF_HEADPHONES_MODE_BINAURAL, 429 }; 430 431 /** 432 * Submix element as defined in section 3.7 of IAMF. 433 * 434 * @note The struct should be allocated with av_iamf_submix_add_element() 435 * and its size is not a part of the public ABI. 436 */ 437 typedef struct AVIAMFSubmixElement { 438 const AVClass *av_class; 439 440 /** 441 * The id of the Audio Element this submix element references. 442 */ 443 unsigned int audio_element_id; 444 445 /** 446 * Information required required for applying any processing to the 447 * referenced and rendered Audio Element before being summed with other 448 * processed Audio Elements. 449 * The @ref AVIAMFParamDefinition.type "type" must be 450 * AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN. 451 */ 452 AVIAMFParamDefinition *element_mix_config; 453 454 /** 455 * Default mix gain value to apply when there are no AVIAMFParamDefinition 456 * with @ref element_mix_config "element_mix_config's" 457 * @ref AVIAMFParamDefinition.parameter_id "parameter_id" available for a 458 * given audio frame. 459 */ 460 AVRational default_mix_gain; 461 462 /** 463 * A value that indicates whether the referenced channel-based Audio Element 464 * shall be rendered to stereo loudspeakers or spatialized with a binaural 465 * renderer when played back on headphones. 466 * If the Audio Element is not of @ref AVIAMFAudioElement.audio_element_type 467 * "type" AV_IAMF_AUDIO_ELEMENT_TYPE_CHANNEL, then this field is undefined. 468 */ 469 enum AVIAMFHeadphonesMode headphones_rendering_mode; 470 471 /** 472 * A dictionary of strings describing the submix in different languages. 473 * Must have the same amount of entries as 474 * @ref AVIAMFMixPresentation.annotations "the mix's annotations", stored 475 * in the same order, and with the same key strings. 476 * 477 * @ref AVDictionaryEntry.key "key" is a string conforming to BCP-47 that 478 * specifies the language for the string stored in 479 * @ref AVDictionaryEntry.value "value". 480 */ 481 AVDictionary *annotations; 482 } AVIAMFSubmixElement; 483 484 enum AVIAMFSubmixLayoutType { 485 /** 486 * The layout follows the loudspeaker sound system convention of ITU-2051-3. 487 */ 488 AV_IAMF_SUBMIX_LAYOUT_TYPE_LOUDSPEAKERS = 2, 489 /** 490 * The layout is binaural. 491 */ 492 AV_IAMF_SUBMIX_LAYOUT_TYPE_BINAURAL = 3, 493 }; 494 495 /** 496 * Submix layout as defined in section 3.7.6 of IAMF. 497 * 498 * @note The struct should be allocated with av_iamf_submix_add_layout() 499 * and its size is not a part of the public ABI. 500 */ 501 typedef struct AVIAMFSubmixLayout { 502 const AVClass *av_class; 503 504 enum AVIAMFSubmixLayoutType layout_type; 505 506 /** 507 * Channel layout matching one of Sound Systems A to J of ITU-2051-3, plus 508 * 7.1.2ch and 3.1.2ch 509 * If layout_type is not AV_IAMF_SUBMIX_LAYOUT_TYPE_LOUDSPEAKERS, this field 510 * is undefined. 511 */ 512 AVChannelLayout sound_system; 513 /** 514 * The program integrated loudness information, as defined in 515 * ITU-1770-4. 516 */ 517 AVRational integrated_loudness; 518 /** 519 * The digital (sampled) peak value of the audio signal, as defined 520 * in ITU-1770-4. 521 */ 522 AVRational digital_peak; 523 /** 524 * The true peak of the audio signal, as defined in ITU-1770-4. 525 */ 526 AVRational true_peak; 527 /** 528 * The Dialogue loudness information, as defined in ITU-1770-4. 529 */ 530 AVRational dialogue_anchored_loudness; 531 /** 532 * The Album loudness information, as defined in ITU-1770-4. 533 */ 534 AVRational album_anchored_loudness; 535 } AVIAMFSubmixLayout; 536 537 /** 538 * Submix layout as defined in section 3.7 of IAMF. 539 * 540 * @note The struct should be allocated with av_iamf_mix_presentation_add_submix() 541 * and its size is not a part of the public ABI. 542 */ 543 typedef struct AVIAMFSubmix { 544 const AVClass *av_class; 545 546 /** 547 * Array of submix elements. 548 * 549 * Set by av_iamf_submix_add_element(), must not be modified by any 550 * other code. 551 */ 552 AVIAMFSubmixElement **elements; 553 /** 554 * Number of elements in the submix. 555 * 556 * Set by av_iamf_submix_add_element(), must not be modified by any 557 * other code. 558 */ 559 unsigned int nb_elements; 560 561 /** 562 * Array of submix layouts. 563 * 564 * Set by av_iamf_submix_add_layout(), must not be modified by any 565 * other code. 566 */ 567 AVIAMFSubmixLayout **layouts; 568 /** 569 * Number of layouts in the submix. 570 * 571 * Set by av_iamf_submix_add_layout(), must not be modified by any 572 * other code. 573 */ 574 unsigned int nb_layouts; 575 576 /** 577 * Information required for post-processing the mixed audio signal to 578 * generate the audio signal for playback. 579 * The @ref AVIAMFParamDefinition.type "type" must be 580 * AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN. 581 */ 582 AVIAMFParamDefinition *output_mix_config; 583 584 /** 585 * Default mix gain value to apply when there are no AVIAMFParamDefinition 586 * with @ref output_mix_config "output_mix_config's" 587 * @ref AVIAMFParamDefinition.parameter_id "parameter_id" available for a 588 * given audio frame. 589 */ 590 AVRational default_mix_gain; 591 } AVIAMFSubmix; 592 593 /** 594 * Information on how to render and mix one or more AVIAMFAudioElement to generate 595 * the final audio output, as defined in section 3.7 of IAMF. 596 * 597 * @note The struct should be allocated with av_iamf_mix_presentation_alloc() 598 * and its size is not a part of the public ABI. 599 */ 600 typedef struct AVIAMFMixPresentation { 601 const AVClass *av_class; 602 603 /** 604 * Array of submixes. 605 * 606 * Set by av_iamf_mix_presentation_add_submix(), must not be modified 607 * by any other code. 608 */ 609 AVIAMFSubmix **submixes; 610 /** 611 * Number of submixes in the presentation. 612 * 613 * Set by av_iamf_mix_presentation_add_submix(), must not be modified 614 * by any other code. 615 */ 616 unsigned int nb_submixes; 617 618 /** 619 * A dictionary of strings describing the mix in different languages. 620 * Must have the same amount of entries as every 621 * @ref AVIAMFSubmixElement.annotations "Submix element annotations", 622 * stored in the same order, and with the same key strings. 623 * 624 * @ref AVDictionaryEntry.key "key" is a string conforming to BCP-47 625 * that specifies the language for the string stored in 626 * @ref AVDictionaryEntry.value "value". 627 */ 628 AVDictionary *annotations; 629 } AVIAMFMixPresentation; 630 631 const AVClass *av_iamf_mix_presentation_get_class(void); 632 633 /** 634 * Allocates a AVIAMFMixPresentation, and initializes its fields with default 635 * values. No submixes are allocated. 636 * Must be freed with av_iamf_mix_presentation_free(). 637 * 638 * @see av_iamf_mix_presentation_add_submix() 639 */ 640 AVIAMFMixPresentation *av_iamf_mix_presentation_alloc(void); 641 642 /** 643 * Allocate a submix and add it to a given AVIAMFMixPresentation. 644 * It is freed by av_iamf_mix_presentation_free() alongside the rest of the 645 * parent AVIAMFMixPresentation. 646 * 647 * @return a pointer to the allocated submix. 648 */ 649 AVIAMFSubmix *av_iamf_mix_presentation_add_submix(AVIAMFMixPresentation *mix_presentation); 650 651 /** 652 * Allocate a submix element and add it to a given AVIAMFSubmix. 653 * It is freed by av_iamf_mix_presentation_free() alongside the rest of the 654 * parent AVIAMFSubmix. 655 * 656 * @return a pointer to the allocated submix. 657 */ 658 AVIAMFSubmixElement *av_iamf_submix_add_element(AVIAMFSubmix *submix); 659 660 /** 661 * Allocate a submix layout and add it to a given AVIAMFSubmix. 662 * It is freed by av_iamf_mix_presentation_free() alongside the rest of the 663 * parent AVIAMFSubmix. 664 * 665 * @return a pointer to the allocated submix. 666 */ 667 AVIAMFSubmixLayout *av_iamf_submix_add_layout(AVIAMFSubmix *submix); 668 669 /** 670 * Free an AVIAMFMixPresentation and all its contents. 671 * 672 * @param mix_presentation pointer to pointer to an allocated AVIAMFMixPresentation. 673 * upon return, *mix_presentation will be set to NULL. 674 */ 675 void av_iamf_mix_presentation_free(AVIAMFMixPresentation **mix_presentation); 676 /** 677 * @} 678 */ 679 680 #endif /* AVUTIL_IAMF_H */