[FFmpeg-devel] [PATCH 8/8] avformat: Immersive Audio Model and Formats muxer
James Almer
jamrial at gmail.com
Wed Dec 6 00:44:02 EET 2023
Signed-off-by: James Almer <jamrial at gmail.com>
---
libavformat/Makefile | 1 +
libavformat/allformats.c | 1 +
libavformat/iamf_writer.c | 823 ++++++++++++++++++++++++++++++++++++++
libavformat/iamf_writer.h | 51 +++
libavformat/iamfenc.c | 388 ++++++++++++++++++
5 files changed, 1264 insertions(+)
create mode 100644 libavformat/iamf_writer.c
create mode 100644 libavformat/iamf_writer.h
create mode 100644 libavformat/iamfenc.c
diff --git a/libavformat/Makefile b/libavformat/Makefile
index f23c22792b..581e378d95 100644
--- a/libavformat/Makefile
+++ b/libavformat/Makefile
@@ -259,6 +259,7 @@ OBJS-$(CONFIG_HLS_DEMUXER) += hls.o hls_sample_encryption.o
OBJS-$(CONFIG_HLS_MUXER) += hlsenc.o hlsplaylist.o avc.o
OBJS-$(CONFIG_HNM_DEMUXER) += hnm.o
OBJS-$(CONFIG_IAMF_DEMUXER) += iamfdec.o iamf_parse.o iamf.o
+OBJS-$(CONFIG_IAMF_MUXER) += iamfenc.o iamf_writer.o iamf.o
OBJS-$(CONFIG_ICO_DEMUXER) += icodec.o
OBJS-$(CONFIG_ICO_MUXER) += icoenc.o
OBJS-$(CONFIG_IDCIN_DEMUXER) += idcin.o
diff --git a/libavformat/allformats.c b/libavformat/allformats.c
index 6e520b78a6..ce6be5f04d 100644
--- a/libavformat/allformats.c
+++ b/libavformat/allformats.c
@@ -213,6 +213,7 @@ extern const AVInputFormat ff_hls_demuxer;
extern const FFOutputFormat ff_hls_muxer;
extern const AVInputFormat ff_hnm_demuxer;
extern const AVInputFormat ff_iamf_demuxer;
+extern const FFOutputFormat ff_iamf_muxer;
extern const AVInputFormat ff_ico_demuxer;
extern const FFOutputFormat ff_ico_muxer;
extern const AVInputFormat ff_idcin_demuxer;
diff --git a/libavformat/iamf_writer.c b/libavformat/iamf_writer.c
new file mode 100644
index 0000000000..fc31174b53
--- /dev/null
+++ b/libavformat/iamf_writer.c
@@ -0,0 +1,823 @@
+/*
+ * Immersive Audio Model and Formats muxing helpers and structs
+ * Copyright (c) 2023 James Almer <jamrial at gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/channel_layout.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/iamf.h"
+#include "libavutil/mem.h"
+#include "libavcodec/get_bits.h"
+#include "libavcodec/flac.h"
+#include "libavcodec/mpeg4audio.h"
+#include "libavcodec/put_bits.h"
+#include "avformat.h"
+#include "avio_internal.h"
+#include "iamf.h"
+#include "iamf_writer.h"
+
+
+/**
+ * Rewrite a codec config's extradata in place so that it describes a stereo
+ * stream.
+ *
+ * Opus: the first 8 bytes are stripped and the byte at offset 1 of what
+ * remains is set to 2. FLAC: the first 13 bytes are re-packed with the 3-bit
+ * channel field forced to 1. Extradata of any other codec is left untouched.
+ *
+ * @param codec_config config whose extradata / extradata_size are updated
+ * @return 0 on success, a negative AVERROR code if the extradata is too short
+ *         or the bit reader cannot be initialized
+ */
+static int update_extradata(IAMFCodecConfig *codec_config)
+{
+    switch (codec_config->codec_id) {
+    case AV_CODEC_ID_OPUS:
+        if (codec_config->extradata_size < 19)
+            return AVERROR_INVALIDDATA;
+        codec_config->extradata_size -= 8;
+        memmove(codec_config->extradata, codec_config->extradata + 8, codec_config->extradata_size);
+        AV_WB8(codec_config->extradata + 1, 2); // set channels to stereo
+        break;
+    case AV_CODEC_ID_FLAC: {
+        GetBitContext gb;
+        PutBitContext pb;
+        uint8_t buf[13];
+        int ret, left;
+
+        // The re-packed header is copied back over the first sizeof(buf)
+        // bytes of the extradata, so it must be at least that large.
+        if (codec_config->extradata_size < (int)sizeof(buf))
+            return AVERROR_INVALIDDATA;
+
+        init_put_bits(&pb, buf, sizeof(buf));
+        ret = init_get_bits8(&gb, codec_config->extradata, codec_config->extradata_size);
+        if (ret < 0)
+            return ret;
+
+        put_bits32(&pb, get_bits_long(&gb, 32)); // min/max blocksize
+        put_bits64(&pb, 48, get_bits64(&gb, 48)); // min/max framesize
+        put_bits(&pb, 20, get_bits(&gb, 20)); // samplerate
+        skip_bits(&gb, 3);
+        put_bits(&pb, 3, 1); // set channels to stereo
+        // Copy whatever bits are still needed to fill buf.
+        left = put_bits_left(&pb);
+        put_bits(&pb, left, get_bits(&gb, left));
+        flush_put_bits(&pb);
+
+        memcpy(codec_config->extradata, buf, sizeof(buf));
+        break;
+    }
+    default:
+        break;
+    }
+
+    return 0;
+}
+
+/**
+ * Populate codec_config from the first stream of the group and register it
+ * in iamf->codec_configs.
+ *
+ * If an equivalent config is already stored, the stored entry is freed and
+ * replaced by codec_config. Otherwise codec_config is appended and given the
+ * next codec_config_id.
+ *
+ * @param iamf         context owning the codec config array
+ * @param stg          stream group; its first stream supplies the parameters
+ * @param codec_config zero-initialized config to fill
+ * @return the index of the config in iamf->codec_configs on success, a
+ *         negative AVERROR code on failure. On failure the extradata copy is
+ *         released, but codec_config itself is left for the caller to free.
+ */
+static int fill_codec_config(IAMFContext *iamf, const AVStreamGroup *stg,
+                             IAMFCodecConfig *codec_config)
+{
+    const AVStream *st = stg->streams[0];
+    IAMFCodecConfig **tmp;
+    int j, ret = 0;
+
+    codec_config->codec_id = st->codecpar->codec_id;
+    codec_config->sample_rate = st->codecpar->sample_rate;
+    codec_config->codec_tag = st->codecpar->codec_tag;
+    codec_config->nb_samples = st->codecpar->frame_size;
+    codec_config->seek_preroll = st->codecpar->seek_preroll;
+    if (st->codecpar->extradata_size) {
+        codec_config->extradata = av_memdup(st->codecpar->extradata, st->codecpar->extradata_size);
+        if (!codec_config->extradata)
+            return AVERROR(ENOMEM);
+        codec_config->extradata_size = st->codecpar->extradata_size;
+        ret = update_extradata(codec_config);
+        if (ret < 0)
+            goto fail;
+    }
+
+    for (j = 0; j < iamf->nb_codec_configs; j++) {
+        const IAMFCodecConfig *other = iamf->codec_configs[j];
+
+        if (memcmp(other, codec_config, offsetof(IAMFCodecConfig, extradata)))
+            continue;
+        // Only compare the payloads when both have the same length, so the
+        // stored buffer is never read past its end (or through NULL).
+        if (other->extradata_size != codec_config->extradata_size)
+            continue;
+        if (!codec_config->extradata_size ||
+            !memcmp(other->extradata, codec_config->extradata, codec_config->extradata_size))
+            break;
+    }
+
+    if (j < iamf->nb_codec_configs) {
+        // Equivalent config already present: the new one takes its slot.
+        av_free(iamf->codec_configs[j]->extradata);
+        av_free(iamf->codec_configs[j]);
+        iamf->codec_configs[j] = codec_config;
+        return j;
+    }
+
+    tmp = av_realloc_array(iamf->codec_configs, iamf->nb_codec_configs + 1, sizeof(*iamf->codec_configs));
+    if (!tmp) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    iamf->codec_configs = tmp;
+    iamf->codec_configs[iamf->nb_codec_configs] = codec_config;
+    codec_config->codec_config_id = iamf->nb_codec_configs;
+
+    return iamf->nb_codec_configs++;
+
+fail:
+    av_freep(&codec_config->extradata);
+    return ret;
+}
+
+/**
+ * Append a new, zero-initialized parameter definition wrapping param to
+ * iamf->param_definitions.
+ *
+ * @return the new entry, or NULL if an allocation failed
+ */
+static IAMFParamDefinition *add_param_definition(IAMFContext *iamf, AVIAMFParamDefinition *param)
+{
+    IAMFParamDefinition *entry;
+    IAMFParamDefinition **grown = av_realloc_array(iamf->param_definitions,
+                                                   iamf->nb_param_definitions + 1,
+                                                   sizeof(*iamf->param_definitions));
+
+    if (!grown)
+        return NULL;
+    // Keep the grown array even if the entry allocation below fails.
+    iamf->param_definitions = grown;
+
+    entry = av_mallocz(sizeof(*entry));
+    if (entry) {
+        entry->param = param;
+        iamf->param_definitions[iamf->nb_param_definitions] = entry;
+        iamf->nb_param_definitions++;
+    }
+
+    return entry;
+}
+
+/**
+ * Validate an IAMF Audio Element stream group and register it in iamf.
+ *
+ * Registers (or reuses) the codec config taken from the group's first stream,
+ * maps the group's streams onto the element's layers, and records the
+ * demixing / recon gain parameter definitions if present.
+ *
+ * @param iamf    context that receives the audio element
+ * @param stg     stream group of type AV_STREAM_GROUP_PARAMS_IAMF_AUDIO_ELEMENT
+ * @param log_ctx context passed to av_log()
+ * @return 0 on success, a negative AVERROR code on failure. Nothing allocated
+ *         for the audio element itself is kept on failure.
+ */
+int ff_iamf_add_audio_element(IAMFContext *iamf, const AVStreamGroup *stg, void *log_ctx)
+{
+    const AVIAMFAudioElement *iamf_audio_element;
+    IAMFAudioElement **tmp, *audio_element;
+    IAMFCodecConfig *codec_config;
+    int ret;
+
+    if (stg->type != AV_STREAM_GROUP_PARAMS_IAMF_AUDIO_ELEMENT)
+        return AVERROR(EINVAL);
+
+    iamf_audio_element = stg->params.iamf_audio_element;
+    if (iamf_audio_element->audio_element_type == AV_IAMF_AUDIO_ELEMENT_TYPE_SCENE) {
+        const AVIAMFLayer *layer;
+
+        if (iamf_audio_element->nb_layers != 1) {
+            av_log(log_ctx, AV_LOG_ERROR, "Invalid amount of layers for SCENE_BASED audio element. Must be 1\n");
+            return AVERROR(EINVAL);
+        }
+        // Only read layers[0] once nb_layers is known to be 1.
+        layer = iamf_audio_element->layers[0];
+        if (layer->ch_layout.order != AV_CHANNEL_ORDER_CUSTOM &&
+            layer->ch_layout.order != AV_CHANNEL_ORDER_AMBISONIC) {
+            av_log(log_ctx, AV_LOG_ERROR, "Invalid channel layout for SCENE_BASED audio element\n");
+            return AVERROR(EINVAL);
+        }
+        if (layer->ambisonics_mode >= AV_IAMF_AMBISONICS_MODE_PROJECTION) {
+            av_log(log_ctx, AV_LOG_ERROR, "Unsuported ambisonics mode %d\n", layer->ambisonics_mode);
+            return AVERROR_PATCHWELCOME;
+        }
+        for (int i = 0; i < stg->nb_streams; i++) {
+            if (stg->streams[i]->codecpar->ch_layout.nb_channels > 1) {
+                av_log(log_ctx, AV_LOG_ERROR, "Invalid amount of channels in a stream for MONO mode ambisonics\n");
+                return AVERROR(EINVAL);
+            }
+        }
+    } else
+        for (int j, i = 0; i < iamf_audio_element->nb_layers; i++) {
+            const AVIAMFLayer *layer = iamf_audio_element->layers[i];
+            for (j = 0; j < FF_ARRAY_ELEMS(ff_iamf_scalable_ch_layouts); j++)
+                if (!av_channel_layout_compare(&layer->ch_layout, &ff_iamf_scalable_ch_layouts[j]))
+                    break;
+
+            if (j >= FF_ARRAY_ELEMS(ff_iamf_scalable_ch_layouts)) {
+                av_log(log_ctx, AV_LOG_ERROR, "Unsupported channel layout in stream group #%d\n", i);
+                return AVERROR(EINVAL);
+            }
+        }
+
+    for (int i = 0; i < iamf->nb_audio_elements; i++) {
+        if (stg->id == iamf->audio_elements[i]->audio_element_id) {
+            av_log(log_ctx, AV_LOG_ERROR, "Duplicated Audio Element id %"PRId64"\n", stg->id);
+            return AVERROR(EINVAL);
+        }
+    }
+
+    codec_config = av_mallocz(sizeof(*codec_config));
+    if (!codec_config)
+        return AVERROR(ENOMEM);
+
+    ret = fill_codec_config(iamf, stg, codec_config);
+    if (ret < 0) {
+        av_free(codec_config);
+        return ret;
+    }
+
+    // From here on codec_config is owned by iamf->codec_configs.
+    audio_element = av_mallocz(sizeof(*audio_element));
+    if (!audio_element)
+        return AVERROR(ENOMEM);
+
+    audio_element->element = stg->params.iamf_audio_element;
+    audio_element->audio_element_id = stg->id;
+    audio_element->codec_config_id = ret;
+
+    audio_element->substreams = av_calloc(stg->nb_streams, sizeof(*audio_element->substreams));
+    if (!audio_element->substreams) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+    audio_element->nb_substreams = stg->nb_streams;
+
+    audio_element->layers = av_calloc(iamf_audio_element->nb_layers, sizeof(*audio_element->layers));
+    if (!audio_element->layers) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    // j walks the group's streams once across all layers.
+    for (int i = 0, j = 0; i < iamf_audio_element->nb_layers; i++) {
+        int nb_channels = iamf_audio_element->layers[i]->ch_layout.nb_channels;
+        // The array was just zero-allocated, so the counters start at 0.
+        IAMFLayer *layer = &audio_element->layers[i];
+
+        // Only the channels added on top of the previous layer need streams.
+        if (i)
+            nb_channels -= iamf_audio_element->layers[i - 1]->ch_layout.nb_channels;
+        for (; nb_channels > 0 && j < stg->nb_streams; j++) {
+            const AVStream *st = stg->streams[j];
+            IAMFSubStream *substream = &audio_element->substreams[j];
+
+            substream->audio_substream_id = st->id;
+            layer->substream_count++;
+            layer->coupled_substream_count += st->codecpar->ch_layout.nb_channels == 2;
+            nb_channels -= st->codecpar->ch_layout.nb_channels;
+        }
+        if (nb_channels) {
+            av_log(log_ctx, AV_LOG_ERROR, "Invalid channel count across substreams in layer %u from stream group %u\n",
+                   i, stg->index);
+            ret = AVERROR(EINVAL);
+            goto fail;
+        }
+    }
+
+    if (iamf_audio_element->demixing_info) {
+        AVIAMFParamDefinition *param = iamf_audio_element->demixing_info;
+        IAMFParamDefinition *param_definition = ff_iamf_get_param_definition(iamf, param->parameter_id);
+
+        if (param->nb_subblocks != 1) {
+            av_log(log_ctx, AV_LOG_ERROR, "nb_subblocks in demixing_info for stream group %u is not 1\n", stg->index);
+            ret = AVERROR(EINVAL);
+            goto fail;
+        }
+        if (!param_definition) {
+            param_definition = add_param_definition(iamf, param);
+            if (!param_definition) {
+                ret = AVERROR(ENOMEM);
+                goto fail;
+            }
+        }
+        param_definition->audio_element = iamf_audio_element;
+    }
+    if (iamf_audio_element->recon_gain_info) {
+        AVIAMFParamDefinition *param = iamf_audio_element->recon_gain_info;
+        IAMFParamDefinition *param_definition = ff_iamf_get_param_definition(iamf, param->parameter_id);
+
+        if (param->nb_subblocks != 1) {
+            av_log(log_ctx, AV_LOG_ERROR, "nb_subblocks in recon_gain_info for stream group %u is not 1\n", stg->index);
+            ret = AVERROR(EINVAL);
+            goto fail;
+        }
+
+        if (!param_definition) {
+            param_definition = add_param_definition(iamf, param);
+            if (!param_definition) {
+                ret = AVERROR(ENOMEM);
+                goto fail;
+            }
+        }
+        param_definition->audio_element = iamf_audio_element;
+    }
+
+    tmp = av_realloc_array(iamf->audio_elements, iamf->nb_audio_elements + 1, sizeof(*iamf->audio_elements));
+    if (!tmp) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    iamf->audio_elements = tmp;
+    iamf->audio_elements[iamf->nb_audio_elements++] = audio_element;
+
+    return 0;
+
+fail:
+    // audio_element was never stored in iamf, so it must be released here.
+    av_free(audio_element->layers);
+    av_free(audio_element->substreams);
+    av_free(audio_element);
+    return ret;
+}
+
+/**
+ * Validate an IAMF Mix Presentation stream group and register it in iamf.
+ *
+ * Every submix must carry an output_mix_config and every submix element an
+ * element_mix_config; a parameter definition is registered for each one that
+ * is not already known.
+ *
+ * @param iamf    context that receives the mix presentation
+ * @param stg     stream group of type AV_STREAM_GROUP_PARAMS_IAMF_MIX_PRESENTATION
+ * @param log_ctx context passed to av_log()
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+int ff_iamf_add_mix_presentation(IAMFContext *iamf, const AVStreamGroup *stg, void *log_ctx)
+{
+    IAMFMixPresentation **tmp, *mix_presentation;
+    int ret;
+
+    if (stg->type != AV_STREAM_GROUP_PARAMS_IAMF_MIX_PRESENTATION)
+        return AVERROR(EINVAL);
+
+    for (int i = 0; i < iamf->nb_mix_presentations; i++) {
+        if (stg->id == iamf->mix_presentations[i]->mix_presentation_id) {
+            av_log(log_ctx, AV_LOG_ERROR, "Duplicate Mix Presentation id %"PRId64"\n", stg->id);
+            return AVERROR(EINVAL);
+        }
+    }
+
+    mix_presentation = av_mallocz(sizeof(*mix_presentation));
+    if (!mix_presentation)
+        return AVERROR(ENOMEM);
+
+    mix_presentation->mix = stg->params.iamf_mix_presentation;
+    mix_presentation->mix_presentation_id = stg->id;
+
+    for (int i = 0; i < mix_presentation->mix->nb_submixes; i++) {
+        const AVIAMFSubmix *submix = mix_presentation->mix->submixes[i];
+        AVIAMFParamDefinition *param = submix->output_mix_config;
+        IAMFParamDefinition *param_definition;
+
+        if (!param) {
+            av_log(log_ctx, AV_LOG_ERROR, "output_mix_config is not present in submix %u from "
+                                          "Mix Presentation ID %"PRId64"\n", i, stg->id);
+            ret = AVERROR(EINVAL);
+            goto fail;
+        }
+
+        param_definition = ff_iamf_get_param_definition(iamf, param->parameter_id);
+        if (!param_definition) {
+            param_definition = add_param_definition(iamf, param);
+            if (!param_definition) {
+                ret = AVERROR(ENOMEM);
+                goto fail;
+            }
+        }
+
+        for (int j = 0; j < submix->nb_elements; j++) {
+            const AVIAMFAudioElement *iamf_audio_element = NULL;
+            const AVIAMFSubmixElement *element = submix->elements[j];
+            param = element->element_mix_config;
+
+            if (!param) {
+                av_log(log_ctx, AV_LOG_ERROR, "element_mix_config is not present for element %u in submix %u from "
+                                              "Mix Presentation ID %"PRId64"\n", j, i, stg->id);
+                ret = AVERROR(EINVAL);
+                goto fail;
+            }
+            param_definition = ff_iamf_get_param_definition(iamf, param->parameter_id);
+            if (!param_definition) {
+                param_definition = add_param_definition(iamf, param);
+                if (!param_definition) {
+                    ret = AVERROR(ENOMEM);
+                    goto fail;
+                }
+            }
+            // Link the definition to the audio element it applies to; this
+            // stays NULL if no registered element has a matching id.
+            for (int k = 0; k < iamf->nb_audio_elements; k++)
+                if (iamf->audio_elements[k]->audio_element_id == element->audio_element_id) {
+                    iamf_audio_element = iamf->audio_elements[k]->element;
+                    break;
+                }
+            param_definition->audio_element = iamf_audio_element;
+        }
+    }
+
+    tmp = av_realloc_array(iamf->mix_presentations, iamf->nb_mix_presentations + 1, sizeof(*iamf->mix_presentations));
+    if (!tmp) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    iamf->mix_presentations = tmp;
+    iamf->mix_presentations[iamf->nb_mix_presentations++] = mix_presentation;
+
+    return 0;
+
+fail:
+    // mix_presentation was never stored in iamf, so it must be released here.
+    av_free(mix_presentation);
+    return ret;
+}
+
+/**
+ * Write one Codec Config OBU to pb.
+ *
+ * The payload is assembled in a dynamic buffer so that its size can be
+ * written ahead of it.
+ *
+ * @param iamf         IAMF context (not used by this function)
+ * @param codec_config config to serialize
+ * @param pb           output context
+ * @return 0 on success, AVERROR_PATCHWELCOME for AAC, or another negative
+ *         AVERROR code if the dynamic buffer cannot be opened
+ */
+static int iamf_write_codec_config(const IAMFContext *iamf,
+                                   const IAMFCodecConfig *codec_config,
+                                   AVIOContext *pb)
+{
+    uint8_t header[MAX_IAMF_OBU_HEADER_SIZE];
+    AVIOContext *dyn_bc;
+    uint8_t *dyn_buf = NULL;
+    PutBitContext pbc;
+    int dyn_size;
+
+    int ret = avio_open_dyn_buf(&dyn_bc);
+    if (ret < 0)
+        return ret;
+
+    ffio_write_leb(dyn_bc, codec_config->codec_config_id);
+    avio_wl32(dyn_bc, codec_config->codec_tag);
+
+    ffio_write_leb(dyn_bc, codec_config->nb_samples);
+    avio_wb16(dyn_bc, codec_config->seek_preroll);
+
+    switch(codec_config->codec_id) {
+    case AV_CODEC_ID_OPUS:
+        avio_write(dyn_bc, codec_config->extradata, codec_config->extradata_size);
+        break;
+    case AV_CODEC_ID_AAC:
+        // Not implemented; release the payload buffer before bailing out.
+        ffio_free_dyn_buf(&dyn_bc);
+        return AVERROR_PATCHWELCOME;
+    case AV_CODEC_ID_FLAC:
+        avio_w8(dyn_bc, 0x80);
+        avio_wb24(dyn_bc, codec_config->extradata_size);
+        avio_write(dyn_bc, codec_config->extradata, codec_config->extradata_size);
+        break;
+    // PCM: one flag byte (0 for the LE codecs, 1 for the BE ones), the
+    // sample size in bits, then the sample rate.
+    case AV_CODEC_ID_PCM_S16LE:
+        avio_w8(dyn_bc, 0);
+        avio_w8(dyn_bc, 16);
+        avio_wb32(dyn_bc, codec_config->sample_rate);
+        break;
+    case AV_CODEC_ID_PCM_S24LE:
+        avio_w8(dyn_bc, 0);
+        avio_w8(dyn_bc, 24);
+        avio_wb32(dyn_bc, codec_config->sample_rate);
+        break;
+    case AV_CODEC_ID_PCM_S32LE:
+        avio_w8(dyn_bc, 0);
+        avio_w8(dyn_bc, 32);
+        avio_wb32(dyn_bc, codec_config->sample_rate);
+        break;
+    case AV_CODEC_ID_PCM_S16BE:
+        avio_w8(dyn_bc, 1);
+        avio_w8(dyn_bc, 16);
+        avio_wb32(dyn_bc, codec_config->sample_rate);
+        break;
+    case AV_CODEC_ID_PCM_S24BE:
+        avio_w8(dyn_bc, 1);
+        avio_w8(dyn_bc, 24);
+        avio_wb32(dyn_bc, codec_config->sample_rate);
+        break;
+    case AV_CODEC_ID_PCM_S32BE:
+        avio_w8(dyn_bc, 1);
+        avio_w8(dyn_bc, 32);
+        avio_wb32(dyn_bc, codec_config->sample_rate);
+        break;
+    default:
+        break;
+    }
+
+    // OBU header byte: 5-bit type followed by three zero bits.
+    init_put_bits(&pbc, header, sizeof(header));
+    put_bits(&pbc, 5, IAMF_OBU_IA_CODEC_CONFIG);
+    put_bits(&pbc, 3, 0);
+    flush_put_bits(&pbc);
+
+    dyn_size = avio_close_dyn_buf(dyn_bc, &dyn_buf);
+    avio_write(pb, header, put_bytes_count(&pbc, 1));
+    ffio_write_leb(pb, dyn_size);
+    avio_write(pb, dyn_buf, dyn_size);
+    av_free(dyn_buf);
+
+    return 0;
+}
+
+/**
+ * Compute av_rescale(q.num, b, q.den) and pass the result through
+ * av_clip_int16().
+ */
+static inline int rescale_rational(AVRational q, int b)
+{
+    const int64_t scaled = av_rescale(q.num, b, q.den);
+
+    return av_clip_int16(scaled);
+}
+
+/**
+ * Write the scalable channel layout configuration of a channel-based audio
+ * element to dyn_bc: one byte holding the layer count, then one record per
+ * layer.
+ *
+ * @return 0 (always)
+ */
+static int scalable_channel_layout_config(const IAMFAudioElement *audio_element,
+                                          AVIOContext *dyn_bc)
+{
+    const AVIAMFAudioElement *element = audio_element->element;
+    uint8_t buf[MAX_IAMF_OBU_HEADER_SIZE];
+    PutBitContext bits;
+
+    // 3-bit layer count followed by five zero bits.
+    init_put_bits(&bits, buf, sizeof(buf));
+    put_bits(&bits, 3, element->nb_layers);
+    put_bits(&bits, 5, 0);
+    flush_put_bits(&bits);
+    avio_write(dyn_bc, buf, put_bytes_count(&bits, 1));
+
+    for (int i = 0; i < element->nb_layers; i++) {
+        const IAMFLayer *counts = &audio_element->layers[i];
+        AVIAMFLayer *layer = element->layers[i];
+        int idx = 0;
+
+        // The index of the layer's layout in ff_iamf_scalable_ch_layouts is
+        // what gets written to the 4-bit layout field.
+        while (idx < FF_ARRAY_ELEMS(ff_iamf_scalable_ch_layouts) &&
+               av_channel_layout_compare(&layer->ch_layout, &ff_iamf_scalable_ch_layouts[idx]))
+            idx++;
+
+        init_put_bits(&bits, buf, sizeof(buf));
+        put_bits(&bits, 4, idx);
+        put_bits(&bits, 1, !!layer->output_gain_flags);
+        put_bits(&bits, 1, !!(layer->flags & AV_IAMF_LAYER_FLAG_RECON_GAIN));
+        put_bits(&bits, 2, 0); // reserved
+        put_bits(&bits, 8, counts->substream_count);
+        put_bits(&bits, 8, counts->coupled_substream_count);
+        if (layer->output_gain_flags) {
+            // Optional output gain: 6-bit flags, two zero bits, 16-bit gain.
+            put_bits(&bits, 6, layer->output_gain_flags);
+            put_bits(&bits, 2, 0);
+            put_bits(&bits, 16, rescale_rational(layer->output_gain, 1 << 8));
+        }
+        flush_put_bits(&bits);
+        avio_write(dyn_bc, buf, put_bytes_count(&bits, 1));
+    }
+
+    return 0;
+}
+
+/**
+ * Write the ambisonics configuration of a scene-based audio element to
+ * dyn_bc, using the element's first layer.
+ *
+ * @return 0 (always)
+ */
+static int ambisonics_config(const IAMFAudioElement *audio_element,
+                             AVIOContext *dyn_bc)
+{
+    const AVIAMFLayer *layer = audio_element->element->layers[0];
+    const int nb_channels = layer->ch_layout.nb_channels;
+    const int native_order = layer->ch_layout.order == AV_CHANNEL_ORDER_AMBISONIC;
+
+    ffio_write_leb(dyn_bc, 0); // ambisonics_mode
+    ffio_write_leb(dyn_bc, nb_channels); // output_channel_count
+    ffio_write_leb(dyn_bc, audio_element->nb_substreams); // substream_count
+
+    // One mapping byte per channel: the channel index itself for native
+    // ambisonic order, otherwise the id from the layout's custom map.
+    for (int ch = 0; ch < nb_channels; ch++)
+        avio_w8(dyn_bc, native_order ? ch : layer->ch_layout.u.map[ch].id);
+
+    return 0;
+}
+
+/**
+ * Serialize a parameter definition to dyn_bc.
+ *
+ * The id, rate and mode byte are always written. Duration fields follow only
+ * when param_definition_mode is 0, and per-subblock durations only when
+ * constant_subblock_duration is 0 as well.
+ *
+ * @return 0 (always)
+ */
+static int param_definition(const AVIAMFParamDefinition *param,
+                            AVIOContext *dyn_bc)
+{
+    ffio_write_leb(dyn_bc, param->parameter_id);
+    ffio_write_leb(dyn_bc, param->parameter_rate);
+    avio_w8(dyn_bc, !!param->param_definition_mode << 7);
+
+    if (param->param_definition_mode)
+        return 0;
+
+    ffio_write_leb(dyn_bc, param->duration);
+    ffio_write_leb(dyn_bc, param->constant_subblock_duration);
+
+    if (param->constant_subblock_duration != 0)
+        return 0;
+
+    ffio_write_leb(dyn_bc, param->nb_subblocks);
+    for (int idx = 0; idx < param->nb_subblocks; idx++) {
+        const void *subblock = av_iamf_param_definition_get_subblock(param, idx);
+
+        // The duration field lives in a type-specific subblock struct.
+        switch (param->param_definition_type) {
+        case AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN: {
+            const AVIAMFMixGain *mix_gain = subblock;
+            ffio_write_leb(dyn_bc, mix_gain->subblock_duration);
+            break;
+        }
+        case AV_IAMF_PARAMETER_DEFINITION_DEMIXING: {
+            const AVIAMFDemixingInfo *demixing = subblock;
+            ffio_write_leb(dyn_bc, demixing->subblock_duration);
+            break;
+        }
+        case AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN: {
+            const AVIAMFReconGain *recon_gain = subblock;
+            ffio_write_leb(dyn_bc, recon_gain->subblock_duration);
+            break;
+        }
+        }
+    }
+
+    return 0;
+}
+
+/**
+ * Write one Audio Element OBU to pb.
+ *
+ * The payload is assembled in a dynamic buffer so that its size can be
+ * written ahead of it. The buffer is released on every error path.
+ *
+ * @param iamf          context holding the codec config the element refers to
+ * @param audio_element element to serialize
+ * @param pb            output context
+ * @param log_ctx       context passed to av_log()
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int iamf_write_audio_element(const IAMFContext *iamf,
+                                    const IAMFAudioElement *audio_element,
+                                    AVIOContext *pb, void *log_ctx)
+{
+    const AVIAMFAudioElement *element = audio_element->element;
+    const IAMFCodecConfig *codec_config = iamf->codec_configs[audio_element->codec_config_id];
+    uint8_t header[MAX_IAMF_OBU_HEADER_SIZE];
+    AVIOContext *dyn_bc;
+    uint8_t *dyn_buf = NULL;
+    PutBitContext pbc;
+    int param_definition_types = AV_IAMF_PARAMETER_DEFINITION_DEMIXING, dyn_size;
+
+    int ret = avio_open_dyn_buf(&dyn_bc);
+    if (ret < 0)
+        return ret;
+
+    ffio_write_leb(dyn_bc, audio_element->audio_element_id);
+
+    init_put_bits(&pbc, header, sizeof(header));
+    put_bits(&pbc, 3, element->audio_element_type);
+    put_bits(&pbc, 5, 0);
+    flush_put_bits(&pbc);
+    avio_write(dyn_bc, header, put_bytes_count(&pbc, 1));
+
+    ffio_write_leb(dyn_bc, audio_element->codec_config_id);
+    ffio_write_leb(dyn_bc, audio_element->nb_substreams);
+
+    for (int i = 0; i < audio_element->nb_substreams; i++)
+        ffio_write_leb(dyn_bc, audio_element->substreams[i].audio_substream_id);
+
+    // param_definition_types is used as a bit set, tested below with the
+    // masks 1 (demixing) and 2 (recon gain). Demixing is dropped for
+    // single-layer elements; recon gain is added for multi-layer elements
+    // unless the codec tag is fLaC or ipcm.
+    if (element->nb_layers == 1)
+        param_definition_types &= ~AV_IAMF_PARAMETER_DEFINITION_DEMIXING;
+    if (element->nb_layers > 1)
+        param_definition_types |= AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN;
+    if (codec_config->codec_tag == MKTAG('f','L','a','C') ||
+        codec_config->codec_tag == MKTAG('i','p','c','m'))
+        param_definition_types &= ~AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN;
+
+    ffio_write_leb(dyn_bc, av_popcount(param_definition_types)); // num_parameters
+
+    if (param_definition_types & 1) {
+        const AVIAMFParamDefinition *param = element->demixing_info;
+        const AVIAMFDemixingInfo *demix;
+
+        if (!param) {
+            av_log(log_ctx, AV_LOG_ERROR, "demixing_info needed but not set in Stream Group #%u\n",
+                   audio_element->audio_element_id);
+            ret = AVERROR(EINVAL);
+            goto fail;
+        }
+
+        demix = av_iamf_param_definition_get_subblock(param, 0);
+        ffio_write_leb(dyn_bc, AV_IAMF_PARAMETER_DEFINITION_DEMIXING); // param_definition_type
+        param_definition(param, dyn_bc);
+
+        avio_w8(dyn_bc, demix->dmixp_mode << 5); // dmixp_mode
+        avio_w8(dyn_bc, element->default_w << 4); // default_w
+    }
+    if (param_definition_types & 2) {
+        const AVIAMFParamDefinition *param = element->recon_gain_info;
+
+        if (!param) {
+            av_log(log_ctx, AV_LOG_ERROR, "recon_gain_info needed but not set in Stream Group #%u\n",
+                   audio_element->audio_element_id);
+            ret = AVERROR(EINVAL);
+            goto fail;
+        }
+        ffio_write_leb(dyn_bc, AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN); // param_definition_type
+        param_definition(param, dyn_bc);
+    }
+
+    if (element->audio_element_type == AV_IAMF_AUDIO_ELEMENT_TYPE_CHANNEL) {
+        ret = scalable_channel_layout_config(audio_element, dyn_bc);
+        if (ret < 0)
+            goto fail;
+    } else {
+        ret = ambisonics_config(audio_element, dyn_bc);
+        if (ret < 0)
+            goto fail;
+    }
+
+    // OBU header byte: 5-bit type followed by three zero bits.
+    init_put_bits(&pbc, header, sizeof(header));
+    put_bits(&pbc, 5, IAMF_OBU_IA_AUDIO_ELEMENT);
+    put_bits(&pbc, 3, 0);
+    flush_put_bits(&pbc);
+
+    dyn_size = avio_close_dyn_buf(dyn_bc, &dyn_buf);
+    avio_write(pb, header, put_bytes_count(&pbc, 1));
+    ffio_write_leb(pb, dyn_size);
+    avio_write(pb, dyn_buf, dyn_size);
+    av_free(dyn_buf);
+
+    return 0;
+
+fail:
+    // Drop the partially written payload so the dynamic buffer is not leaked.
+    ffio_free_dyn_buf(&dyn_bc);
+    return ret;
+}
+
+/**
+ * Write one Mix Presentation OBU to pb.
+ *
+ * The payload is assembled in a dynamic buffer so that its size can be
+ * written ahead of it. The buffer is released on every error path.
+ *
+ * @param iamf             context holding the registered audio elements
+ * @param mix_presentation mix presentation to serialize
+ * @param pb               output context
+ * @param log_ctx          context passed to av_log()
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int iamf_write_mixing_presentation(const IAMFContext *iamf,
+                                          const IAMFMixPresentation *mix_presentation,
+                                          AVIOContext *pb, void *log_ctx)
+{
+    uint8_t header[MAX_IAMF_OBU_HEADER_SIZE];
+    const AVIAMFMixPresentation *mix = mix_presentation->mix;
+    const AVDictionaryEntry *tag = NULL;
+    PutBitContext pbc;
+    AVIOContext *dyn_bc;
+    uint8_t *dyn_buf = NULL;
+    int dyn_size;
+
+    int ret = avio_open_dyn_buf(&dyn_bc);
+    if (ret < 0)
+        return ret;
+
+    ffio_write_leb(dyn_bc, mix_presentation->mix_presentation_id); // mix_presentation_id
+    ffio_write_leb(dyn_bc, av_dict_count(mix->annotations)); // count_label
+
+    // All keys first, then all values. Each loop ends with tag == NULL, so
+    // the next iteration starts from the beginning of its dictionary.
+    while ((tag = av_dict_iterate(mix->annotations, tag)))
+        avio_put_str(dyn_bc, tag->key);
+    while ((tag = av_dict_iterate(mix->annotations, tag)))
+        avio_put_str(dyn_bc, tag->value);
+
+    ffio_write_leb(dyn_bc, mix->nb_submixes);
+    for (int i = 0; i < mix->nb_submixes; i++) {
+        const AVIAMFSubmix *sub_mix = mix->submixes[i];
+
+        ffio_write_leb(dyn_bc, sub_mix->nb_elements);
+        for (int j = 0; j < sub_mix->nb_elements; j++) {
+            const IAMFAudioElement *audio_element = NULL;
+            const AVIAMFSubmixElement *submix_element = sub_mix->elements[j];
+
+            for (int k = 0; k < iamf->nb_audio_elements; k++)
+                if (iamf->audio_elements[k]->audio_element_id == submix_element->audio_element_id) {
+                    audio_element = iamf->audio_elements[k];
+                    break;
+                }
+
+            av_assert0(audio_element);
+            ffio_write_leb(dyn_bc, submix_element->audio_element_id);
+
+            if (av_dict_count(submix_element->annotations) != av_dict_count(mix->annotations)) {
+                av_log(log_ctx, AV_LOG_ERROR, "Inconsistent amount of labels in submix %d from Mix Presentation id #%u\n",
+                       j, audio_element->audio_element_id);
+                ret = AVERROR(EINVAL);
+                goto fail;
+            }
+            while ((tag = av_dict_iterate(submix_element->annotations, tag)))
+                avio_put_str(dyn_bc, tag->value);
+
+            init_put_bits(&pbc, header, sizeof(header));
+            put_bits(&pbc, 2, submix_element->headphones_rendering_mode);
+            put_bits(&pbc, 6, 0); // reserved
+            flush_put_bits(&pbc);
+            avio_write(dyn_bc, header, put_bytes_count(&pbc, 1));
+            ffio_write_leb(dyn_bc, 0); // rendering_config_extension_size
+            param_definition(submix_element->element_mix_config, dyn_bc);
+            avio_wb16(dyn_bc, rescale_rational(submix_element->default_mix_gain, 1 << 8));
+        }
+        param_definition(sub_mix->output_mix_config, dyn_bc);
+        avio_wb16(dyn_bc, rescale_rational(sub_mix->default_mix_gain, 1 << 8));
+
+        ffio_write_leb(dyn_bc, sub_mix->nb_layouts); // nb_layouts
+        for (int l = 0; l < sub_mix->nb_layouts; l++) {
+            const AVIAMFSubmixLayout *submix_layout = sub_mix->layouts[l];
+            int layout = 0, info_type;
+            int dialogue = submix_layout->dialogue_anchored_loudness.num &&
+                           submix_layout->dialogue_anchored_loudness.den;
+            int album = submix_layout->album_anchored_loudness.num &&
+                        submix_layout->album_anchored_loudness.den;
+
+            // The sound system only needs resolving (and validating) for
+            // loudspeaker layouts; layout is not used otherwise.
+            if (submix_layout->layout_type == AV_IAMF_SUBMIX_LAYOUT_TYPE_LOUDSPEAKERS) {
+                for (layout = 0; layout < FF_ARRAY_ELEMS(ff_iamf_sound_system_map); layout++) {
+                    if (!av_channel_layout_compare(&submix_layout->sound_system, &ff_iamf_sound_system_map[layout].layout))
+                        break;
+                }
+                if (layout == FF_ARRAY_ELEMS(ff_iamf_sound_system_map)) {
+                    av_log(log_ctx, AV_LOG_ERROR, "Invalid Sound System value in a submix\n");
+                    ret = AVERROR(EINVAL);
+                    goto fail;
+                }
+            }
+            init_put_bits(&pbc, header, sizeof(header));
+            put_bits(&pbc, 2, submix_layout->layout_type); // layout_type
+            if (submix_layout->layout_type == AV_IAMF_SUBMIX_LAYOUT_TYPE_LOUDSPEAKERS) {
+                put_bits(&pbc, 4, ff_iamf_sound_system_map[layout].id); // sound_system
+                put_bits(&pbc, 2, 0); // reserved
+            } else
+                put_bits(&pbc, 6, 0); // reserved
+            flush_put_bits(&pbc);
+            avio_write(dyn_bc, header, put_bytes_count(&pbc, 1));
+
+            // info_type bit 0: true peak present; bit 1: anchored loudness
+            // present.
+            info_type = (submix_layout->true_peak.num && submix_layout->true_peak.den);
+            info_type |= (dialogue || album) << 1;
+            avio_w8(dyn_bc, info_type);
+            avio_wb16(dyn_bc, rescale_rational(submix_layout->integrated_loudness, 1 << 8));
+            avio_wb16(dyn_bc, rescale_rational(submix_layout->digital_peak, 1 << 8));
+            if (info_type & 1)
+                avio_wb16(dyn_bc, rescale_rational(submix_layout->true_peak, 1 << 8));
+            if (info_type & 2) {
+                avio_w8(dyn_bc, dialogue + album); // num_anchored_loudness
+                if (dialogue) {
+                    avio_w8(dyn_bc, IAMF_ANCHOR_ELEMENT_DIALOGUE);
+                    avio_wb16(dyn_bc, rescale_rational(submix_layout->dialogue_anchored_loudness, 1 << 8));
+                }
+                if (album) {
+                    avio_w8(dyn_bc, IAMF_ANCHOR_ELEMENT_ALBUM);
+                    avio_wb16(dyn_bc, rescale_rational(submix_layout->album_anchored_loudness, 1 << 8));
+                }
+            }
+        }
+    }
+
+    // OBU header byte: 5-bit type followed by three zero bits.
+    init_put_bits(&pbc, header, sizeof(header));
+    put_bits(&pbc, 5, IAMF_OBU_IA_MIX_PRESENTATION);
+    put_bits(&pbc, 3, 0);
+    flush_put_bits(&pbc);
+
+    dyn_size = avio_close_dyn_buf(dyn_bc, &dyn_buf);
+    avio_write(pb, header, put_bytes_count(&pbc, 1));
+    ffio_write_leb(pb, dyn_size);
+    avio_write(pb, dyn_buf, dyn_size);
+    av_free(dyn_buf);
+
+    return 0;
+
+fail:
+    // Drop the partially written payload so the dynamic buffer is not leaked.
+    ffio_free_dyn_buf(&dyn_bc);
+    return ret;
+}
+
+/**
+ * Write the IAMF descriptors to pb: the Sequence Header, then every Codec
+ * Config, Audio Element and Mix Presentation registered in iamf, in that
+ * order.
+ *
+ * @return 0 on success, the first negative AVERROR code returned by a writer
+ *         otherwise
+ */
+int ff_iamf_write_descriptors(const IAMFContext *iamf, AVIOContext *pb, void *log_ctx)
+{
+    uint8_t obu_header[MAX_IAMF_OBU_HEADER_SIZE];
+    PutBitContext bits;
+    AVIOContext *seq_bc;
+    uint8_t *seq_buf = NULL;
+    int seq_size, err;
+
+    err = avio_open_dyn_buf(&seq_bc);
+    if (err < 0)
+        return err;
+
+    // Sequence Header
+    init_put_bits(&bits, obu_header, sizeof(obu_header));
+    put_bits(&bits, 5, IAMF_OBU_IA_SEQUENCE_HEADER);
+    put_bits(&bits, 3, 0);
+    flush_put_bits(&bits);
+
+    avio_write(seq_bc, obu_header, put_bytes_count(&bits, 1));
+    // Six bytes follow: the 4-byte 'iamf' code and the two profile bytes.
+    ffio_write_leb(seq_bc, 6);
+    avio_wb32(seq_bc, MKBETAG('i','a','m','f'));
+    avio_w8(seq_bc, iamf->nb_audio_elements > 1); // primary_profile
+    avio_w8(seq_bc, iamf->nb_audio_elements > 1); // additional_profile
+
+    seq_size = avio_close_dyn_buf(seq_bc, &seq_buf);
+    avio_write(pb, seq_buf, seq_size);
+    av_free(seq_buf);
+
+    for (int n = 0; n < iamf->nb_codec_configs; n++) {
+        err = iamf_write_codec_config(iamf, iamf->codec_configs[n], pb);
+        if (err < 0)
+            return err;
+    }
+
+    for (int n = 0; n < iamf->nb_audio_elements; n++) {
+        err = iamf_write_audio_element(iamf, iamf->audio_elements[n], pb, log_ctx);
+        if (err < 0)
+            return err;
+    }
+
+    for (int n = 0; n < iamf->nb_mix_presentations; n++) {
+        err = iamf_write_mixing_presentation(iamf, iamf->mix_presentations[n], pb, log_ctx);
+        if (err < 0)
+            return err;
+    }
+
+    return 0;
+}
diff --git a/libavformat/iamf_writer.h b/libavformat/iamf_writer.h
new file mode 100644
index 0000000000..93354670b8
--- /dev/null
+++ b/libavformat/iamf_writer.h
@@ -0,0 +1,51 @@
+/*
+ * Immersive Audio Model and Formats muxing helpers and structs
+ * Copyright (c) 2023 James Almer <jamrial at gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVFORMAT_IAMF_WRITER_H
+#define AVFORMAT_IAMF_WRITER_H
+
+#include <stdint.h>
+
+#include "libavutil/common.h"
+#include "avformat.h"
+#include "avio.h"
+#include "iamf.h"
+
+/**
+ * Look up a registered parameter definition by its parameter_id.
+ *
+ * @return the first entry of iamf->param_definitions whose param has the
+ *         given id, or NULL if there is none
+ */
+static inline IAMFParamDefinition *ff_iamf_get_param_definition(const IAMFContext *iamf,
+                                                                unsigned int parameter_id)
+{
+    for (int i = 0; i < iamf->nb_param_definitions; i++) {
+        IAMFParamDefinition *candidate = iamf->param_definitions[i];
+
+        if (candidate->param->parameter_id == parameter_id)
+            return candidate;
+    }
+
+    return NULL;
+}
+
+int ff_iamf_add_audio_element(IAMFContext *iamf, const AVStreamGroup *stg, void *log_ctx);
+int ff_iamf_add_mix_presentation(IAMFContext *iamf, const AVStreamGroup *stg, void *log_ctx);
+
+int ff_iamf_write_descriptors(const IAMFContext *iamf, AVIOContext *pb, void *log_ctx);
+
+#endif /* AVFORMAT_IAMF_WRITER_H */
diff --git a/libavformat/iamfenc.c b/libavformat/iamfenc.c
new file mode 100644
index 0000000000..1dbb8b21d4
--- /dev/null
+++ b/libavformat/iamfenc.c
@@ -0,0 +1,388 @@
+/*
+ * IAMF muxer
+ * Copyright (c) 2023 James Almer
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "libavutil/avassert.h"
+#include "libavutil/common.h"
+#include "libavutil/iamf.h"
+#include "libavcodec/get_bits.h"
+#include "libavcodec/put_bits.h"
+#include "avformat.h"
+#include "avio_internal.h"
+#include "iamf.h"
+#include "iamf_writer.h"
+#include "internal.h"
+#include "mux.h"
+
+/* Private data of the IAMF muxer (its size is what .priv_data_size reports). */
+typedef struct IAMFMuxContext {
+ /* IAMF state handed to the ff_iamf_* helpers by this muxer */
+ IAMFContext iamf;
+
+ /* id of s->streams[0]; parameter blocks found in packet side data are
+  * only written for packets of the stream carrying this id */
+ int first_stream_id;
+} IAMFMuxContext;
+
+/**
+ * Validate the muxer input and register every IAMF stream group.
+ *
+ * Checks performed, in order:
+ *  - there is at least one stream;
+ *  - every stream is audio, uses one of the supported codec tags
+ *    (mp4a, Opus, fLaC, ipcm), has at most two channels and a unique id;
+ *  - there are at least two stream groups, of which one or two are
+ *    IAMF_AUDIO_ELEMENT groups and at least one is an IAMF_MIX_PRESENTATION
+ *    group.
+ *
+ * Audio element groups are registered before mix presentation groups.
+ *
+ * @param s muxer context; s->priv_data holds the IAMFMuxContext
+ * @return 0 on success, AVERROR(EINVAL) when a check fails, or the negative
+ *         error returned by ff_iamf_add_audio_element() /
+ *         ff_iamf_add_mix_presentation()
+ */
+static int iamf_init(AVFormatContext *s)
+{
+    IAMFMuxContext *const c = s->priv_data;
+    IAMFContext *const iamf = &c->iamf;
+    int nb_audio_elements = 0, nb_mix_presentations = 0;
+    int ret;
+
+    if (!s->nb_streams) {
+        av_log(s, AV_LOG_ERROR, "There must be at least one stream\n");
+        return AVERROR(EINVAL);
+    }
+
+    for (int i = 0; i < s->nb_streams; i++) {
+        const AVStream *st = s->streams[i];
+        const AVCodecParameters *par = st->codecpar;
+
+        if (par->codec_type != AVMEDIA_TYPE_AUDIO ||
+            (par->codec_tag != MKTAG('m','p','4','a') &&
+             par->codec_tag != MKTAG('O','p','u','s') &&
+             par->codec_tag != MKTAG('f','L','a','C') &&
+             par->codec_tag != MKTAG('i','p','c','m'))) {
+            av_log(s, AV_LOG_ERROR, "Unsupported codec id %s\n",
+                   avcodec_get_name(par->codec_id));
+            return AVERROR(EINVAL);
+        }
+
+        if (par->ch_layout.nb_channels > 2) {
+            av_log(s, AV_LOG_ERROR, "Unsupported channel layout on stream #%d\n", i);
+            return AVERROR(EINVAL);
+        }
+
+        // Stream ids must be unique: compare against every earlier stream.
+        for (int j = 0; j < i; j++) {
+            if (st->id == s->streams[j]->id) {
+                av_log(s, AV_LOG_ERROR, "Duplicated stream id %d\n", s->streams[j]->id);
+                return AVERROR(EINVAL);
+            }
+        }
+    }
+
+    // One audio element plus one mix presentation is the minimum, so a
+    // single stream group can never be valid.
+    if (s->nb_stream_groups <= 1) {
+        av_log(s, AV_LOG_ERROR, "There must be at least two stream groups\n");
+        return AVERROR(EINVAL);
+    }
+
+    for (int i = 0; i < s->nb_stream_groups; i++) {
+        const AVStreamGroup *stg = s->stream_groups[i];
+
+        if (stg->type == AV_STREAM_GROUP_PARAMS_IAMF_AUDIO_ELEMENT)
+            nb_audio_elements++;
+        if (stg->type == AV_STREAM_GROUP_PARAMS_IAMF_MIX_PRESENTATION)
+            nb_mix_presentations++;
+    }
+    // The range test must use ||: a count cannot be both < 1 and > 2.
+    if (nb_audio_elements < 1 || nb_audio_elements > 2 || nb_mix_presentations < 1) {
+        av_log(s, AV_LOG_ERROR, "There must be >= 1 and <= 2 IAMF_AUDIO_ELEMENT and at least "
+                                "one IAMF_MIX_PRESENTATION stream groups\n");
+        return AVERROR(EINVAL);
+    }
+
+    // First pass: audio elements.
+    for (int i = 0; i < s->nb_stream_groups; i++) {
+        const AVStreamGroup *stg = s->stream_groups[i];
+        if (stg->type != AV_STREAM_GROUP_PARAMS_IAMF_AUDIO_ELEMENT)
+            continue;
+
+        ret = ff_iamf_add_audio_element(iamf, stg, s);
+        if (ret < 0)
+            return ret;
+    }
+
+    // Second pass: mix presentations, added only after all audio elements.
+    for (int i = 0; i < s->nb_stream_groups; i++) {
+        const AVStreamGroup *stg = s->stream_groups[i];
+        if (stg->type != AV_STREAM_GROUP_PARAMS_IAMF_MIX_PRESENTATION)
+            continue;
+
+        ret = ff_iamf_add_mix_presentation(iamf, stg, s);
+        if (ret < 0)
+            return ret;
+    }
+
+    c->first_stream_id = s->streams[0]->id;
+
+    return 0;
+}
+
+/**
+ * Write the IAMF descriptors to s->pb and record the id of the first stream.
+ *
+ * @param s muxer context; s->priv_data holds the IAMFMuxContext
+ * @return 0 on success, or the negative value returned by
+ *         ff_iamf_write_descriptors()
+ */
+static int iamf_write_header(AVFormatContext *s)
+{
+    IAMFMuxContext *const mux = s->priv_data;
+    const int err = ff_iamf_write_descriptors(&mux->iamf, s->pb, s);
+
+    if (err < 0)
+        return err;
+
+    mux->first_stream_id = s->streams[0]->id;
+
+    return 0;
+}
+
+/**
+ * Compute q * b with av_rescale() and pass the result through
+ * av_clip_int16().
+ *
+ * NOTE(review): the av_rescale() result is wider than the int that
+ * av_clip_int16() presumably takes, so very large products may be truncated
+ * before they are clipped -- confirm against the libavutil declarations.
+ */
+static inline int rescale_rational(AVRational q, int b)
+{
+    const int64_t scaled = av_rescale(q.num, b, q.den);
+
+    return av_clip_int16(scaled);
+}
+
+/**
+ * Serialize one Parameter Block OBU for a parameter taken from packet side
+ * data.
+ *
+ * The payload is assembled in a dynamic buffer so that its size can be
+ * written (as LEB128) in front of it. The OBU header, size and payload are
+ * sent to s->pb only after the whole payload has been built, so a failure
+ * while building it neither leaks the dynamic buffer nor leaves a header
+ * without payload in the output.
+ *
+ * @param s     muxer context; s->priv_data holds the IAMFMuxContext
+ * @param param parameter definition to serialize
+ * @return 0 on success (also when a param_definition_type above
+ *         AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN is ignored), a negative
+ *         AVERROR code on failure
+ */
+static int write_parameter_block(AVFormatContext *s, const AVIAMFParamDefinition *param)
+{
+    const IAMFMuxContext *const c = s->priv_data;
+    const IAMFContext *const iamf = &c->iamf;
+    uint8_t header[MAX_IAMF_OBU_HEADER_SIZE];
+    IAMFParamDefinition *param_definition = ff_iamf_get_param_definition(iamf, param->parameter_id);
+    PutBitContext pb;
+    AVIOContext *dyn_bc;
+    uint8_t *dyn_buf = NULL;
+    int dyn_size, ret;
+
+    if (param->param_definition_type > AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN) {
+        av_log(s, AV_LOG_DEBUG, "Ignoring side data with unknown param_definition_type %u\n",
+               param->param_definition_type);
+        return 0;
+    }
+
+    if (!param_definition) {
+        av_log(s, AV_LOG_ERROR, "Non-existent Parameter Definition with ID %u referenced by a packet\n",
+               param->parameter_id);
+        return AVERROR(EINVAL);
+    }
+
+    // The side data must agree with the definition registered under this id.
+    if (param->param_definition_type != param_definition->param->param_definition_type ||
+        param->param_definition_mode != param_definition->param->param_definition_mode) {
+        av_log(s, AV_LOG_ERROR, "Inconsistent param_definition_mode or param_definition_type values "
+                                "for Parameter Definition with ID %u in a packet\n",
+               param->parameter_id);
+        return AVERROR(EINVAL);
+    }
+
+    ret = avio_open_dyn_buf(&dyn_bc);
+    if (ret < 0)
+        return ret;
+
+    ffio_write_leb(dyn_bc, param->parameter_id);
+    if (param->param_definition_mode) {
+        ffio_write_leb(dyn_bc, param->duration);
+        ffio_write_leb(dyn_bc, param->constant_subblock_duration);
+        // The subblock count is only coded when subblocks carry their own
+        // durations.
+        if (param->constant_subblock_duration == 0)
+            ffio_write_leb(dyn_bc, param->nb_subblocks);
+    }
+
+    for (int i = 0; i < param->nb_subblocks; i++) {
+        const void *subblock = av_iamf_param_definition_get_subblock(param, i);
+
+        switch (param->param_definition_type) {
+        case AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN: {
+            const AVIAMFMixGain *mix = subblock;
+            if (param->param_definition_mode && param->constant_subblock_duration == 0)
+                ffio_write_leb(dyn_bc, mix->subblock_duration);
+
+            ffio_write_leb(dyn_bc, mix->animation_type);
+
+            // Gain points are scaled by 1 << 8 and written as 16-bit
+            // big-endian values.
+            avio_wb16(dyn_bc, rescale_rational(mix->start_point_value, 1 << 8));
+            if (mix->animation_type >= AV_IAMF_ANIMATION_TYPE_LINEAR)
+                avio_wb16(dyn_bc, rescale_rational(mix->end_point_value, 1 << 8));
+            if (mix->animation_type == AV_IAMF_ANIMATION_TYPE_BEZIER) {
+                avio_wb16(dyn_bc, rescale_rational(mix->control_point_value, 1 << 8));
+                avio_w8(dyn_bc, av_clip_uint8(av_rescale(mix->control_point_relative_time.num, 1 << 8,
+                                                         mix->control_point_relative_time.den)));
+            }
+            break;
+        }
+        case AV_IAMF_PARAMETER_DEFINITION_DEMIXING: {
+            const AVIAMFDemixingInfo *demix = subblock;
+            if (param->param_definition_mode && param->constant_subblock_duration == 0)
+                ffio_write_leb(dyn_bc, demix->subblock_duration);
+
+            // dmixp_mode goes in the top three bits of the byte.
+            avio_w8(dyn_bc, demix->dmixp_mode << 5);
+            break;
+        }
+        case AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN: {
+            const AVIAMFReconGain *recon = subblock;
+            const AVIAMFAudioElement *audio_element = param_definition->audio_element;
+
+            if (param->param_definition_mode && param->constant_subblock_duration == 0)
+                ffio_write_leb(dyn_bc, recon->subblock_duration);
+
+            if (!audio_element) {
+                av_log(s, AV_LOG_ERROR, "Invalid Parameter Definition with ID %u referenced by a packet\n", param->parameter_id);
+                ret = AVERROR(EINVAL);
+                goto fail;
+            }
+
+            for (int j = 0; j < audio_element->nb_layers; j++) {
+                const AVIAMFLayer *layer = audio_element->layers[j];
+
+                if (layer->flags & AV_IAMF_LAYER_FLAG_RECON_GAIN) {
+                    unsigned int recon_gain_flags = 0;
+                    int k = 0;
+
+                    // One flag bit per non-zero gain; entries 7..11 are
+                    // shifted one position further up than entries 0..6.
+                    for (; k < 7; k++)
+                        recon_gain_flags |= (1 << k) * !!recon->recon_gain[j][k];
+                    for (; k < 12; k++)
+                        recon_gain_flags |= (2 << k) * !!recon->recon_gain[j][k];
+                    if (recon_gain_flags >> 8)
+                        recon_gain_flags |= (1 << k);
+
+                    ffio_write_leb(dyn_bc, recon_gain_flags);
+                    // Only non-zero gains are written.
+                    for (k = 0; k < 12; k++) {
+                        if (recon->recon_gain[j][k])
+                            avio_w8(dyn_bc, recon->recon_gain[j][k]);
+                    }
+                }
+            }
+            break;
+        }
+        default:
+            // Larger types were rejected at the top of the function.
+            av_assert0(0);
+        }
+    }
+
+    dyn_size = avio_close_dyn_buf(dyn_bc, &dyn_buf);
+
+    // Parameter Block OBU header: 5-bit OBU type followed by three zero bits.
+    init_put_bits(&pb, header, sizeof(header));
+    put_bits(&pb, 5, IAMF_OBU_IA_PARAMETER_BLOCK);
+    put_bits(&pb, 3, 0);
+    flush_put_bits(&pb);
+    avio_write(s->pb, header, put_bytes_count(&pb, 1));
+
+    ffio_write_leb(s->pb, dyn_size);
+    avio_write(s->pb, dyn_buf, dyn_size);
+    av_free(dyn_buf);
+
+    return 0;
+
+fail:
+    // Release the partially filled dynamic buffer; nothing has reached s->pb.
+    ffio_free_dyn_buf(&dyn_bc);
+    return ret;
+}
+
+/**
+ * Write one packet as an Audio Frame OBU.
+ *
+ * For packets of the stream whose id equals first_stream_id (and when stream
+ * groups exist), mix gain, demixing and recon gain parameter side data are
+ * first written, in that order, as parameter blocks.
+ *
+ * Stream ids up to 17 are coded in the OBU type itself; larger ids use the
+ * generic audio frame type and are written explicitly in the payload.
+ *
+ * @param s   muxer context
+ * @param pkt packet to write
+ * @return 0 on success, a negative error code from write_parameter_block()
+ *         or avio_open_dyn_buf() on failure
+ */
+static int iamf_write_packet(AVFormatContext *s, AVPacket *pkt)
+{
+    static const enum AVPacketSideDataType param_types[] = {
+        AV_PKT_DATA_IAMF_MIX_GAIN_PARAM,
+        AV_PKT_DATA_IAMF_DEMIXING_INFO_PARAM,
+        AV_PKT_DATA_IAMF_RECON_GAIN_INFO_PARAM,
+    };
+    const IAMFMuxContext *const mux = s->priv_data;
+    AVStream *const stream = s->streams[pkt->stream_index];
+    const int explicit_id = stream->id > 17;
+    uint8_t obu_header[MAX_IAMF_OBU_HEADER_SIZE];
+    PutBitContext bits;
+    AVIOContext *payload_bc;
+    uint8_t *trim_data, *payload = NULL;
+    unsigned int trim_start = 0, trim_end = 0;
+    size_t trim_data_size;
+    int payload_size, obu_type, err, trimming;
+
+    if (explicit_id)
+        obu_type = IAMF_OBU_IA_AUDIO_FRAME;
+    else
+        obu_type = stream->id + IAMF_OBU_IA_AUDIO_FRAME_ID0;
+
+    if (s->nb_stream_groups && stream->id == mux->first_stream_id) {
+        for (size_t n = 0; n < sizeof(param_types) / sizeof(param_types[0]); n++) {
+            const AVIAMFParamDefinition *param =
+                (AVIAMFParamDefinition *)av_packet_get_side_data(pkt, param_types[n], NULL);
+
+            if (!param)
+                continue;
+
+            err = write_parameter_block(s, param);
+            if (err < 0)
+                return err;
+        }
+    }
+
+    trim_data = av_packet_get_side_data(pkt, AV_PKT_DATA_SKIP_SAMPLES,
+                                        &trim_data_size);
+    if (trim_data && trim_data_size >= 10) {
+        trim_start = AV_RL32(trim_data);
+        trim_end   = AV_RL32(trim_data + 4);
+    }
+    trimming = trim_start || trim_end;
+
+    err = avio_open_dyn_buf(&payload_bc);
+    if (err < 0)
+        return err;
+
+    init_put_bits(&bits, obu_header, sizeof(obu_header));
+    put_bits(&bits, 5, obu_type);
+    put_bits(&bits, 1, 0);        // obu_redundant_copy
+    put_bits(&bits, 1, trimming); // set when either trim value is non-zero
+    put_bits(&bits, 1, 0);        // obu_extension_flag
+    flush_put_bits(&bits);
+    avio_write(s->pb, obu_header, put_bytes_count(&bits, 1));
+
+    // The end trim is written before the start trim.
+    if (trimming) {
+        ffio_write_leb(payload_bc, trim_end);
+        ffio_write_leb(payload_bc, trim_start);
+    }
+
+    if (explicit_id)
+        ffio_write_leb(payload_bc, stream->id);
+
+    payload_size = avio_close_dyn_buf(payload_bc, &payload);
+    // The coded size covers the extra fields plus the packet data.
+    ffio_write_leb(s->pb, payload_size + pkt->size);
+    avio_write(s->pb, payload, payload_size);
+    av_free(payload);
+    avio_write(s->pb, pkt->data, pkt->size);
+
+    return 0;
+}
+
+/**
+ * Release the muxer's IAMF state.
+ *
+ * The element pointer of every audio element and the mix pointer of every
+ * mix presentation are cleared before ff_iamf_uninit_context() runs.
+ * NOTE(review): presumably these point at objects the muxer does not own,
+ * so they must not be freed by the uninit call -- confirm.
+ */
+static void iamf_deinit(AVFormatContext *s)
+{
+    IAMFMuxContext *const mux = s->priv_data;
+    IAMFContext *const ctx = &mux->iamf;
+    int idx;
+
+    for (idx = 0; idx < ctx->nb_audio_elements; idx++)
+        ctx->audio_elements[idx]->element = NULL;
+
+    for (idx = 0; idx < ctx->nb_mix_presentations; idx++)
+        ctx->mix_presentations[idx]->mix = NULL;
+
+    ff_iamf_uninit_context(ctx);
+}
+
+/*
+ * Codec id to codec tag table exposed through the muxer's .p.codec_tag.
+ * Every listed PCM variant shares the 'ipcm' tag.
+ * NOTE(review): the final AV_CODEC_ID_NONE entry is presumably the list
+ * terminator -- confirm.
+ */
+static const AVCodecTag iamf_codec_tags[] = {
+ { AV_CODEC_ID_AAC, MKTAG('m','p','4','a') },
+ { AV_CODEC_ID_FLAC, MKTAG('f','L','a','C') },
+ { AV_CODEC_ID_OPUS, MKTAG('O','p','u','s') },
+ { AV_CODEC_ID_PCM_S16LE, MKTAG('i','p','c','m') },
+ { AV_CODEC_ID_PCM_S16BE, MKTAG('i','p','c','m') },
+ { AV_CODEC_ID_PCM_S24LE, MKTAG('i','p','c','m') },
+ { AV_CODEC_ID_PCM_S24BE, MKTAG('i','p','c','m') },
+ { AV_CODEC_ID_PCM_S32LE, MKTAG('i','p','c','m') },
+ { AV_CODEC_ID_PCM_S32BE, MKTAG('i','p','c','m') },
+ { AV_CODEC_ID_NONE, MKTAG('i','p','c','m') }
+};
+
+/*
+ * Muxer definition for raw IAMF output: name and file extension "iamf",
+ * Opus as the default audio codec, and the init / deinit / write_header /
+ * write_packet callbacks defined in this file.
+ */
+const FFOutputFormat ff_iamf_muxer = {
+ .p.name = "iamf",
+ .p.long_name = NULL_IF_CONFIG_SMALL("Raw Immersive Audio Model and Formats"),
+ .p.extensions = "iamf",
+ .priv_data_size = sizeof(IAMFMuxContext),
+ .p.audio_codec = AV_CODEC_ID_OPUS,
+ .init = iamf_init,
+ .deinit = iamf_deinit,
+ .write_header = iamf_write_header,
+ .write_packet = iamf_write_packet,
+ .p.codec_tag = (const AVCodecTag* const []){ iamf_codec_tags, NULL },
+ .p.flags = AVFMT_GLOBALHEADER | AVFMT_NOTIMESTAMPS,
+};
--
2.43.0
More information about the ffmpeg-devel
mailing list