[FFmpeg-devel] [PATCH 7/8] avformat/movenc: add support for Immersive Audio Model and Formats in ISOBMFF
Andreas Rheinhardt
andreas.rheinhardt at outlook.com
Tue Feb 20 15:37:15 EET 2024
James Almer:
> Signed-off-by: James Almer <jamrial at gmail.com>
> ---
> libavformat/movenc.c | 349 +++++++++++++++++++++++++++++++++++--------
> libavformat/movenc.h | 6 +
> 2 files changed, 293 insertions(+), 62 deletions(-)
>
> diff --git a/libavformat/movenc.c b/libavformat/movenc.c
> index c71a9983ed..cd63b353b8 100644
> --- a/libavformat/movenc.c
> +++ b/libavformat/movenc.c
> @@ -32,6 +32,7 @@
> #include "dovi_isom.h"
> #include "riff.h"
> #include "avio.h"
> +#include "iamf_writer.h"
> #include "isom.h"
> #include "av1.h"
> #include "avc.h"
> @@ -41,12 +42,14 @@
> #include "libavcodec/flac.h"
> #include "libavcodec/get_bits.h"
>
> +#include "libavcodec/bsf.h"
> #include "libavcodec/internal.h"
> #include "libavcodec/put_bits.h"
> #include "libavcodec/vc1_common.h"
> #include "libavcodec/raw.h"
> #include "internal.h"
> #include "libavutil/avstring.h"
> +#include "libavutil/bprint.h"
> #include "libavutil/channel_layout.h"
> #include "libavutil/csp.h"
> #include "libavutil/intfloat.h"
> @@ -316,6 +319,32 @@ static int mov_write_sdtp_tag(AVIOContext *pb, MOVTrack *track)
> return update_size(pb, pos);
> }
>
> +/**
> + * Write the 'iacb' box of an IAMF track.
> + *
> + * The IAMF descriptors are serialized into a temporary dynamic buffer first
> + * so that their total size is known. The box payload is then the
> + * configurationVersion byte (1), the leb-coded size of the descriptors and
> + * the descriptors themselves.
> + *
> + * @param s     muxer context, passed on to ff_iamf_write_descriptors()
> + * @param pb    output the box is written to
> + * @param track track whose IAMF context provides the descriptors
> + * @return the result of update_size() on success, a negative error code if
> + *         the temporary buffer cannot be opened or the descriptors cannot
> + *         be written
> + */
> +static int mov_write_iacb_tag(AVFormatContext *s, AVIOContext *pb, MOVTrack *track)
> +{
> +    AVIOContext *dyn_bc;
> +    int64_t pos = avio_tell(pb);
> +    uint8_t *dyn_buf = NULL;
> +    int dyn_size;
> +    int ret = avio_open_dyn_buf(&dyn_bc);
> +    if (ret < 0)
> +        return ret;
> +
> +    avio_wb32(pb, 0); // size, patched by update_size() below
> +    ffio_wfourcc(pb, "iacb");
> +    avio_w8(pb, 1); // configurationVersion
> +
> +    ret = ff_iamf_write_descriptors(track->iamf, dyn_bc, s);
> +    if (ret < 0) {
> +        // Do not leak the temporary buffer when the descriptors fail.
> +        ffio_free_dyn_buf(&dyn_bc);
> +        return ret;
> +    }
> +
> +    dyn_size = avio_close_dyn_buf(dyn_bc, &dyn_buf);
> +    ffio_write_leb(pb, dyn_size);
> +    avio_write(pb, dyn_buf, dyn_size);
> +    av_free(dyn_buf);
> +
> +    return update_size(pb, pos);
> +}
> +
> static int mov_write_amr_tag(AVIOContext *pb, MOVTrack *track)
> {
> avio_wb32(pb, 0x11); /* size */
> @@ -1358,6 +1387,8 @@ static int mov_write_audio_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContex
> ret = mov_write_wave_tag(s, pb, track);
> else if (track->tag == MKTAG('m','p','4','a'))
> ret = mov_write_esds_tag(pb, track);
> + else if (track->tag == MKTAG('i','a','m','f'))
> + ret = mov_write_iacb_tag(mov->fc, pb, track);
> else if (track->par->codec_id == AV_CODEC_ID_AMR_NB)
> ret = mov_write_amr_tag(pb, track);
> else if (track->par->codec_id == AV_CODEC_ID_AC3)
> @@ -2529,7 +2560,7 @@ static int mov_write_video_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContex
>
> if (track->mode == MODE_AVIF) {
> mov_write_ccst_tag(pb);
> - if (s->nb_streams > 0 && track == &mov->tracks[1])
> + if (mov->nb_streams > 0 && track == &mov->tracks[1])
> mov_write_aux_tag(pb, "auxi");
> }
>
> @@ -3124,9 +3155,9 @@ static int mov_write_iloc_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatConte
> avio_wb32(pb, 0); /* Version & flags */
> avio_w8(pb, (4 << 4) + 4); /* offset_size(4) and length_size(4) */
> avio_w8(pb, 0); /* base_offset_size(4) and reserved(4) */
> - avio_wb16(pb, s->nb_streams); /* item_count */
> + avio_wb16(pb, mov->nb_streams); /* item_count */
>
> - for (int i = 0; i < s->nb_streams; i++) {
> + for (int i = 0; i < mov->nb_streams; i++) {
> avio_wb16(pb, i + 1); /* item_id */
> avio_wb16(pb, 0); /* data_reference_index */
> avio_wb16(pb, 1); /* extent_count */
> @@ -3145,9 +3176,9 @@ static int mov_write_iinf_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatConte
> avio_wb32(pb, 0); /* size */
> ffio_wfourcc(pb, "iinf");
> avio_wb32(pb, 0); /* Version & flags */
> - avio_wb16(pb, s->nb_streams); /* entry_count */
> + avio_wb16(pb, mov->nb_streams); /* entry_count */
>
> - for (int i = 0; i < s->nb_streams; i++) {
> + for (int i = 0; i < mov->nb_streams; i++) {
> int64_t infe_pos = avio_tell(pb);
> avio_wb32(pb, 0); /* size */
> ffio_wfourcc(pb, "infe");
> @@ -3216,7 +3247,7 @@ static int mov_write_ipco_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatConte
> int64_t pos = avio_tell(pb);
> avio_wb32(pb, 0); /* size */
> ffio_wfourcc(pb, "ipco");
> - for (int i = 0; i < s->nb_streams; i++) {
> + for (int i = 0; i < mov->nb_streams; i++) {
> mov_write_ispe_tag(pb, mov, s, i);
> mov_write_pixi_tag(pb, mov, s, i);
> mov_write_av1c_tag(pb, &mov->tracks[i]);
> @@ -3234,9 +3265,9 @@ static int mov_write_ipma_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatConte
> avio_wb32(pb, 0); /* size */
> ffio_wfourcc(pb, "ipma");
> avio_wb32(pb, 0); /* Version & flags */
> - avio_wb32(pb, s->nb_streams); /* entry_count */
> + avio_wb32(pb, mov->nb_streams); /* entry_count */
>
> - for (int i = 0, index = 1; i < s->nb_streams; i++) {
> + for (int i = 0, index = 1; i < mov->nb_streams; i++) {
> avio_wb16(pb, i + 1); /* item_ID */
> avio_w8(pb, 4); /* association_count */
>
> @@ -4213,7 +4244,7 @@ static int mov_write_covr(AVIOContext *pb, AVFormatContext *s)
> int64_t pos = 0;
> int i;
>
> - for (i = 0; i < s->nb_streams; i++) {
> + for (i = 0; i < mov->nb_streams; i++) {
> MOVTrack *trk = &mov->tracks[i];
>
> if (!is_cover_image(trk->st) || trk->cover_image->size <= 0)
> @@ -4360,7 +4391,7 @@ static int mov_write_meta_tag(AVIOContext *pb, MOVMuxContext *mov,
> mov_write_pitm_tag(pb, 1);
> mov_write_iloc_tag(pb, mov, s);
> mov_write_iinf_tag(pb, mov, s);
> - if (s->nb_streams > 1)
> + if (mov->nb_streams > 1)
> mov_write_iref_tag(pb, mov, s);
> mov_write_iprp_tag(pb, mov, s);
> } else {
> @@ -4611,16 +4642,17 @@ static int mov_setup_track_ids(MOVMuxContext *mov, AVFormatContext *s)
>
> if (mov->use_stream_ids_as_track_ids) {
> int next_generated_track_id = 0;
> - for (i = 0; i < s->nb_streams; i++) {
> - if (s->streams[i]->id > next_generated_track_id)
> - next_generated_track_id = s->streams[i]->id;
> + for (i = 0; i < mov->nb_streams; i++) {
> + AVStream *st = mov->tracks[i].st;
> + if (st->id > next_generated_track_id)
> + next_generated_track_id = st->id;
> }
>
> for (i = 0; i < mov->nb_tracks; i++) {
> if (mov->tracks[i].entry <= 0 && !(mov->flags & FF_MOV_FLAG_FRAGMENT))
> continue;
>
> - mov->tracks[i].track_id = i >= s->nb_streams ? ++next_generated_track_id : s->streams[i]->id;
> + mov->tracks[i].track_id = i >= mov->nb_streams ? ++next_generated_track_id : mov->tracks[i].st->id;
> }
> } else {
> for (i = 0; i < mov->nb_tracks; i++) {
> @@ -4657,7 +4689,7 @@ static int mov_write_moov_tag(AVIOContext *pb, MOVMuxContext *mov,
> }
>
> if (mov->chapter_track)
> - for (i = 0; i < s->nb_streams; i++) {
> + for (i = 0; i < mov->nb_streams; i++) {
> mov->tracks[i].tref_tag = MKTAG('c','h','a','p');
> mov->tracks[i].tref_id = mov->tracks[mov->chapter_track].track_id;
> }
> @@ -4697,7 +4729,7 @@ static int mov_write_moov_tag(AVIOContext *pb, MOVMuxContext *mov,
> for (i = 0; i < mov->nb_tracks; i++) {
> if (mov->tracks[i].entry > 0 || mov->flags & FF_MOV_FLAG_FRAGMENT ||
> mov->mode == MODE_AVIF) {
> - int ret = mov_write_trak_tag(s, pb, mov, &(mov->tracks[i]), i < s->nb_streams ? s->streams[i] : NULL);
> + int ret = mov_write_trak_tag(s, pb, mov, &(mov->tracks[i]), i < mov->nb_streams ? mov->tracks[i].st : NULL);
> if (ret < 0)
> return ret;
> }
> @@ -5489,10 +5521,20 @@ static int mov_write_ftyp_tag(AVIOContext *pb, AVFormatContext *s)
> MOVMuxContext *mov = s->priv_data;
> int64_t pos = avio_tell(pb);
> int has_h264 = 0, has_av1 = 0, has_video = 0, has_dolby = 0;
> + int has_iamf = 0;
> int i;
>
> - for (i = 0; i < s->nb_streams; i++) {
> - AVStream *st = s->streams[i];
> + for (i = 0; i < s->nb_stream_groups; i++) {
> + const AVStreamGroup *stg = s->stream_groups[i];
> +
> + if (stg->type == AV_STREAM_GROUP_PARAMS_IAMF_AUDIO_ELEMENT ||
> + stg->type == AV_STREAM_GROUP_PARAMS_IAMF_MIX_PRESENTATION) {
> + has_iamf = 1;
> + break;
> + }
> + }
> + for (i = 0; i < mov->nb_streams; i++) {
> + AVStream *st = mov->tracks[i].st;
> if (is_cover_image(st))
> continue;
> if (st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO)
> @@ -5560,6 +5602,8 @@ static int mov_write_ftyp_tag(AVIOContext *pb, AVFormatContext *s)
> ffio_wfourcc(pb, "av01");
> if (has_dolby)
> ffio_wfourcc(pb, "dby1");
> + if (has_iamf)
> + ffio_wfourcc(pb, "iamf");
> } else {
> if (mov->flags & FF_MOV_FLAG_FRAGMENT)
> ffio_wfourcc(pb, "iso6");
> @@ -5667,8 +5711,8 @@ static int mov_write_identification(AVIOContext *pb, AVFormatContext *s)
> mov_write_ftyp_tag(pb,s);
> if (mov->mode == MODE_PSP) {
> int video_streams_nb = 0, audio_streams_nb = 0, other_streams_nb = 0;
> - for (i = 0; i < s->nb_streams; i++) {
> - AVStream *st = s->streams[i];
> + for (i = 0; i < mov->nb_streams; i++) {
> + AVStream *st = mov->tracks[i].st;
> if (is_cover_image(st))
> continue;
> if (st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO)
> @@ -5855,7 +5899,7 @@ static int mov_write_squashed_packets(AVFormatContext *s)
> {
> MOVMuxContext *mov = s->priv_data;
>
> - for (int i = 0; i < s->nb_streams; i++) {
> + for (int i = 0; i < mov->nb_streams; i++) {
> MOVTrack *track = &mov->tracks[i];
> int ret = AVERROR_BUG;
>
> @@ -5896,7 +5940,7 @@ static int mov_flush_fragment(AVFormatContext *s, int force)
> // of fragments was triggered automatically by an AVPacket, we
> // already have reliable info for the end of that track, but other
> // tracks may need to be filled in.
> - for (i = 0; i < s->nb_streams; i++) {
> + for (i = 0; i < mov->nb_streams; i++) {
> MOVTrack *track = &mov->tracks[i];
> if (!track->end_reliable) {
> const AVPacket *pkt = ff_interleaved_peek(s, i);
> @@ -6097,10 +6141,8 @@ static int mov_auto_flush_fragment(AVFormatContext *s, int force)
> return ret;
> }
>
> -static int check_pkt(AVFormatContext *s, AVPacket *pkt)
> +static int check_pkt(AVFormatContext *s, MOVTrack *trk, AVPacket *pkt)
> {
> - MOVMuxContext *mov = s->priv_data;
> - MOVTrack *trk = &mov->tracks[pkt->stream_index];
> int64_t ref;
> uint64_t duration;
>
> @@ -6138,15 +6180,21 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
> {
> MOVMuxContext *mov = s->priv_data;
> AVIOContext *pb = s->pb;
> - MOVTrack *trk = &mov->tracks[pkt->stream_index];
> - AVCodecParameters *par = trk->par;
> + MOVTrack *trk;
> + AVCodecParameters *par;
> AVProducerReferenceTime *prft;
> unsigned int samples_in_chunk = 0;
> int size = pkt->size, ret = 0, offset = 0;
> size_t prft_size;
> uint8_t *reformatted_data = NULL;
>
> - ret = check_pkt(s, pkt);
> + if (pkt->stream_index < s->nb_streams)
> + trk = s->streams[pkt->stream_index]->priv_data;
> + else // Timecode or chapter
> + trk = &mov->tracks[pkt->stream_index];
> + par = trk->par;
> +
> + ret = check_pkt(s, trk, pkt);
> if (ret < 0)
> return ret;
>
> @@ -6236,7 +6284,7 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
>
> if (par->codec_id == AV_CODEC_ID_AAC && pkt->size > 2 &&
> (AV_RB16(pkt->data) & 0xfff0) == 0xfff0) {
> - if (!s->streams[pkt->stream_index]->nb_frames) {
> + if (!trk->st->nb_frames) {
> av_log(s, AV_LOG_ERROR, "Malformed AAC bitstream detected: "
> "use the audio bitstream filter 'aac_adtstoasc' to fix it "
> "('-bsf:a aac_adtstoasc' option with ffmpeg)\n");
> @@ -6498,18 +6546,18 @@ err:
> static int mov_write_single_packet(AVFormatContext *s, AVPacket *pkt)
> {
> MOVMuxContext *mov = s->priv_data;
> - MOVTrack *trk = &mov->tracks[pkt->stream_index];
> + MOVTrack *trk = s->streams[pkt->stream_index]->priv_data;
> AVCodecParameters *par = trk->par;
> int64_t frag_duration = 0;
> int size = pkt->size;
>
> - int ret = check_pkt(s, pkt);
> + int ret = check_pkt(s, trk, pkt);
> if (ret < 0)
> return ret;
>
> if (mov->flags & FF_MOV_FLAG_FRAG_DISCONT) {
> int i;
> - for (i = 0; i < s->nb_streams; i++)
> + for (i = 0; i < mov->nb_streams; i++)
> mov->tracks[i].frag_discont = 1;
> mov->flags &= ~FF_MOV_FLAG_FRAG_DISCONT;
> }
> @@ -6551,7 +6599,7 @@ static int mov_write_single_packet(AVFormatContext *s, AVPacket *pkt)
> return 0; /* Discard 0 sized packets */
> }
>
> - if (trk->entry && pkt->stream_index < s->nb_streams)
> + if (trk->entry && pkt->stream_index < mov->nb_streams)
> frag_duration = av_rescale_q(pkt->dts - trk->cluster[0].dts,
> s->streams[pkt->stream_index]->time_base,
> AV_TIME_BASE_Q);
> @@ -6606,17 +6654,80 @@ static int mov_write_subtitle_end_packet(AVFormatContext *s,
> return ret;
> }
>
> +/**
> + * Accumulate the packets of all IAMF substreams into a single IA Sample.
> + *
> + * Every incoming packet is appended to trk->iamf_buf; parameter blocks are
> + * written in front of the packet of the first IAMF stream. Once the packet
> + * of the last IAMF stream has been appended, the accumulated data replaces
> + * the payload of pkt, pkt->stream_index is set to trk->first_iamf_idx and a
> + * fresh accumulation buffer is opened for the next sample.
> + *
> + * @return 0 when pkt is ready to be written (it now carries the assembled
> + *         sample, or is still empty if every substream packet was empty),
> + *         AVERROR(EAGAIN) when more substream packets are needed,
> + *         another negative error code on failure
> + */
> +static int mov_build_iamf_packet(AVFormatContext *s, MOVTrack *trk, AVPacket *pkt)
> +{
> +    AVBufferRef *buf;
> +    uint8_t *data;
> +    int size, ret;
> +
> +    if (pkt->stream_index == trk->first_iamf_idx) {
> +        ret = ff_iamf_write_parameter_blocks(trk->iamf, trk->iamf_buf, pkt, s);
> +        if (ret < 0)
> +            return ret;
> +    }
> +
> +    ret = ff_iamf_write_audio_frame(trk->iamf, trk->iamf_buf,
> +                                    s->streams[pkt->stream_index]->id, pkt);
> +    if (ret < 0)
> +        return ret;
> +
> +    // The sample is only complete once the last substream has been seen.
> +    if (pkt->stream_index != trk->last_iamf_idx)
> +        return AVERROR(EAGAIN);
> +
> +    size = avio_close_dyn_buf(trk->iamf_buf, &data);
> +    trk->iamf_buf = NULL;
> +
> +    if (!size) {
> +        av_free(data);
> +        if (pkt->size) {
> +            // Either all or none of the packets for a single
> +            // IA Sample may be empty.
> +            av_log(s, AV_LOG_ERROR, "Unexpected packet from "
> +                                    "stream #%d\n", pkt->stream_index);
> +            return AVERROR_INVALIDDATA;
> +        }
> +        // Reopen the accumulation buffer so that packets following an
> +        // empty sample are not written into a NULL AVIOContext.
> +        return avio_open_dyn_buf(&trk->iamf_buf);
> +    }
> +
> +    // Create the new buffer before dropping the old one so that pkt is
> +    // left untouched (no dangling pkt->data) if the allocation fails.
> +    buf = av_buffer_create(data, size, NULL, NULL, 0);
> +    if (!buf) {
> +        av_free(data);
> +        return AVERROR(ENOMEM);
> +    }
> +    av_buffer_unref(&pkt->buf);
> +    pkt->buf = buf;
> +    pkt->data = data;
> +    pkt->size = size;
> +    pkt->stream_index = trk->first_iamf_idx;
> +
> +    return avio_open_dyn_buf(&trk->iamf_buf);
> +}
> +
> static int mov_write_packet(AVFormatContext *s, AVPacket *pkt)
> {
> MOVMuxContext *mov = s->priv_data;
> MOVTrack *trk;
> + int ret;
>
> if (!pkt) {
> mov_flush_fragment(s, 1);
> return 1;
> }
>
> - trk = &mov->tracks[pkt->stream_index];
> + trk = s->streams[pkt->stream_index]->priv_data;
> +
> + if (trk->iamf) {
> + ret = mov_build_iamf_packet(s, trk, pkt);
> + if (ret < 0) {
> + if (ret == AVERROR(EAGAIN))
> + return 0;
> + av_log(s, AV_LOG_ERROR, "Error assembling an IAMF packet "
> + "for stream #%d\n", trk->st->index);
> + return ret;
> + }
> + }
>
> if (is_cover_image(trk->st)) {
> int ret;
> @@ -6817,12 +6928,12 @@ static int mov_create_chapter_track(AVFormatContext *s, int tracknum)
> }
>
>
> -static int mov_check_timecode_track(AVFormatContext *s, AVTimecode *tc, int src_index, const char *tcstr)
> +static int mov_check_timecode_track(AVFormatContext *s, AVTimecode *tc, AVStream *src_st, const char *tcstr)
> {
> int ret;
>
> /* compute the frame number */
> - ret = av_timecode_init_from_string(tc, s->streams[src_index]->avg_frame_rate, tcstr, s);
> + ret = av_timecode_init_from_string(tc, src_st->avg_frame_rate, tcstr, s);
> return ret;
> }
>
> @@ -6830,7 +6941,7 @@ static int mov_create_timecode_track(AVFormatContext *s, int index, int src_inde
> {
> MOVMuxContext *mov = s->priv_data;
> MOVTrack *track = &mov->tracks[index];
> - AVStream *src_st = s->streams[src_index];
> + AVStream *src_st = mov->tracks[src_index].st;
> uint8_t data[4];
> AVPacket *pkt = mov->pkt;
> AVRational rate = src_st->avg_frame_rate;
> @@ -6890,8 +7001,8 @@ static void enable_tracks(AVFormatContext *s)
> first[i] = -1;
> }
>
> - for (i = 0; i < s->nb_streams; i++) {
> - AVStream *st = s->streams[i];
> + for (i = 0; i < mov->nb_streams; i++) {
> + AVStream *st = mov->tracks[i].st;
>
> if (st->codecpar->codec_type <= AVMEDIA_TYPE_UNKNOWN ||
> st->codecpar->codec_type >= AVMEDIA_TYPE_NB ||
> @@ -6925,6 +7036,9 @@ static void mov_free(AVFormatContext *s)
> MOVMuxContext *mov = s->priv_data;
> int i;
>
> + for (i = 0; i < s->nb_streams; i++)
> + s->streams[i]->priv_data = NULL;
> +
> if (!mov->tracks)
> return;
>
> @@ -6954,6 +7068,11 @@ static void mov_free(AVFormatContext *s)
> ff_mov_cenc_free(&track->cenc);
> ffio_free_dyn_buf(&track->mdat_buf);
>
> + ffio_free_dyn_buf(&track->iamf_buf);
> + if (track->iamf)
> + ff_iamf_write_deinit(track->iamf);
> + av_freep(&track->iamf);
> +
> avpriv_packet_list_free(&track->squashed_packet_queue);
> }
>
> @@ -7027,6 +7146,66 @@ static int mov_create_dvd_sub_decoder_specific_info(MOVTrack *track,
> return 0;
> }
>
> +/**
> + * Set up the IAMF track from the IAMF stream groups of s, if there are any.
> + *
> + * Every stream belonging to an IAMF Audio Element group gets its priv_data
> + * pointed at the IAMF track, and the lowest/highest index of those streams
> + * is recorded in first_iamf_idx/last_iamf_idx. Audio Elements and Mix
> + * Presentations are registered in a newly allocated IAMF context, the track
> + * tag is set to 'iamf' and the buffer used to assemble IA Samples is opened.
> + *
> + * @return 0 on success or when there are no IAMF stream groups,
> + *         AVERROR(EINVAL) when the number of IAMF groups is not supported,
> + *         another negative error code on failure
> + */
> +static int mov_init_iamf_track(AVFormatContext *s)
> +{
> +    MOVMuxContext *mov = s->priv_data;
> +    MOVTrack *track = &mov->tracks[0]; // IAMF if present is always the first track
> +    int nb_audio_elements = 0, nb_mix_presentations = 0;
> +    int have_idx = 0;
> +    int ret;
> +
> +    for (int i = 0; i < s->nb_stream_groups; i++) {
> +        const AVStreamGroup *stg = s->stream_groups[i];
> +
> +        if (stg->type == AV_STREAM_GROUP_PARAMS_IAMF_AUDIO_ELEMENT)
> +            nb_audio_elements++;
> +        if (stg->type == AV_STREAM_GROUP_PARAMS_IAMF_MIX_PRESENTATION)
> +            nb_mix_presentations++;
> +    }
> +
> +    if (!nb_audio_elements && !nb_mix_presentations)
> +        return 0;
> +
> +    // "< 1 && > 2" can never be true; the range check needs a logical or.
> +    if (nb_audio_elements < 1 || nb_audio_elements > 2 || nb_mix_presentations < 1) {
> +        av_log(s, AV_LOG_ERROR, "There must be >= 1 and <= 2 IAMF_AUDIO_ELEMENT and at least "
> +                                "one IAMF_MIX_PRESENTATION stream groups to write a IAMF track\n");
> +        return AVERROR(EINVAL);
> +    }
> +
> +    track->iamf = av_mallocz(sizeof(*track->iamf));
> +    if (!track->iamf)
> +        return AVERROR(ENOMEM);
> +
> +    for (int i = 0; i < s->nb_stream_groups; i++) {
> +        const AVStreamGroup *stg = s->stream_groups[i];
> +        switch(stg->type) {
> +        case AV_STREAM_GROUP_PARAMS_IAMF_AUDIO_ELEMENT:
> +            for (int j = 0; j < stg->nb_streams; j++) {
> +                const int idx = stg->streams[j]->index;
> +
> +                // Seed the range with the first stream seen instead of
> +                // comparing against whatever the fields held before, so
> +                // that first_iamf_idx is correct even when the IAMF
> +                // streams do not start at stream index 0.
> +                if (!have_idx) {
> +                    track->first_iamf_idx = idx;
> +                    track->last_iamf_idx  = idx;
> +                    have_idx = 1;
> +                } else {
> +                    track->first_iamf_idx = FFMIN(idx, track->first_iamf_idx);
> +                    track->last_iamf_idx  = FFMAX(idx, track->last_iamf_idx);
> +                }
> +                stg->streams[j]->priv_data = track;
> +            }
> +
> +            ret = ff_iamf_add_audio_element(track->iamf, stg, s);
> +            break;
> +        case AV_STREAM_GROUP_PARAMS_IAMF_MIX_PRESENTATION:
> +            ret = ff_iamf_add_mix_presentation(track->iamf, stg, s);
> +            break;
> +        default:
> +            av_assert0(0);
> +        }
> +        if (ret < 0)
> +            return ret;
> +    }
> +
> +    track->tag = MKTAG('i','a','m','f');
> +
> +    ret = avio_open_dyn_buf(&track->iamf_buf);
> +    if (ret < 0)
> +        return ret;
> +
> +    return 0;
> +}
> +
> static int mov_init(AVFormatContext *s)
> {
> MOVMuxContext *mov = s->priv_data;
> @@ -7164,7 +7343,37 @@ static int mov_init(AVFormatContext *s)
> s->streams[0]->disposition |= AV_DISPOSITION_DEFAULT;
> }
>
> - mov->nb_tracks = s->nb_streams;
> + for (i = 0; i < s->nb_stream_groups; i++) {
> + AVStreamGroup *stg = s->stream_groups[i];
> +
> + if (stg->type != AV_STREAM_GROUP_PARAMS_IAMF_AUDIO_ELEMENT)
> + continue;
> +
> + for (int j = 0; j < stg->nb_streams; j++) {
> + AVStream *st = stg->streams[j];
> +
> + if (st->priv_data) {
> + av_log(s, AV_LOG_ERROR, "Stream %d is present in more than one Stream Group of type "
> + "IAMF Audio Element\n", j);
> + return AVERROR(EINVAL);
> + }
> + st->priv_data = st;
> + }
> +
> + if (!mov->nb_tracks) // We support one track for the entire IAMF structure
> + mov->nb_tracks++;
> + }
> +
> + for (i = 0; i < s->nb_streams; i++) {
> + AVStream *st = s->streams[i];
> + if (st->priv_data)
> + continue;
> + st->priv_data = st;
> + mov->nb_tracks++;
> + }
> +
> + mov->nb_streams = mov->nb_tracks;
> +
> if (mov->mode & (MODE_MP4|MODE_MOV|MODE_IPOD) && s->nb_chapters)
> mov->chapter_track = mov->nb_tracks++;
>
> @@ -7190,7 +7399,7 @@ static int mov_init(AVFormatContext *s)
> if (st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO &&
> (t || (t=av_dict_get(st->metadata, "timecode", NULL, 0)))) {
> AVTimecode tc;
> - ret = mov_check_timecode_track(s, &tc, i, t->value);
> + ret = mov_check_timecode_track(s, &tc, st, t->value);
> if (ret >= 0)
> mov->nb_meta_tmcd++;
> }
> @@ -7239,18 +7448,33 @@ static int mov_init(AVFormatContext *s)
> }
> }
>
> + ret = mov_init_iamf_track(s);
> + if (ret < 0)
> + return ret;
> +
> + for (int j = 0, i = 0; j < s->nb_streams; j++) {
> + AVStream *st = s->streams[j];
> +
> + if (st != st->priv_data)
> + continue;
> + st->priv_data = &mov->tracks[i++];
> + }
> +
> for (i = 0; i < s->nb_streams; i++) {
> AVStream *st= s->streams[i];
> - MOVTrack *track= &mov->tracks[i];
> + MOVTrack *track = st->priv_data;
> AVDictionaryEntry *lang = av_dict_get(st->metadata, "language", NULL,0);
>
> - track->st = st;
> - track->par = st->codecpar;
> + if (!track->st) {
> + track->st = st;
> + track->par = st->codecpar;
> + }
> track->language = ff_mov_iso639_to_lang(lang?lang->value:"und", mov->mode!=MODE_MOV);
> if (track->language < 0)
> track->language = 32767; // Unspecified Macintosh language code
> track->mode = mov->mode;
> - track->tag = mov_find_codec_tag(s, track);
> + if (!track->tag)
> + track->tag = mov_find_codec_tag(s, track);
> if (!track->tag) {
> av_log(s, AV_LOG_ERROR, "Could not find tag for codec %s in stream #%d, "
> "codec not currently supported in container\n",
> @@ -7442,25 +7666,26 @@ static int mov_write_header(AVFormatContext *s)
> {
> AVIOContext *pb = s->pb;
> MOVMuxContext *mov = s->priv_data;
> - int i, ret, hint_track = 0, tmcd_track = 0, nb_tracks = s->nb_streams;
> + int i, ret, hint_track = 0, tmcd_track = 0, nb_tracks = mov->nb_streams;
>
> if (mov->mode & (MODE_MP4|MODE_MOV|MODE_IPOD) && s->nb_chapters)
> nb_tracks++;
>
> if (mov->flags & FF_MOV_FLAG_RTP_HINT) {
> hint_track = nb_tracks;
> - for (i = 0; i < s->nb_streams; i++)
> - if (rtp_hinting_needed(s->streams[i]))
> + for (i = 0; i < mov->nb_streams; i++) {
> + if (rtp_hinting_needed(mov->tracks[i].st))
> nb_tracks++;
> + }
> }
>
> if (mov->nb_meta_tmcd)
> tmcd_track = nb_tracks;
>
> - for (i = 0; i < s->nb_streams; i++) {
> + for (i = 0; i < mov->nb_streams; i++) {
> int j;
> - AVStream *st= s->streams[i];
> - MOVTrack *track= &mov->tracks[i];
> + MOVTrack *track = &mov->tracks[i];
> + AVStream *st = track->st;
>
> /* copy extradata if it exists */
> if (st->codecpar->extradata_size) {
> @@ -7482,8 +7707,8 @@ static int mov_write_header(AVFormatContext *s)
> &(AVChannelLayout)AV_CHANNEL_LAYOUT_MONO))
> continue;
>
> - for (j = 0; j < s->nb_streams; j++) {
> - AVStream *stj= s->streams[j];
> + for (j = 0; j < mov->nb_streams; j++) {
> + AVStream *stj= mov->tracks[j].st;
> MOVTrack *trackj= &mov->tracks[j];
> if (j == i)
> continue;
> @@ -7546,8 +7771,8 @@ static int mov_write_header(AVFormatContext *s)
> return ret;
>
> if (mov->flags & FF_MOV_FLAG_RTP_HINT) {
> - for (i = 0; i < s->nb_streams; i++) {
> - if (rtp_hinting_needed(s->streams[i])) {
> + for (i = 0; i < mov->nb_streams; i++) {
> + if (rtp_hinting_needed(mov->tracks[i].st)) {
> if ((ret = ff_mov_init_hinting(s, hint_track, i)) < 0)
> return ret;
> hint_track++;
> @@ -7559,8 +7784,8 @@ static int mov_write_header(AVFormatContext *s)
> const AVDictionaryEntry *t, *global_tcr = av_dict_get(s->metadata,
> "timecode", NULL, 0);
> /* Initialize the tmcd tracks */
> - for (i = 0; i < s->nb_streams; i++) {
> - AVStream *st = s->streams[i];
> + for (i = 0; i < mov->nb_streams; i++) {
> + AVStream *st = mov->tracks[i].st;
> t = global_tcr;
>
> if (st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
> @@ -7569,7 +7794,7 @@ static int mov_write_header(AVFormatContext *s)
> t = av_dict_get(st->metadata, "timecode", NULL, 0);
> if (!t)
> continue;
> - if (mov_check_timecode_track(s, &tc, i, t->value) < 0)
> + if (mov_check_timecode_track(s, &tc, st, t->value) < 0)
> continue;
> if ((ret = mov_create_timecode_track(s, tmcd_track, i, tc)) < 0)
> return ret;
> @@ -7690,7 +7915,7 @@ static int mov_write_trailer(AVFormatContext *s)
> int64_t moov_pos;
>
> if (mov->need_rewrite_extradata) {
> - for (i = 0; i < s->nb_streams; i++) {
> + for (i = 0; i < mov->nb_streams; i++) {
> MOVTrack *track = &mov->tracks[i];
> AVCodecParameters *par = track->par;
>
> @@ -7830,7 +8055,7 @@ static int avif_write_trailer(AVFormatContext *s)
> if (mov->moov_written) return 0;
>
> mov->is_animated_avif = s->streams[0]->nb_frames > 1;
> - if (mov->is_animated_avif && s->nb_streams > 1) {
> + if (mov->is_animated_avif && mov->nb_streams > 1) {
> // For animated avif with alpha channel, we need to write a tref tag
> // with type "auxl".
> mov->tracks[1].tref_tag = MKTAG('a', 'u', 'x', 'l');
> @@ -7840,7 +8065,7 @@ static int avif_write_trailer(AVFormatContext *s)
> mov_write_meta_tag(pb, mov, s);
>
> moov_size = get_moov_size(s);
> - for (i = 0; i < s->nb_streams; i++)
> + for (i = 0; i < mov->nb_tracks; i++)
> mov->tracks[i].data_offset = avio_tell(pb) + moov_size + 8;
>
> if (mov->is_animated_avif) {
> @@ -7862,7 +8087,7 @@ static int avif_write_trailer(AVFormatContext *s)
>
> // write extent offsets.
> pos_backup = avio_tell(pb);
> - for (i = 0; i < s->nb_streams; i++) {
> + for (i = 0; i < mov->nb_streams; i++) {
Can you please declare the iterators at loop scope (i.e. "for (int i = 0; ...)")
in all the loops that you touch, where possible?
> if (extent_offsets[i] != (uint32_t)extent_offsets[i]) {
> av_log(s, AV_LOG_ERROR, "extent offset does not fit in 32 bits\n");
> return AVERROR_INVALIDDATA;
> diff --git a/libavformat/movenc.h b/libavformat/movenc.h
> index 60363198c9..08d580594d 100644
> --- a/libavformat/movenc.h
> +++ b/libavformat/movenc.h
> @@ -170,6 +170,11 @@ typedef struct MOVTrack {
> unsigned int squash_fragment_samples_to_one; //< flag to note formats where all samples for a fragment are to be squashed
>
> PacketList squashed_packet_queue;
> +
> + struct IAMFContext *iamf;
> + int first_iamf_idx;
> + int last_iamf_idx;
> + AVIOContext *iamf_buf;
> } MOVTrack;
>
> typedef enum {
> @@ -188,6 +193,7 @@ typedef struct MOVMuxContext {
> const AVClass *av_class;
> int mode;
> int64_t time;
> + int nb_streams;
> int nb_tracks;
> int nb_meta_tmcd; ///< number of new created tmcd track based on metadata (aka not data copy)
> int chapter_track; ///< qt chapter track number
More information about the ffmpeg-devel
mailing list