[FFmpeg-devel] [PATCH 3/3] avformat/movenc: Add support for AVIF muxing

Gyan Doshi ffmpeg at gyani.pro
Wed May 11 20:25:10 EEST 2022



On 2022-05-11 10:24 pm, Vignesh Venkatasubramanian wrote:
> On Wed, May 4, 2022 at 10:15 AM Vignesh Venkatasubramanian
> <vigneshv at google.com> wrote:
>> Add an AVIF muxer by re-using the existing mov/mp4 muxer.
>>
>> AVIF Specification: https://aomediacodec.github.io/av1-avif
>>
>> Sample usage for still image:
>> ffmpeg -i image.png -c:v libaom-av1 -avif-image 1 image.avif
>>
>> Sample usage for animated AVIF image:
>> ffmpeg -i video.mp4 animated.avif
>>
>> We can re-use any of the AV1 encoding options that will make
>> sense for image encoding (like bitrate, tiles, encoding speed,
>> etc).
>>
>> The files generated by this muxer have been verified to be valid
>> AVIF files by the following:
>> 1) Displays on Chrome (both still and animated images).
>> 2) Displays on Firefox (only still images, firefox does not support
>>     animated AVIF yet).
>> 3) Verified to be valid by Compliance Warden:
>>     https://github.com/gpac/ComplianceWarden
>>
>> Fixes the encoder/muxer part of Trac Ticket #7621
>>
>> Signed-off-by: Vignesh Venkatasubramanian <vigneshv at google.com>
>> ---
>>   configure                |   1 +
>>   libavformat/allformats.c |   1 +
>>   libavformat/movenc.c     | 333 ++++++++++++++++++++++++++++++++++++---
>>   libavformat/movenc.h     |   5 +
>>   4 files changed, 316 insertions(+), 24 deletions(-)
>>
>> diff --git a/configure b/configure
>> index 196873c4aa..2992f9760e 100755
>> --- a/configure
>> +++ b/configure
>> @@ -3404,6 +3404,7 @@ asf_stream_muxer_select="asf_muxer"
>>   av1_demuxer_select="av1_frame_merge_bsf av1_parser"
>>   avi_demuxer_select="riffdec exif"
>>   avi_muxer_select="riffenc"
>> +avif_muxer_select="mov_muxer"
>>   caf_demuxer_select="iso_media"
>>   caf_muxer_select="iso_media"
>>   dash_muxer_select="mp4_muxer"
>> diff --git a/libavformat/allformats.c b/libavformat/allformats.c
>> index 63876c468f..1802536633 100644
>> --- a/libavformat/allformats.c
>> +++ b/libavformat/allformats.c
>> @@ -81,6 +81,7 @@ extern const AVOutputFormat ff_au_muxer;
>>   extern const AVInputFormat  ff_av1_demuxer;
>>   extern const AVInputFormat  ff_avi_demuxer;
>>   extern const AVOutputFormat ff_avi_muxer;
>> +extern const AVOutputFormat ff_avif_muxer;
>>   extern const AVInputFormat  ff_avisynth_demuxer;
>>   extern const AVOutputFormat ff_avm2_muxer;
>>   extern const AVInputFormat  ff_avr_demuxer;
>> diff --git a/libavformat/movenc.c b/libavformat/movenc.c
>> index 271db99b46..1a20fe17ca 100644
>> --- a/libavformat/movenc.c
>> +++ b/libavformat/movenc.c
>> @@ -1335,7 +1335,7 @@ static int mov_write_av1c_tag(AVIOContext *pb, MOVTrack *track)
>>
>>       avio_wb32(pb, 0);
>>       ffio_wfourcc(pb, "av1C");
>> -    ff_isom_write_av1c(pb, track->vos_data, track->vos_len, 1);
>> +    ff_isom_write_av1c(pb, track->vos_data, track->vos_len, track->mode != MODE_AVIF);
>>       return update_size(pb, pos);
>>   }
>>
>> @@ -2037,12 +2037,13 @@ static int mov_write_colr_tag(AVIOContext *pb, MOVTrack *track, int prefer_icc)
>>           }
>>       }
>>
>> -    /* We should only ever be called by MOV or MP4. */
>> -    av_assert0(track->mode == MODE_MOV || track->mode == MODE_MP4);
>> +    /* We should only ever be called for MOV, MP4 and AVIF. */
>> +    av_assert0(track->mode == MODE_MOV || track->mode == MODE_MP4 ||
>> +               track->mode == MODE_AVIF);
>>
>>       avio_wb32(pb, 0); /* size */
>>       ffio_wfourcc(pb, "colr");
>> -    if (track->mode == MODE_MP4)
>> +    if (track->mode == MODE_MP4 || track->mode == MODE_AVIF)
>>           ffio_wfourcc(pb, "nclx");
>>       else
>>           ffio_wfourcc(pb, "nclc");
>> @@ -2052,7 +2053,7 @@ static int mov_write_colr_tag(AVIOContext *pb, MOVTrack *track, int prefer_icc)
>>       avio_wb16(pb, track->par->color_primaries);
>>       avio_wb16(pb, track->par->color_trc);
>>       avio_wb16(pb, track->par->color_space);
>> -    if (track->mode == MODE_MP4) {
>> +    if (track->mode == MODE_MP4 || track->mode == MODE_AVIF) {
>>           int full_range = track->par->color_range == AVCOL_RANGE_JPEG;
>>           avio_w8(pb, full_range << 7);
>>       }
>> @@ -2118,7 +2119,7 @@ static void find_compressor(char * compressor_name, int len, MOVTrack *track)
>>                     || (track->par->width == 1440 && track->par->height == 1080)
>>                     || (track->par->width == 1920 && track->par->height == 1080);
>>
>> -    if (track->mode == MODE_MOV &&
>> +    if ((track->mode == MODE_AVIF || track->mode == MODE_MOV) &&
>>           (encoder = av_dict_get(track->st->metadata, "encoder", NULL, 0))) {
>>           av_strlcpy(compressor_name, encoder->value, 32);
>>       } else if (track->par->codec_id == AV_CODEC_ID_MPEG2VIDEO && xdcam_res) {
>> @@ -2139,6 +2140,25 @@ static void find_compressor(char * compressor_name, int len, MOVTrack *track)
>>       }
>>   }
>>
>> +static int mov_write_ccst_tag(AVIOContext *pb)
>> +{
>> +    int64_t pos = avio_tell(pb);
>> +    // Write sane defaults:
>> +    // all_ref_pics_intra = 0 : all samples can use any type of reference.
>> +    // intra_pred_used = 1 : intra prediction may or may not be used.
>> +    // max_ref_per_pic = 15 : reserved value to indicate that any number of
>> +    //                        reference images can be used.
>> +    uint8_t ccstValue = (0 << 7) |  /* all_ref_pics_intra */
>> +                        (1 << 6) |  /* intra_pred_used */
>> +                        (15 << 2);  /* max_ref_per_pic */
>> +    avio_wb32(pb, 0); /* size */
>> +    ffio_wfourcc(pb, "ccst");
>> +    avio_wb32(pb, 0); /* Version & flags */
>> +    avio_w8(pb, ccstValue);
>> +    avio_wb24(pb, 0);  /* reserved */
>> +    return update_size(pb, pos);
>> +}
>> +
>>   static int mov_write_video_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContext *mov, MOVTrack *track)
>>   {
>>       int ret = AVERROR_BUG;
>> @@ -2272,7 +2292,7 @@ static int mov_write_video_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContex
>>           else
>>               av_log(mov->fc, AV_LOG_WARNING, "Not writing 'gama' atom. Format is not MOV.\n");
>>       }
>> -    if (track->mode == MODE_MOV || track->mode == MODE_MP4) {
>> +    if (track->mode == MODE_MOV || track->mode == MODE_MP4 || track->mode == MODE_AVIF) {
>>           int has_color_info = track->par->color_primaries != AVCOL_PRI_UNSPECIFIED &&
>>                                track->par->color_trc != AVCOL_TRC_UNSPECIFIED &&
>>                                track->par->color_space != AVCOL_SPC_UNSPECIFIED;
>> @@ -2324,6 +2344,9 @@ static int mov_write_video_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContex
>>       if (avid)
>>           avio_wb32(pb, 0);
>>
>> +    if (track->mode == MODE_AVIF)
>> +        mov_write_ccst_tag(pb);
>> +
>>       return update_size(pb, pos);
>>   }
>>
>> @@ -2826,8 +2849,13 @@ static int mov_write_hdlr_tag(AVFormatContext *s, AVIOContext *pb, MOVTrack *tra
>>       if (track) {
>>           hdlr = (track->mode == MODE_MOV) ? "mhlr" : "\0\0\0\0";
>>           if (track->par->codec_type == AVMEDIA_TYPE_VIDEO) {
>> -            hdlr_type = "vide";
>> -            descr     = "VideoHandler";
>> +            if (track->mode == MODE_AVIF) {
>> +                hdlr_type = "pict";
>> +                descr     = "PictureHandler";
>> +            } else {
>> +                hdlr_type = "vide";
>> +                descr     = "VideoHandler";
>> +            }
>>           } else if (track->par->codec_type == AVMEDIA_TYPE_AUDIO) {
>>               hdlr_type = "soun";
>>               descr     = "SoundHandler";
>> @@ -2892,6 +2920,128 @@ static int mov_write_hdlr_tag(AVFormatContext *s, AVIOContext *pb, MOVTrack *tra
>>       return update_size(pb, pos);
>>   }
>>
>> +static int mov_write_pitm_tag(AVIOContext *pb, int item_id)
>> +{
>> +    int64_t pos = avio_tell(pb);
>> +    avio_wb32(pb, 0); /* size */
>> +    ffio_wfourcc(pb, "pitm");
>> +    avio_wb32(pb, 0); /* Version & flags */
>> +    avio_wb16(pb, item_id); /* item_id */
>> +    return update_size(pb, pos);
>> +}
>> +
>> +static int mov_write_iloc_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
>> +{
>> +    int64_t pos = avio_tell(pb);
>> +    avio_wb32(pb, 0); /* size */
>> +    ffio_wfourcc(pb, "iloc");
>> +    avio_wb32(pb, 0); /* Version & flags */
>> +    avio_w8(pb, (4 << 4) + 4); /* offset_size(4) and length_size(4) */
>> +    avio_w8(pb, 0); /* base_offset_size(4) and reserved(4) */
>> +    avio_wb16(pb, 1); /* item_count */
>> +
>> +    avio_wb16(pb, 1); /* item_id */
>> +    avio_wb16(pb, 0); /* data_reference_index */
>> +    avio_wb16(pb, 1); /* extent_count */
>> +    mov->avif_extent_pos = avio_tell(pb);
>> +    avio_wb32(pb, 0); /* extent_offset (written later) */
>> +    // For animated AVIF, we simply write the first packet's size.
>> +    avio_wb32(pb, mov->avif_extent_length); /* extent_length */
>> +
>> +    return update_size(pb, pos);
>> +}
>> +
>> +static int mov_write_iinf_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
>> +{
>> +    int64_t infe_pos;
>> +    int64_t iinf_pos = avio_tell(pb);
>> +    avio_wb32(pb, 0); /* size */
>> +    ffio_wfourcc(pb, "iinf");
>> +    avio_wb32(pb, 0); /* Version & flags */
>> +    avio_wb16(pb, 1); /* entry_count */
>> +
>> +    infe_pos = avio_tell(pb);
>> +    avio_wb32(pb, 0); /* size */
>> +    ffio_wfourcc(pb, "infe");
>> +    avio_w8(pb, 0x2); /* Version */
>> +    avio_wb24(pb, 0); /* flags */
>> +    avio_wb16(pb, 1); /* item_id */
>> +    avio_wb16(pb, 0); /* item_protection_index */
>> +    avio_write(pb, "av01", 4); /* item_type */
>> +    avio_write(pb, "Color\0", 6); /* item_name */
>> +    update_size(pb, infe_pos);
>> +
>> +    return update_size(pb, iinf_pos);
>> +}
>> +
>> +static int mov_write_ispe_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
>> +{
>> +    int64_t pos = avio_tell(pb);
>> +    avio_wb32(pb, 0); /* size */
>> +    ffio_wfourcc(pb, "ispe");
>> +    avio_wb32(pb, 0); /* Version & flags */
>> +    avio_wb32(pb, s->streams[0]->codecpar->width); /* image_width */
>> +    avio_wb32(pb, s->streams[0]->codecpar->height); /* image_height */
>> +    return update_size(pb, pos);
>> +}
>> +
>> +static int mov_write_pixi_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
>> +{
>> +    int64_t pos = avio_tell(pb);
>> +    const AVPixFmtDescriptor *pixdesc = av_pix_fmt_desc_get(s->streams[0]->codecpar->format);
>> +    avio_wb32(pb, 0); /* size */
>> +    ffio_wfourcc(pb, "pixi");
>> +    avio_wb32(pb, 0); /* Version & flags */
>> +    avio_w8(pb, pixdesc->nb_components); /* num_channels */
>> +    for (int i = 0; i < pixdesc->nb_components; ++i) {
>> +      avio_w8(pb, pixdesc->comp[i].depth); /* bits_per_channel */
>> +    }
>> +    return update_size(pb, pos);
>> +}
>> +
>> +static int mov_write_ipco_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
>> +{
>> +    int64_t pos = avio_tell(pb);
>> +    avio_wb32(pb, 0); /* size */
>> +    ffio_wfourcc(pb, "ipco");
>> +    mov_write_ispe_tag(pb, mov, s);
>> +    mov_write_pixi_tag(pb, mov, s);
>> +    mov_write_av1c_tag(pb, &mov->tracks[0]);
>> +    mov_write_colr_tag(pb, &mov->tracks[0], 0);
>> +    return update_size(pb, pos);
>> +}
>> +
>> +static int mov_write_ipma_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
>> +{
>> +    int64_t pos = avio_tell(pb);
>> +    avio_wb32(pb, 0); /* size */
>> +    ffio_wfourcc(pb, "ipma");
>> +    avio_wb32(pb, 0); /* Version & flags */
>> +    avio_wb32(pb, 1); /* entry_count */
>> +    avio_wb16(pb, 1); /* item_ID */
>> +    avio_w8(pb, 4); /* association_count */
>> +
>> +    // ispe association.
>> +    avio_w8(pb, 1); /* essential and property_index */
>> +    // pixi association.
>> +    avio_w8(pb, 2); /* essential and property_index */
>> +    // av1C association.
>> +    avio_w8(pb, 0x80 | 3); /* essential and property_index */
>> +    // colr association.
>> +    avio_w8(pb, 4); /* essential and property_index */
>> +    return update_size(pb, pos);
>> +}
>> +
>> +static int mov_write_iprp_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
>> +{
>> +    int64_t pos = avio_tell(pb);
>> +    avio_wb32(pb, 0); /* size */
>> +    ffio_wfourcc(pb, "iprp");
>> +    mov_write_ipco_tag(pb, mov, s);
>> +    mov_write_ipma_tag(pb, mov, s);
>> +    return update_size(pb, pos);
>> +}
>> +
>>   static int mov_write_hmhd_tag(AVIOContext *pb)
>>   {
>>       /* This atom must be present, but leaving the values at zero
>> @@ -3137,7 +3287,7 @@ static int mov_write_tkhd_tag(AVIOContext *pb, MOVMuxContext *mov,
>>       if (st && (track->par->codec_type == AVMEDIA_TYPE_VIDEO ||
>>                  track->par->codec_type == AVMEDIA_TYPE_SUBTITLE)) {
>>           int64_t track_width_1616;
>> -        if (track->mode == MODE_MOV) {
>> +        if (track->mode == MODE_MOV || track->mode == MODE_AVIF) {
>>               track_width_1616 = track->par->width * 0x10000ULL;
>>           } else {
>>               track_width_1616 = av_rescale(st->sample_aspect_ratio.num,
>> @@ -3947,8 +4097,15 @@ static int mov_write_meta_tag(AVIOContext *pb, MOVMuxContext *mov,
>>           mov_write_mdta_hdlr_tag(pb, mov, s);
>>           mov_write_mdta_keys_tag(pb, mov, s);
>>           mov_write_mdta_ilst_tag(pb, mov, s);
>> -    }
>> -    else {
>> +    } else if (mov->mode == MODE_AVIF) {
>> +        mov_write_hdlr_tag(s, pb, &mov->tracks[0]);
>> +        // We always write the primary item id as 1 since only one track is
>> +        // supported for AVIF.
>> +        mov_write_pitm_tag(pb, 1);
>> +        mov_write_iloc_tag(pb, mov, s);
>> +        mov_write_iinf_tag(pb, mov, s);
>> +        mov_write_iprp_tag(pb, mov, s);
>> +    } else {
>>           /* iTunes metadata tag */
>>           mov_write_itunes_hdlr_tag(pb, mov, s);
>>           mov_write_ilst_tag(pb, mov, s);
>> @@ -4278,10 +4435,11 @@ static int mov_write_moov_tag(AVIOContext *pb, MOVMuxContext *mov,
>>       }
>>
>>       mov_write_mvhd_tag(pb, mov);
>> -    if (mov->mode != MODE_MOV && !mov->iods_skip)
>> +    if (mov->mode != MODE_MOV && mov->mode != MODE_AVIF && !mov->iods_skip)
>>           mov_write_iods_tag(pb, mov);
>>       for (i = 0; i < mov->nb_streams; i++) {
>> -        if (mov->tracks[i].entry > 0 || mov->flags & FF_MOV_FLAG_FRAGMENT) {
>> +        if (mov->tracks[i].entry > 0 || mov->flags & FF_MOV_FLAG_FRAGMENT ||
>> +            mov->mode == MODE_AVIF) {
>>               int ret = mov_write_trak_tag(s, pb, mov, &(mov->tracks[i]), i < s->nb_streams ? s->streams[i] : NULL);
>>               if (ret < 0)
>>                   return ret;
>> @@ -4292,7 +4450,7 @@ static int mov_write_moov_tag(AVIOContext *pb, MOVMuxContext *mov,
>>
>>       if (mov->mode == MODE_PSP)
>>           mov_write_uuidusmt_tag(pb, s);
>> -    else
>> +    else if (mov->mode != MODE_AVIF)
>>           mov_write_udta_tag(pb, mov, s);
>>
>>       return update_size(pb, pos);
>> @@ -5039,6 +5197,9 @@ static void mov_write_ftyp_tag_internal(AVIOContext *pb, AVFormatContext *s,
>>       else if (mov->mode == MODE_3GP) {
>>           ffio_wfourcc(pb, has_h264 ? "3gp6"  : "3gp4");
>>           minor =     has_h264 ?   0x100 :   0x200;
>> +    } else if (mov->mode == MODE_AVIF) {
>> +        ffio_wfourcc(pb, mov->is_animated_avif ? "avis" : "avif");
>> +        minor = 0;
>>       } else if (mov->mode & MODE_3G2) {
>>           ffio_wfourcc(pb, has_h264 ? "3g2b"  : "3g2a");
>>           minor =     has_h264 ? 0x20000 : 0x10000;
>> @@ -5102,6 +5263,31 @@ static int mov_write_ftyp_tag(AVIOContext *pb, AVFormatContext *s)
>>       // compatible brand a second time.
>>       if (mov->mode == MODE_ISM) {
>>           ffio_wfourcc(pb, "piff");
>> +    } else if (mov->mode == MODE_AVIF) {
>> +        const AVPixFmtDescriptor *pix_fmt_desc =
>> +            av_pix_fmt_desc_get(s->streams[0]->codecpar->format);
>> +        const int depth = pix_fmt_desc->comp[0].depth;
>> +        if (mov->is_animated_avif) {
>> +            // For animated AVIF, major brand is "avis". Add "avif" as a
>> +            // compatible brand.
>> +            ffio_wfourcc(pb, "avif");
>> +            ffio_wfourcc(pb, "msf1");
>> +            ffio_wfourcc(pb, "iso8");
>> +        }
>> +        ffio_wfourcc(pb, "mif1");
>> +        ffio_wfourcc(pb, "miaf");
>> +        if (depth == 8 || depth == 10) {
>> +            // MA1B and MA1A brands are based on AV1 profile. Short hand for
>> +            // computing that is based on chroma subsampling type. 420 chroma
>> +            // subsampling is MA1B.  444 chroma subsampling is MA1A.
>> +            if (!pix_fmt_desc->log2_chroma_w && !pix_fmt_desc->log2_chroma_h) {
>> +                // 444 chroma subsampling.
>> +                ffio_wfourcc(pb, "MA1A");
>> +            } else {
>> +                // 420 chroma subsampling.
>> +                ffio_wfourcc(pb, "MA1B");
>> +            }
>> +        }
>>       } else if (mov->mode != MODE_MOV) {
>>           // We add tfdt atoms when fragmenting, signal this with the iso6 compatible
>>           // brand, if not already the major brand. This is compatible with users that
>> @@ -5705,7 +5891,7 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
>>       if (ret < 0)
>>           return ret;
>>
>> -    if (mov->flags & FF_MOV_FLAG_FRAGMENT) {
>> +    if (mov->flags & FF_MOV_FLAG_FRAGMENT || mov->mode == MODE_AVIF) {
>>           int ret;
>>           if (mov->moov_written || mov->flags & FF_MOV_FLAG_EMPTY_MOOV) {
>>               if (mov->frag_interleave && mov->fragments > 0) {
>> @@ -5846,7 +6032,11 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
>>               avio_write(pb, reformatted_data, size);
>>           } else {
>>               size = ff_av1_filter_obus(pb, pkt->data, pkt->size);
>> +            if (trk->mode == MODE_AVIF && !mov->avif_extent_length) {
>> +                mov->avif_extent_length = size;
>> +            }
>>           }
>> +
>>   #if CONFIG_AC3_PARSER
>>       } else if (par->codec_id == AV_CODEC_ID_EAC3) {
>>           size = handle_eac3(mov, pkt, trk);
>> @@ -6579,11 +6769,15 @@ static int mov_init(AVFormatContext *s)
>>       else if (IS_MODE(ipod, IPOD)) mov->mode = MODE_IPOD;
>>       else if (IS_MODE(ismv, ISMV)) mov->mode = MODE_ISM;
>>       else if (IS_MODE(f4v,   F4V)) mov->mode = MODE_F4V;
>> +    else if (IS_MODE(avif, AVIF)) mov->mode = MODE_AVIF;
>>   #undef IS_MODE
>>
>>       if (mov->flags & FF_MOV_FLAG_DELAY_MOOV)
>>           mov->flags |= FF_MOV_FLAG_EMPTY_MOOV;
>>
>> +    if (mov->mode == MODE_AVIF)
>> +        mov->flags |= FF_MOV_FLAG_DELAY_MOOV;
>> +
>>       /* Set the FRAGMENT flag if any of the fragmentation methods are
>>        * enabled. */
>>       if (mov->max_fragment_duration || mov->max_fragment_size ||
>> @@ -6664,11 +6858,25 @@ static int mov_init(AVFormatContext *s)
>>       /* Non-seekable output is ok if using fragmentation. If ism_lookahead
>>        * is enabled, we don't support non-seekable output at all. */
>>       if (!(s->pb->seekable & AVIO_SEEKABLE_NORMAL) &&
>> -        (!(mov->flags & FF_MOV_FLAG_FRAGMENT) || mov->ism_lookahead)) {
>> +        (!(mov->flags & FF_MOV_FLAG_FRAGMENT) || mov->ism_lookahead ||
>> +         mov->mode == MODE_AVIF)) {
>>           av_log(s, AV_LOG_ERROR, "muxer does not support non seekable output\n");
>>           return AVERROR(EINVAL);
>>       }
>>
>> +    /* AVIF output must have exactly one video stream */
>> +    if (mov->mode == MODE_AVIF) {
>> +        if (s->nb_streams > 1) {
>> +            av_log(s, AV_LOG_ERROR, "AVIF output requires exactly one stream\n");
>> +            return AVERROR(EINVAL);
>> +        }
>> +        if (s->streams[0]->codecpar->codec_type != AVMEDIA_TYPE_VIDEO) {
>> +            av_log(s, AV_LOG_ERROR, "AVIF output requires one video stream\n");
>> +            return AVERROR(EINVAL);
>> +        }
>> +        s->streams[0]->disposition |= AV_DISPOSITION_DEFAULT;
>> +    }
>> +
>>       mov->nb_streams = s->nb_streams;
>>       if (mov->mode & (MODE_MP4|MODE_MOV|MODE_IPOD) && s->nb_chapters)
>>           mov->chapter_track = mov->nb_streams++;
>> @@ -6811,12 +7019,13 @@ static int mov_init(AVFormatContext *s)
>>                           pix_fmt == AV_PIX_FMT_MONOWHITE ||
>>                           pix_fmt == AV_PIX_FMT_MONOBLACK;
>>               }
>> -            if (track->par->codec_id == AV_CODEC_ID_VP9 ||
>> -                track->par->codec_id == AV_CODEC_ID_AV1) {
>> -                if (track->mode != MODE_MP4) {
>> -                    av_log(s, AV_LOG_ERROR, "%s only supported in MP4.\n", avcodec_get_name(track->par->codec_id));
>> -                    return AVERROR(EINVAL);
>> -                }
>> +            if (track->par->codec_id == AV_CODEC_ID_VP9 && track->mode != MODE_MP4) {
>> +                av_log(s, AV_LOG_ERROR, "%s only supported in MP4.\n", avcodec_get_name(track->par->codec_id));
>> +                return AVERROR(EINVAL);
>> +            } else if (track->par->codec_id == AV_CODEC_ID_AV1 &&
>> +                       track->mode != MODE_MP4 && track->mode != MODE_AVIF) {
>> +                av_log(s, AV_LOG_ERROR, "%s only supported in MP4 and AVIF.\n", avcodec_get_name(track->par->codec_id));
>> +                return AVERROR(EINVAL);
>>               } else if (track->par->codec_id == AV_CODEC_ID_VP8) {
>>                   /* altref frames handling is not defined in the spec as of version v1.0,
>>                    * so just forbid muxing VP8 streams altogether until a new version does */
>> @@ -7034,7 +7243,7 @@ static int mov_write_header(AVFormatContext *s)
>>                               FF_MOV_FLAG_FRAG_EVERY_FRAME)) &&
>>               !mov->max_fragment_duration && !mov->max_fragment_size)
>>               mov->flags |= FF_MOV_FLAG_FRAG_KEYFRAME;
>> -    } else {
>> +    } else if (mov->mode != MODE_AVIF) {
>>           if (mov->flags & FF_MOV_FLAG_FASTSTART)
>>               mov->reserved_header_pos = avio_tell(pb);
>>           mov_write_mdat_tag(pb, mov);
>> @@ -7322,6 +7531,50 @@ static int mov_check_bitstream(AVFormatContext *s, AVStream *st,
>>       return ret;
>>   }
>>
>> +static int avif_write_trailer(AVFormatContext *s)
>> +{
>> +    AVIOContext *pb = s->pb;
>> +    MOVMuxContext *mov = s->priv_data;
>> +    int64_t pos_backup, mdat_pos;
>> +    uint8_t *buf;
>> +    int buf_size, moov_size;
>> +
>> +    if (mov->moov_written) return 0;
>> +
>> +    mov->is_animated_avif = s->streams[0]->nb_frames > 1;
>> +    mov_write_identification(pb, s);
>> +    mov_write_meta_tag(pb, mov, s);
>> +
>> +    moov_size = get_moov_size(s);
>> +    mov->tracks[0].data_offset = avio_tell(pb) + moov_size + 8;
>> +
>> +    if (mov->is_animated_avif) {
>> +        int ret;
>> +        if ((ret = mov_write_moov_tag(pb, mov, s)) < 0)
>> +            return ret;
>> +    }
>> +
>> +    buf_size = avio_get_dyn_buf(mov->mdat_buf, &buf);
>> +    avio_wb32(pb, buf_size + 8);
>> +    ffio_wfourcc(pb, "mdat");
>> +    mdat_pos = avio_tell(pb);
>> +
>> +    if (mdat_pos != (uint32_t)mdat_pos) {
>> +        av_log(s, AV_LOG_ERROR, "mdat offset does not fit in 32 bits\n");
>> +        return AVERROR_INVALIDDATA;
>> +    }
>> +
>> +    avio_write(pb, buf, buf_size);
>> +
>> +    // write extent offset.
>> +    pos_backup = avio_tell(pb);
>> +    avio_seek(pb, mov->avif_extent_pos, SEEK_SET);
>> +    avio_wb32(pb, mdat_pos); /* rewrite offset */
>> +    avio_seek(pb, pos_backup, SEEK_SET);
>> +
>> +    return 0;
>> +}
>> +
>>   #if CONFIG_TGP_MUXER || CONFIG_TG2_MUXER
>>   static const AVCodecTag codec_3gp_tags[] = {
>>       { AV_CODEC_ID_H263,     MKTAG('s','2','6','3') },
>> @@ -7404,6 +7657,20 @@ static const AVCodecTag codec_f4v_tags[] = {
>>       { AV_CODEC_ID_NONE, 0 },
>>   };
>>
>> +#if CONFIG_AVIF_MUXER
>> +static const AVCodecTag codec_avif_tags[] = {
>> +    { AV_CODEC_ID_AV1,     MKTAG('a','v','0','1') },
>> +    { AV_CODEC_ID_NONE, 0 },
>> +};
>> +static const AVCodecTag *const codec_avif_tags_list[] = { codec_avif_tags, NULL };
>> +
>> +static const AVClass mov_avif_muxer_class = {
>> +    .class_name = "avif muxer",
>> +    .item_name  = av_default_item_name,
>> +    .version    = LIBAVUTIL_VERSION_INT,
>> +};
>> +#endif
>> +
>>   #if CONFIG_MOV_MUXER
>>   const AVOutputFormat ff_mov_muxer = {
>>       .name              = "mov",
>> @@ -7566,3 +7833,21 @@ const AVOutputFormat ff_f4v_muxer = {
>>       .priv_class        = &mov_isobmff_muxer_class,
>>   };
>>   #endif
>> +#if CONFIG_AVIF_MUXER
>> +const AVOutputFormat ff_avif_muxer = {
>> +    .name              = "avif",
>> +    .long_name         = NULL_IF_CONFIG_SMALL("AVIF"),
>> +    .mime_type         = "image/avif",
>> +    .extensions        = "avif",
>> +    .priv_data_size    = sizeof(MOVMuxContext),
>> +    .video_codec       = AV_CODEC_ID_AV1,
>> +    .init              = mov_init,
>> +    .write_header      = mov_write_header,
>> +    .write_packet      = mov_write_packet,
>> +    .write_trailer     = avif_write_trailer,
>> +    .deinit            = mov_free,
>> +    .flags             = AVFMT_GLOBALHEADER | AVFMT_ALLOW_FLUSH,
>> +    .codec_tag         = codec_avif_tags_list,
>> +    .priv_class        = &mov_avif_muxer_class,
>> +};
>> +#endif
>> diff --git a/libavformat/movenc.h b/libavformat/movenc.h
>> index ca507e0e04..281576cc66 100644
>> --- a/libavformat/movenc.h
>> +++ b/libavformat/movenc.h
>> @@ -43,6 +43,7 @@
>>   #define MODE_IPOD 0x20
>>   #define MODE_ISM  0x40
>>   #define MODE_F4V  0x80
>> +#define MODE_AVIF 0x100
>>
>>   typedef struct MOVIentry {
>>       uint64_t     pos;
>> @@ -244,6 +245,10 @@ typedef struct MOVMuxContext {
>>       MOVPrftBox write_prft;
>>       int empty_hdlr_name;
>>       int movie_timescale;
>> +
>> +    int64_t avif_extent_pos;
>> +    int avif_extent_length;
>> +    int is_animated_avif;
>>   } MOVMuxContext;
>>
>>   #define FF_MOV_FLAG_RTP_HINT              (1 <<  0)
>> --
>> 2.36.0.512.ge40c2bad7a-goog
>>
> Any more comments on this? If not, can this be merged, please?

Will test and push.

Regards,
Gyan


More information about the ffmpeg-devel mailing list