[FFmpeg-devel] [PATCH] avformat/movenc: Support alpha channel for AVIF

Vignesh Venkatasubramanian vigneshv at google.com
Tue May 31 22:12:43 EEST 2022


On Thu, May 19, 2022 at 9:08 AM Vignesh Venkatasubramanian
<vigneshv at google.com> wrote:
>
> On Wed, May 18, 2022 at 7:36 PM Bang He <hezhanbang at gmail.com> wrote:
> >
> > error happened:
> >
> > ./ffmpeg -i alpha.png -filter_complex [0:v]alphaextract[a] -map 0 -map [a]
> > -still-picture 1 avif_with_alpha.avif
> > ffmpeg version N-106936-gff5ea89da2 Copyright (c) 2000-2022 the FFmpeg
> > developers
> >   built with gcc 11 (Ubuntu 11.2.0-19ubuntu1)
> >   configuration: --disable-ffplay --disable-ffprobe --enable-gpl
> > --enable-nonfree --prefix=/home/bang/Desktop/out
> >   libavutil      57. 24.101 / 57. 24.101
> >   libavcodec     59. 28.100 / 59. 28.100
> >   libavformat    59. 24.100 / 59. 24.100
> >   libavdevice    59.  6.100 / 59.  6.100
> >   libavfilter     8. 38.100 /  8. 38.100
> >   libswscale      6.  6.100 /  6.  6.100
> >   libswresample   4.  6.100 /  4.  6.100
> >   libpostproc    56.  5.100 / 56.  5.100
> > Unrecognized option 'still-picture'.
> > Error splitting the argument list: Option not found
> >
>
> You would have to build ffmpeg with libaom enabled for AVIF encoding
> to work (since AVIF is backed by an AV1 encoder). From the logs you
> posted, it seems like you don't have an AV1 encoder available in your
> ffmpeg build.
>
> The "-still-picture" flag was added in ab05e9a7f2. Please make sure
> that you are sync'ed past that to pick up that option.
>
> > On Thu, May 19, 2022 at 5:56 AM Vignesh Venkatasubramanian <
> > vigneshv-at-google.com at ffmpeg.org> wrote:
> >
> > > AVIF specification allows for alpha channel as an auxiliary item (in
> > > case of still images) or as an auxiliary track (in case of animated
> > > images).  Add support for both of these. The AVIF muxer will take
> > > exactly two streams (when alpha is present) as input (first one being
> > > the YUV planes and the second one being the alpha plane).
> > >
> > > The input has to come from two different images (one of it color and
> > > the other one being alpha), or it can come from a single file
> > > source with the alpha channel extracted using the "alphaextract"
> > > filter.
> > >
> > > Example using alphaextract:
> > > ffmpeg -i rgba.png -filter_complex "[0:v]alphaextract[a]" -map 0 -map
> > > "[a]" -still-picture 1 avif_with_alpha.avif
> > >
> > > Example using two sources (first source can be in any pixel format and
> > > the second source has to be in monochrome grey pixel format):
> > > ffmpeg -i color.avif -i grey.avif -map 0 -map 1 -c copy
> > > avif_with_alpha.avif
> > >
> > > The generated files pass the compliance checks in Compliance Warden:
> > > https://github.com/gpac/ComplianceWarden
> > >
> > > libavif (the reference avif library) is able to decode the files
> > > generated using this patch.
> > >
> > > They also play back properly (with transparent background) in:
> > > 1) Chrome
> > > 2) Firefox (only still AVIF, no animation support)
> > >
> > > Signed-off-by: Vignesh Venkatasubramanian <vigneshv at google.com>
> > > ---
> > >  libavformat/movenc.c | 185 +++++++++++++++++++++++++++++--------------
> > >  libavformat/movenc.h |   4 +-
> > >  2 files changed, 128 insertions(+), 61 deletions(-)
> > >
> > > diff --git a/libavformat/movenc.c b/libavformat/movenc.c
> > > index de971f94e8..00e42b7abb 100644
> > > --- a/libavformat/movenc.c
> > > +++ b/libavformat/movenc.c
> > > @@ -2852,7 +2852,7 @@ static int mov_write_hdlr_tag(AVFormatContext *s,
> > > AVIOContext *pb, MOVTrack *tra
> > >          hdlr = (track->mode == MODE_MOV) ? "mhlr" : "\0\0\0\0";
> > >          if (track->par->codec_type == AVMEDIA_TYPE_VIDEO) {
> > >              if (track->mode == MODE_AVIF) {
> > > -                hdlr_type = "pict";
> > > +                hdlr_type = (track == &mov->tracks[0]) ? "pict" : "auxv";
> > >                  descr     = "PictureHandler";
> > >              } else {
> > >                  hdlr_type = "vide";
> > > @@ -2940,57 +2940,83 @@ static int mov_write_iloc_tag(AVIOContext *pb,
> > > MOVMuxContext *mov, AVFormatConte
> > >      avio_wb32(pb, 0); /* Version & flags */
> > >      avio_w8(pb, (4 << 4) + 4); /* offset_size(4) and length_size(4) */
> > >      avio_w8(pb, 0); /* base_offset_size(4) and reserved(4) */
> > > -    avio_wb16(pb, 1); /* item_count */
> > > +    avio_wb16(pb, s->nb_streams); /* item_count */
> > >
> > > -    avio_wb16(pb, 1); /* item_id */
> > > -    avio_wb16(pb, 0); /* data_reference_index */
> > > -    avio_wb16(pb, 1); /* extent_count */
> > > -    mov->avif_extent_pos = avio_tell(pb);
> > > -    avio_wb32(pb, 0); /* extent_offset (written later) */
> > > -    // For animated AVIF, we simply write the first packet's size.
> > > -    avio_wb32(pb, mov->avif_extent_length); /* extent_length */
> > > +    for (int i = 0; i < s->nb_streams; i++) {
> > > +        avio_wb16(pb, i + 1); /* item_id */
> > > +        avio_wb16(pb, 0); /* data_reference_index */
> > > +        avio_wb16(pb, 1); /* extent_count */
> > > +        mov->avif_extent_pos[i] = avio_tell(pb);
> > > +        avio_wb32(pb, 0); /* extent_offset (written later) */
> > > +        // For animated AVIF, we simply write the first packet's size.
> > > +        avio_wb32(pb, mov->avif_extent_length[i]); /* extent_length */
> > > +    }
> > >
> > >      return update_size(pb, pos);
> > >  }
> > >
> > >  static int mov_write_iinf_tag(AVIOContext *pb, MOVMuxContext *mov,
> > > AVFormatContext *s)
> > >  {
> > > -    int64_t infe_pos;
> > >      int64_t iinf_pos = avio_tell(pb);
> > >      avio_wb32(pb, 0); /* size */
> > >      ffio_wfourcc(pb, "iinf");
> > >      avio_wb32(pb, 0); /* Version & flags */
> > > -    avio_wb16(pb, 1); /* entry_count */
> > > +    avio_wb16(pb, s->nb_streams); /* entry_count */
> > >
> > > -    infe_pos = avio_tell(pb);
> > > -    avio_wb32(pb, 0); /* size */
> > > -    ffio_wfourcc(pb, "infe");
> > > -    avio_w8(pb, 0x2); /* Version */
> > > -    avio_wb24(pb, 0); /* flags */
> > > -    avio_wb16(pb, 1); /* item_id */
> > > -    avio_wb16(pb, 0); /* item_protection_index */
> > > -    avio_write(pb, "av01", 4); /* item_type */
> > > -    avio_write(pb, "Color\0", 6); /* item_name */
> > > -    update_size(pb, infe_pos);
> > > +    for (int i = 0; i < s->nb_streams; i++) {
> > > +        int64_t infe_pos = avio_tell(pb);
> > > +        avio_wb32(pb, 0); /* size */
> > > +        ffio_wfourcc(pb, "infe");
> > > +        avio_w8(pb, 0x2); /* Version */
> > > +        avio_wb24(pb, 0); /* flags */
> > > +        avio_wb16(pb, i + 1); /* item_id */
> > > +        avio_wb16(pb, 0); /* item_protection_index */
> > > +        avio_write(pb, "av01", 4); /* item_type */
> > > +        avio_write(pb, !i ? "Color\0" : "Alpha\0", 6); /* item_name */
> > > +        update_size(pb, infe_pos);
> > > +    }
> > >
> > >      return update_size(pb, iinf_pos);
> > >  }
> > >
> > > -static int mov_write_ispe_tag(AVIOContext *pb, MOVMuxContext *mov,
> > > AVFormatContext *s)
> > > +
> > > +static int mov_write_iref_tag(AVIOContext *pb, MOVMuxContext *mov,
> > > AVFormatContext *s)
> > > +{
> > > +    int64_t auxl_pos;
> > > +    int64_t iref_pos = avio_tell(pb);
> > > +    avio_wb32(pb, 0); /* size */
> > > +    ffio_wfourcc(pb, "iref");
> > > +    avio_wb32(pb, 0); /* Version & flags */
> > > +
> > > +    auxl_pos = avio_tell(pb);
> > > +    avio_wb32(pb, 0); /* size */
> > > +    ffio_wfourcc(pb, "auxl");
> > > +    avio_wb16(pb, 2); /* from_item_ID */
> > > +    avio_wb16(pb, 1); /* reference_count */
> > > +    avio_wb16(pb, 1); /* to_item_ID */
> > > +    update_size(pb, auxl_pos);
> > > +
> > > +    return update_size(pb, iref_pos);
> > > +}
> > > +
> > > +static int mov_write_ispe_tag(AVIOContext *pb, MOVMuxContext *mov,
> > > AVFormatContext *s,
> > > +                              int stream_index)
> > >  {
> > >      int64_t pos = avio_tell(pb);
> > >      avio_wb32(pb, 0); /* size */
> > >      ffio_wfourcc(pb, "ispe");
> > >      avio_wb32(pb, 0); /* Version & flags */
> > > -    avio_wb32(pb, s->streams[0]->codecpar->width); /* image_width */
> > > -    avio_wb32(pb, s->streams[0]->codecpar->height); /* image_height */
> > > +    avio_wb32(pb, s->streams[stream_index]->codecpar->width); /*
> > > image_width */
> > > +    avio_wb32(pb, s->streams[stream_index]->codecpar->height); /*
> > > image_height */
> > >      return update_size(pb, pos);
> > >  }
> > >
> > > -static int mov_write_pixi_tag(AVIOContext *pb, MOVMuxContext *mov,
> > > AVFormatContext *s)
> > > +static int mov_write_pixi_tag(AVIOContext *pb, MOVMuxContext *mov,
> > > AVFormatContext *s,
> > > +                              int stream_index)
> > >  {
> > >      int64_t pos = avio_tell(pb);
> > > -    const AVPixFmtDescriptor *pixdesc =
> > > av_pix_fmt_desc_get(s->streams[0]->codecpar->format);
> > > +    const AVPixFmtDescriptor *pixdesc =
> > > +        av_pix_fmt_desc_get(s->streams[stream_index]->codecpar->format);
> > >      avio_wb32(pb, 0); /* size */
> > >      ffio_wfourcc(pb, "pixi");
> > >      avio_wb32(pb, 0); /* Version & flags */
> > > @@ -3001,15 +3027,30 @@ static int mov_write_pixi_tag(AVIOContext *pb,
> > > MOVMuxContext *mov, AVFormatConte
> > >      return update_size(pb, pos);
> > >  }
> > >
> > > +static int mov_write_auxC_tag(AVIOContext *pb)
> > > +{
> > > +    int64_t pos = avio_tell(pb);
> > > +    avio_wb32(pb, 0); /* size */
> > > +    ffio_wfourcc(pb, "auxC");
> > > +    avio_wb32(pb, 0); /* Version & flags */
> > > +    avio_write(pb, "urn:mpeg:mpegB:cicp:systems:auxiliary:alpha\0", 44);
> > > +    return update_size(pb, pos);
> > > +}
> > > +
> > >  static int mov_write_ipco_tag(AVIOContext *pb, MOVMuxContext *mov,
> > > AVFormatContext *s)
> > >  {
> > >      int64_t pos = avio_tell(pb);
> > >      avio_wb32(pb, 0); /* size */
> > >      ffio_wfourcc(pb, "ipco");
> > > -    mov_write_ispe_tag(pb, mov, s);
> > > -    mov_write_pixi_tag(pb, mov, s);
> > > -    mov_write_av1c_tag(pb, &mov->tracks[0]);
> > > -    mov_write_colr_tag(pb, &mov->tracks[0], 0);
> > > +    for (int i = 0; i < s->nb_streams; i++) {
> > > +        mov_write_ispe_tag(pb, mov, s, i);
> > > +        mov_write_pixi_tag(pb, mov, s, i);
> > > +        mov_write_av1c_tag(pb, &mov->tracks[i]);
> > > +        if (!i)
> > > +            mov_write_colr_tag(pb, &mov->tracks[0], 0);
> > > +        else
> > > +            mov_write_auxC_tag(pb);
> > > +    }
> > >      return update_size(pb, pos);
> > >  }
> > >
> > > @@ -3019,18 +3060,21 @@ static int mov_write_ipma_tag(AVIOContext *pb,
> > > MOVMuxContext *mov, AVFormatConte
> > >      avio_wb32(pb, 0); /* size */
> > >      ffio_wfourcc(pb, "ipma");
> > >      avio_wb32(pb, 0); /* Version & flags */
> > > -    avio_wb32(pb, 1); /* entry_count */
> > > -    avio_wb16(pb, 1); /* item_ID */
> > > -    avio_w8(pb, 4); /* association_count */
> > > -
> > > -    // ispe association.
> > > -    avio_w8(pb, 1); /* essential and property_index */
> > > -    // pixi association.
> > > -    avio_w8(pb, 2); /* essential and property_index */
> > > -    // av1C association.
> > > -    avio_w8(pb, 0x80 | 3); /* essential and property_index */
> > > -    // colr association.
> > > -    avio_w8(pb, 4); /* essential and property_index */
> > > +    avio_wb32(pb, s->nb_streams); /* entry_count */
> > > +
> > > +    for (int i = 0, index = 1; i < s->nb_streams; i++) {
> > > +        avio_wb16(pb, i + 1); /* item_ID */
> > > +        avio_w8(pb, 4); /* association_count */
> > > +
> > > +        // ispe association.
> > > +        avio_w8(pb, index++); /* essential and property_index */
> > > +        // pixi association.
> > > +        avio_w8(pb, index++); /* essential and property_index */
> > > +        // av1C association.
> > > +        avio_w8(pb, 0x80 | index++); /* essential and property_index */
> > > +        // colr/auxC association.
> > > +        avio_w8(pb, index++); /* essential and property_index */
> > > +    }
> > >      return update_size(pb, pos);
> > >  }
> > >
> > > @@ -4112,6 +4156,8 @@ static int mov_write_meta_tag(AVIOContext *pb,
> > > MOVMuxContext *mov,
> > >          mov_write_pitm_tag(pb, 1);
> > >          mov_write_iloc_tag(pb, mov, s);
> > >          mov_write_iinf_tag(pb, mov, s);
> > > +        if (s->nb_streams > 1)
> > > +            mov_write_iref_tag(pb, mov, s);
> > >          mov_write_iprp_tag(pb, mov, s);
> > >      } else {
> > >          /* iTunes metadata tag */
> > > @@ -6040,8 +6086,8 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket
> > > *pkt)
> > >              avio_write(pb, reformatted_data, size);
> > >          } else {
> > >              size = ff_av1_filter_obus(pb, pkt->data, pkt->size);
> > > -            if (trk->mode == MODE_AVIF && !mov->avif_extent_length) {
> > > -                mov->avif_extent_length = size;
> > > +            if (trk->mode == MODE_AVIF &&
> > > !mov->avif_extent_length[pkt->stream_index]) {
> > > +                mov->avif_extent_length[pkt->stream_index] = size;
> > >              }
> > >          }
> > >
> > > @@ -6874,14 +6920,23 @@ static int mov_init(AVFormatContext *s)
> > >
> > >      /* AVIF output must have exactly one video stream */
> > >      if (mov->mode == MODE_AVIF) {
> > > -        if (s->nb_streams > 1) {
> > > -            av_log(s, AV_LOG_ERROR, "AVIF output requires exactly one
> > > stream\n");
> > > +        if (s->nb_streams > 2) {
> > > +            av_log(s, AV_LOG_ERROR, "AVIF output requires exactly one or
> > > two streams\n");
> > >              return AVERROR(EINVAL);
> > >          }
> > > -        if (s->streams[0]->codecpar->codec_type != AVMEDIA_TYPE_VIDEO) {
> > > -            av_log(s, AV_LOG_ERROR, "AVIF output requires one video
> > > stream\n");
> > > +        if (s->streams[0]->codecpar->codec_type != AVMEDIA_TYPE_VIDEO &&
> > > +            (s->nb_streams > 1 && s->streams[1]->codecpar->codec_type !=
> > > AVMEDIA_TYPE_VIDEO)) {
> > > +            av_log(s, AV_LOG_ERROR, "AVIF output supports only video
> > > streams\n");
> > >              return AVERROR(EINVAL);
> > >          }
> > > +        if (s->nb_streams > 1) {
> > > +            const AVPixFmtDescriptor *pixdesc =
> > > +                av_pix_fmt_desc_get(s->streams[1]->codecpar->format);
> > > +            if (pixdesc->nb_components != 1) {
> > > +                av_log(s, AV_LOG_ERROR, "Second stream for AVIF (alpha)
> > > output must have exactly one plane\n");
> > > +                return AVERROR(EINVAL);
> > > +            }
> > > +        }
> > >          s->streams[0]->disposition |= AV_DISPOSITION_DEFAULT;
> > >      }
> > >
> > > @@ -7543,18 +7598,25 @@ static int avif_write_trailer(AVFormatContext *s)
> > >  {
> > >      AVIOContext *pb = s->pb;
> > >      MOVMuxContext *mov = s->priv_data;
> > > -    int64_t pos_backup, mdat_pos;
> > > +    int64_t pos_backup, extent_offsets[2];
> > >      uint8_t *buf;
> > > -    int buf_size, moov_size;
> > > +    int buf_size, moov_size, i;
> > >
> > >      if (mov->moov_written) return 0;
> > >
> > >      mov->is_animated_avif = s->streams[0]->nb_frames > 1;
> > > +    if (mov->is_animated_avif && s->nb_streams > 1) {
> > > +        // For animated avif with alpha channel, we need to write the tref
> > > +        // tag with type "auxl".
> > > +        mov->tracks[1].tref_tag = MKTAG('a', 'u', 'x', 'l');
> > > +        mov->tracks[1].tref_id = 1;
> > > +    }
> > >      mov_write_identification(pb, s);
> > >      mov_write_meta_tag(pb, mov, s);
> > >
> > >      moov_size = get_moov_size(s);
> > > -    mov->tracks[0].data_offset = avio_tell(pb) + moov_size + 8;
> > > +    for (i = 0; i < s->nb_streams; i++)
> > > +        mov->tracks[i].data_offset = avio_tell(pb) + moov_size + 8;
> > >
> > >      if (mov->is_animated_avif) {
> > >          int ret;
> > > @@ -7565,19 +7627,24 @@ static int avif_write_trailer(AVFormatContext *s)
> > >      buf_size = avio_get_dyn_buf(mov->mdat_buf, &buf);
> > >      avio_wb32(pb, buf_size + 8);
> > >      ffio_wfourcc(pb, "mdat");
> > > -    mdat_pos = avio_tell(pb);
> > >
> > > -    if (mdat_pos != (uint32_t)mdat_pos) {
> > > -        av_log(s, AV_LOG_ERROR, "mdat offset does not fit in 32 bits\n");
> > > -        return AVERROR_INVALIDDATA;
> > > -    }
> > > +    // The offset for the YUV planes is the starting position of mdat.
> > > +    extent_offsets[0] = avio_tell(pb);
> > > +    // The offset for alpha plane is YUV offset + YUV size.
> > > +    extent_offsets[1] = extent_offsets[0] + mov->avif_extent_length[0];
> > >
> > >      avio_write(pb, buf, buf_size);
> > >
> > > -    // write extent offset.
> > > +    // write extent offsets.
> > >      pos_backup = avio_tell(pb);
> > > -    avio_seek(pb, mov->avif_extent_pos, SEEK_SET);
> > > -    avio_wb32(pb, mdat_pos); /* rewrite offset */
> > > +    for (i = 0; i < s->nb_streams; i++) {
> > > +        if (extent_offsets[i] != (uint32_t)extent_offsets[i]) {
> > > +            av_log(s, AV_LOG_ERROR, "extent offset does not fit in 32
> > > bits\n");
> > > +            return AVERROR_INVALIDDATA;
> > > +        }
> > > +        avio_seek(pb, mov->avif_extent_pos[i], SEEK_SET);
> > > +        avio_wb32(pb, extent_offsets[i]); /* rewrite offset */
> > > +    }
> > >      avio_seek(pb, pos_backup, SEEK_SET);
> > >
> > >      return 0;
> > > diff --git a/libavformat/movenc.h b/libavformat/movenc.h
> > > index 281576cc66..e4550f7900 100644
> > > --- a/libavformat/movenc.h
> > > +++ b/libavformat/movenc.h
> > > @@ -246,8 +246,8 @@ typedef struct MOVMuxContext {
> > >      int empty_hdlr_name;
> > >      int movie_timescale;
> > >
> > > -    int64_t avif_extent_pos;
> > > -    int avif_extent_length;
> > > +    int64_t avif_extent_pos[2];  // index 0 is YUV and 1 is Alpha.
> > > +    int avif_extent_length[2];   // index 0 is YUV and 1 is Alpha.
> > >      int is_animated_avif;
> > >  } MOVMuxContext;
> > >
> > > --
> > > 2.36.1.124.g0e6072fb45-goog
> > >
> > > _______________________________________________
> > > ffmpeg-devel mailing list
> > > ffmpeg-devel at ffmpeg.org
> > > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> > >
> > > To unsubscribe, visit link above, or email
> > > ffmpeg-devel-request at ffmpeg.org with subject "unsubscribe".
> > >
> > _______________________________________________
> > ffmpeg-devel mailing list
> > ffmpeg-devel at ffmpeg.org
> > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> >
> > To unsubscribe, visit link above, or email
> > ffmpeg-devel-request at ffmpeg.org with subject "unsubscribe".
>
>
>
> --
> Vignesh

Ping on this please. Any comments on this one?

-- 
Vignesh


More information about the ffmpeg-devel mailing list