[FFmpeg-devel] [PATCH] avcodec: add an AV1 parser

Tue Sep 25 02:41:10 EEST 2018

On 24/09/18 01:12, James Almer wrote:
> Simple parser to set keyframes, frame type, structure, width, height, and pixel
> format, plus stream profile and level.
> 
> Signed-off-by: James Almer <jamrial at gmail.com>
> ---
> Missing Changelog entry and version bump.
> 
> This depends on "[PATCH v2 2/3] lavc: Add coded bitstream read/write support
> for AV1" which should be committed in the coming days.
> 
> The AVCodecParser.split() implementation, added for the sake of completeness,
> is very naive and much like the h264 and hevc ones can result in useless OBUs
> being "extracted", but since it's no longer used by libavformat to fill global
> headers when reading raw containers it shouldn't really matter. It's pretty
> much used only by the remove_extradata bsf at this point.
> 
>  configure               |   1 +
>  libavcodec/Makefile     |   1 +
>  libavcodec/av1_parser.c | 218 ++++++++++++++++++++++++++++++++++++++++
>  libavcodec/parsers.c    |   1 +
>  4 files changed, 221 insertions(+)
>  create mode 100644 libavcodec/av1_parser.c
> 
> diff --git a/configure b/configure
> index ca8b599b63..b46c86ec95 100755
> --- a/configure
> +++ b/configure
> @@ -3020,6 +3020,7 @@ wmv3_crystalhd_decoder_select="crystalhd"
>  
>  # parsers
>  aac_parser_select="adts_header"
> +av1_parser_select="cbs_av1"
>  h264_parser_select="golomb h264dsp h264parse"
>  hevc_parser_select="hevcparse"
>  mpegaudio_parser_select="mpegaudioheader"
> diff --git a/libavcodec/Makefile b/libavcodec/Makefile
> index b2c6995f9a..dc28892e64 100644
> --- a/libavcodec/Makefile
> +++ b/libavcodec/Makefile
> @@ -1006,6 +1006,7 @@ OBJS-$(CONFIG_AAC_PARSER)              += aac_parser.o aac_ac3_parser.o \
>                                            mpeg4audio.o
>  OBJS-$(CONFIG_AC3_PARSER)              += ac3tab.o aac_ac3_parser.o
>  OBJS-$(CONFIG_ADX_PARSER)              += adx_parser.o adx.o
> +OBJS-$(CONFIG_AV1_PARSER)              += av1_parser.o
>  OBJS-$(CONFIG_AVS2_PARSER)             += avs2_parser.o
>  OBJS-$(CONFIG_BMP_PARSER)              += bmp_parser.o
>  OBJS-$(CONFIG_CAVSVIDEO_PARSER)        += cavs_parser.o
> diff --git a/libavcodec/av1_parser.c b/libavcodec/av1_parser.c
> new file mode 100644
> index 0000000000..b2e19e2119
> --- /dev/null
> +++ b/libavcodec/av1_parser.c
> @@ -0,0 +1,218 @@
> +/*
> + * AV1 parser
> + *
> + * Copyright (C) 2018 James Almer <jamrial at gmail.com>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include "av1_parse.h"
> +#include "cbs.h"
> +#include "cbs_av1.h"
> +#include "parser.h"
> +
> +typedef struct AV1ParseContext {
> +    CodedBitstreamContext *cbc;
> +    CodedBitstreamFragment temporal_unit;
> +    int parsed_extradata;
> +} AV1ParseContext;
> +
> +static int av1_parser_parse(AVCodecParserContext *ctx,
> +                            AVCodecContext *avctx,
> +                            const uint8_t **out_data, int *out_size,
> +                            const uint8_t *data, int size)
> +{
> +    AV1ParseContext *s = ctx->priv_data;
> +    CodedBitstreamFragment *td = &s->temporal_unit;
> +    CodedBitstreamAV1Context *av1 = s->cbc->priv_data;
> +    int ret;
> +
> +    *out_data = data;
> +    *out_size = size;
> +
> +    ctx->key_frame         = -1;
> +    ctx->pict_type         = AV_PICTURE_TYPE_NONE;
> +    ctx->picture_structure = AV_PICTURE_STRUCTURE_UNKNOWN;
> +
> +    if (avctx->extradata_size && !s->parsed_extradata) {
> +        ret = ff_cbs_read(s->cbc, td, avctx->extradata, avctx->extradata_size);
> +        if (ret < 0) {
> +            av_log(avctx, AV_LOG_ERROR, "Failed to parse extradata.\n");
> +            return size;
> +        }
> +
> +        s->parsed_extradata = 1;
> +
> +        ff_cbs_fragment_uninit(s->cbc, td);
> +    }
> +
> +    ret = ff_cbs_read(s->cbc, td, data, size);
> +    if (ret < 0) {
> +        av_log(avctx, AV_LOG_ERROR, "Failed to parse temporal unit.\n");
> +        return size;
> +    }
> +
> +    if (!av1->sequence_header) {
> +        av_log(avctx, AV_LOG_ERROR, "No sequence header available\n");
> +        goto end;
> +    }
> +
> +    for (int i = 0; i < td->nb_units; i++) {
> +        CodedBitstreamUnit *unit = &td->units[i];
> +        AV1RawOBU *obu = unit->content;
> +        AV1RawSequenceHeader *seq = av1->sequence_header;
> +        AV1RawFrameHeader *frame;
> +        int frame_type, bitdepth, subsampling;
> +
> +        if (unit->type == AV1_OBU_FRAME)
> +            frame = &obu->obu.frame.header;
> +        else if (unit->type == AV1_OBU_FRAME_HEADER)
> +            frame = &obu->obu.frame_header;
> +        else
> +            continue;
> +
> +        if (frame->show_existing_frame) {
> +            AV1ReferenceFrameState *ref = &av1->ref[frame->frame_to_show_map_idx];
> +
> +            if (!ref->valid) {
> +                av_log(avctx, AV_LOG_ERROR, "Invalid reference frame\n");
> +                goto end;
> +            }
> +
> +            ctx->width  = ref->frame_width;
> +            ctx->height = ref->frame_height;
> +            frame_type  = ref->frame_type;
> +
> +            ctx->key_frame = 0;
> +        } else if (!frame->show_frame) {
> +            continue;

I think you want to set the key_frame flag if you see any key frame, even an invisible one.

(E.g. if the first output frame is in a packet after an invisible key frame then you would miss it here, which possibly leads to incorrectly discarding some of the stream.)

> +        } else {
> +            ctx->width  = av1->frame_width;
> +            ctx->height = av1->frame_height;
> +            frame_type  = frame->frame_type;
> +
> +            ctx->key_frame = frame_type == AV1_FRAME_KEY;
> +        }
> +
> +        avctx->profile = seq->seq_profile;
> +        avctx->level   = seq->seq_level_idx[0];
> +
> +        switch (frame_type) {
> +        case AV1_FRAME_KEY:
> +        case AV1_FRAME_INTRA_ONLY:
> +            ctx->pict_type = AV_PICTURE_TYPE_I;
> +            break;
> +        case AV1_FRAME_INTER:
> +            ctx->pict_type = AV_PICTURE_TYPE_P;
> +            break;
> +        case AV1_FRAME_SWITCH:
> +            ctx->pict_type = AV_PICTURE_TYPE_SP;
> +            break;
> +        }
> +
> +        ctx->picture_structure = AV_PICTURE_STRUCTURE_FRAME;
> +
> +        subsampling = seq->color_config.subsampling_x << 1 & seq->color_config.subsampling_y;
> +        bitdepth    = 8 + seq->color_config.high_bitdepth * 2 + seq->color_config.twelve_bit * 2;
> +        switch (bitdepth) {
> +        case 8:
> +            if (subsampling == 3)      ctx->format = seq->color_config.mono_chrome ? AV_PIX_FMT_GRAY8 :
> +                                                                                     AV_PIX_FMT_YUV420P;
> +            else if (subsampling == 2) ctx->format = AV_PIX_FMT_YUV422P;
> +            else                       ctx->format = AV_PIX_FMT_YUV444P;
> +            break;
> +        case 10:
> +            if (subsampling == 3)      ctx->format = seq->color_config.mono_chrome ? AV_PIX_FMT_GRAY10 :
> +                                                                                     AV_PIX_FMT_YUV420P10;
> +            else if (subsampling == 2) ctx->format = AV_PIX_FMT_YUV422P10;
> +            else                       ctx->format = AV_PIX_FMT_YUV444P10;
> +            break;
> +        case 12:
> +            if (subsampling == 3)      ctx->format = seq->color_config.mono_chrome ? AV_PIX_FMT_GRAY12 :
> +                                                                                     AV_PIX_FMT_YUV420P12;
> +            else if (subsampling == 2) ctx->format = AV_PIX_FMT_YUV422P12;
> +            else                       ctx->format = AV_PIX_FMT_YUV444P12;
> +            break;
> +        }

I think I'd put the mono_chrome check outside the switch - while mono_chrome does imply that subsampling_(x|y) are both 1, testing it only inside the subsampling == 3 branch is still slightly confusing.

Also, perhaps this would be nicer as a lookup table?

> +    }
> +
> +end:
> +    ff_cbs_fragment_uninit(s->cbc, td);
> +
> +    return size;
> +}
> +
> +static const CodedBitstreamUnitType decompose_unit_types[] = {
> +    AV1_OBU_TEMPORAL_DELIMITER,
> +    AV1_OBU_SEQUENCE_HEADER,
> +    AV1_OBU_FRAME_HEADER,
> +    AV1_OBU_TILE_GROUP,
> +    AV1_OBU_FRAME,
> +};
> +
> +static av_cold int av1_parser_init(AVCodecParserContext *ctx)
> +{
> +    AV1ParseContext *s = ctx->priv_data;
> +    int ret;
> +
> +    ret = ff_cbs_init(&s->cbc, AV_CODEC_ID_AV1, NULL);

Can we forge a logging context here?  Having NULL is not very nice if CBS does find errors.

(Or overwrite it with the passed AVCodecContext only during the parse call?)

> +    if (ret < 0)
> +        return ret;
> +
> +    s->cbc->decompose_unit_types    = (CodedBitstreamUnitType *)decompose_unit_types;
> +    s->cbc->nb_decompose_unit_types = FF_ARRAY_ELEMS(decompose_unit_types);
> +
> +    return 0;
> +}
> +
> +static void av1_parser_close(AVCodecParserContext *ctx)
> +{
> +    AV1ParseContext *s = ctx->priv_data;
> +
> +    ff_cbs_close(&s->cbc);
> +}
> +
> +static int av1_parser_split(AVCodecContext *avctx,
> +                            const uint8_t *buf, int buf_size)
> +{
> +    AV1OBU obu;
> +    const uint8_t *ptr = buf, *end = buf + buf_size;
> +
> +    while (ptr < end) {
> +        int len = ff_av1_extract_obu(&obu, ptr, buf_size, avctx);
> +        if (len < 0)
> +            break;
> +
> +        if (obu.type == AV1_OBU_FRAME_HEADER ||
> +            obu.type == AV1_OBU_FRAME) {
> +            return ptr - buf;
> +        }
> +        ptr      += len;
> +        buf_size -= len;
> +    }
> +
> +    return 0;
> +}
> +
> +AVCodecParser ff_av1_parser = {
> +    .codec_ids      = { AV_CODEC_ID_AV1 },
> +    .priv_data_size = sizeof(AV1ParseContext),
> +    .parser_init    = av1_parser_init,
> +    .parser_close   = av1_parser_close,
> +    .parser_parse   = av1_parser_parse,
> +    .split          = av1_parser_split,
> +};
> diff --git a/libavcodec/parsers.c b/libavcodec/parsers.c
> index cb86cceecc..f01cad4c84 100644
> --- a/libavcodec/parsers.c
> +++ b/libavcodec/parsers.c
> @@ -26,6 +26,7 @@ extern AVCodecParser ff_aac_parser;
>  extern AVCodecParser ff_aac_latm_parser;
>  extern AVCodecParser ff_ac3_parser;
>  extern AVCodecParser ff_adx_parser;
> +extern AVCodecParser ff_av1_parser;
>  extern AVCodecParser ff_avs2_parser;
>  extern AVCodecParser ff_bmp_parser;
>  extern AVCodecParser ff_cavsvideo_parser;
> 

Looks good!

Thanks,

- Mark