[FFmpeg-devel] [PATCH] avcodec: add an AV1 parser
Mark Thompson
sw at jkqxz.net
Tue Sep 25 02:41:10 EEST 2018
On 24/09/18 01:12, James Almer wrote:
> Simple parser to set keyframes, frame type, structure, width, height, and pixel
> format, plus stream profile and level.
>
> Signed-off-by: James Almer <jamrial at gmail.com>
> ---
> Missing Changelog entry and version bump.
>
> This depends on "[PATCH v2 2/3] lavc: Add coded bitstream read/write support
> for AV1" which should be committed in the coming days.
>
> The AVCodecParser.split() implementation, added for the sake of completeness,
> is very naive and much like the h264 and hevc ones can result in useless OBUs
> being "extracted", but since it's no longer used by libavformat to fill global
> headers when reading raw containers it shouldn't really matter. It's pretty
> much used only by the remove_extradata bsf at this point.
>
> configure | 1 +
> libavcodec/Makefile | 1 +
> libavcodec/av1_parser.c | 218 ++++++++++++++++++++++++++++++++++++++++
> libavcodec/parsers.c | 1 +
> 4 files changed, 221 insertions(+)
> create mode 100644 libavcodec/av1_parser.c
>
> diff --git a/configure b/configure
> index ca8b599b63..b46c86ec95 100755
> --- a/configure
> +++ b/configure
> @@ -3020,6 +3020,7 @@ wmv3_crystalhd_decoder_select="crystalhd"
>
> # parsers
> aac_parser_select="adts_header"
> +av1_parser_select="cbs_av1"
> h264_parser_select="golomb h264dsp h264parse"
> hevc_parser_select="hevcparse"
> mpegaudio_parser_select="mpegaudioheader"
> diff --git a/libavcodec/Makefile b/libavcodec/Makefile
> index b2c6995f9a..dc28892e64 100644
> --- a/libavcodec/Makefile
> +++ b/libavcodec/Makefile
> @@ -1006,6 +1006,7 @@ OBJS-$(CONFIG_AAC_PARSER) += aac_parser.o aac_ac3_parser.o \
> mpeg4audio.o
> OBJS-$(CONFIG_AC3_PARSER) += ac3tab.o aac_ac3_parser.o
> OBJS-$(CONFIG_ADX_PARSER) += adx_parser.o adx.o
> +OBJS-$(CONFIG_AV1_PARSER) += av1_parser.o
> OBJS-$(CONFIG_AVS2_PARSER) += avs2_parser.o
> OBJS-$(CONFIG_BMP_PARSER) += bmp_parser.o
> OBJS-$(CONFIG_CAVSVIDEO_PARSER) += cavs_parser.o
> diff --git a/libavcodec/av1_parser.c b/libavcodec/av1_parser.c
> new file mode 100644
> index 0000000000..b2e19e2119
> --- /dev/null
> +++ b/libavcodec/av1_parser.c
> @@ -0,0 +1,218 @@
> +/*
> + * AV1 parser
> + *
> + * Copyright (C) 2018 James Almer <jamrial at gmail.com>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include "av1_parse.h"
> +#include "cbs.h"
> +#include "cbs_av1.h"
> +#include "parser.h"
> +
> +typedef struct AV1ParseContext {
> + CodedBitstreamContext *cbc;
> + CodedBitstreamFragment temporal_unit;
> + int parsed_extradata;
> +} AV1ParseContext;
> +
> +static int av1_parser_parse(AVCodecParserContext *ctx,
> + AVCodecContext *avctx,
> + const uint8_t **out_data, int *out_size,
> + const uint8_t *data, int size)
> +{
> + AV1ParseContext *s = ctx->priv_data;
> + CodedBitstreamFragment *td = &s->temporal_unit;
> + CodedBitstreamAV1Context *av1 = s->cbc->priv_data;
> + int ret;
> +
> + *out_data = data;
> + *out_size = size;
> +
> + ctx->key_frame = -1;
> + ctx->pict_type = AV_PICTURE_TYPE_NONE;
> + ctx->picture_structure = AV_PICTURE_STRUCTURE_UNKNOWN;
> +
> + if (avctx->extradata_size && !s->parsed_extradata) {
> + ret = ff_cbs_read(s->cbc, td, avctx->extradata, avctx->extradata_size);
> + if (ret < 0) {
> + av_log(avctx, AV_LOG_ERROR, "Failed to parse extradata.\n");
> + return size;
> + }
> +
> + s->parsed_extradata = 1;
> +
> + ff_cbs_fragment_uninit(s->cbc, td);
> + }
> +
> + ret = ff_cbs_read(s->cbc, td, data, size);
> + if (ret < 0) {
> + av_log(avctx, AV_LOG_ERROR, "Failed to parse temporal unit.\n");
> + return size;
> + }
> +
> + if (!av1->sequence_header) {
> + av_log(avctx, AV_LOG_ERROR, "No sequence header available\n");
> + goto end;
> + }
> +
> + for (int i = 0; i < td->nb_units; i++) {
> + CodedBitstreamUnit *unit = &td->units[i];
> + AV1RawOBU *obu = unit->content;
> + AV1RawSequenceHeader *seq = av1->sequence_header;
> + AV1RawFrameHeader *frame;
> + int frame_type, bitdepth, subsampling;
> +
> + if (unit->type == AV1_OBU_FRAME)
> + frame = &obu->obu.frame.header;
> + else if (unit->type == AV1_OBU_FRAME_HEADER)
> + frame = &obu->obu.frame_header;
> + else
> + continue;
> +
> + if (frame->show_existing_frame) {
> + AV1ReferenceFrameState *ref = &av1->ref[frame->frame_to_show_map_idx];
> +
> + if (!ref->valid) {
> + av_log(avctx, AV_LOG_ERROR, "Invalid reference frame\n");
> + goto end;
> + }
> +
> + ctx->width = ref->frame_width;
> + ctx->height = ref->frame_height;
> + frame_type = ref->frame_type;
> +
> + ctx->key_frame = 0;
> + } else if (!frame->show_frame) {
> + continue;
I think you want to set the key_frame flag if the you see any key frame, even an invisible one.
(E.g. if the first output frame is in a packet after an invisible key frame then you would miss it here, which possibly leads to incorrectly discarding some of the stream.)
> + } else {
> + ctx->width = av1->frame_width;
> + ctx->height = av1->frame_height;
> + frame_type = frame->frame_type;
> +
> + ctx->key_frame = frame_type == AV1_FRAME_KEY;
> + }
> +
> + avctx->profile = seq->seq_profile;
> + avctx->level = seq->seq_level_idx[0];
> +
> + switch (frame_type) {
> + case AV1_FRAME_KEY:
> + case AV1_FRAME_INTRA_ONLY:
> + ctx->pict_type = AV_PICTURE_TYPE_I;
> + break;
> + case AV1_FRAME_INTER:
> + ctx->pict_type = AV_PICTURE_TYPE_P;
> + break;
> + case AV1_FRAME_SWITCH:
> + ctx->pict_type = AV_PICTURE_TYPE_SP;
> + break;
> + }
> +
> + ctx->picture_structure = AV_PICTURE_STRUCTURE_FRAME;
> +
> + subsampling = seq->color_config.subsampling_x << 1 & seq->color_config.subsampling_y;
> + bitdepth = 8 + seq->color_config.high_bitdepth * 2 + seq->color_config.twelve_bit * 2;
> + switch (bitdepth) {
> + case 8:
> + if (subsampling == 3) ctx->format = seq->color_config.mono_chrome ? AV_PIX_FMT_GRAY8 :
> + AV_PIX_FMT_YUV420P;
> + else if (subsampling == 2) ctx->format = AV_PIX_FMT_YUV422P;
> + else ctx->format = AV_PIX_FMT_YUV444P;
> + break;
> + case 10:
> + if (subsampling == 3) ctx->format = seq->color_config.mono_chrome ? AV_PIX_FMT_GRAY10 :
> + AV_PIX_FMT_YUV420P10;
> + else if (subsampling == 2) ctx->format = AV_PIX_FMT_YUV422P10;
> + else ctx->format = AV_PIX_FMT_YUV444P10;
> + break;
> + case 12:
> + if (subsampling == 3) ctx->format = seq->color_config.mono_chrome ? AV_PIX_FMT_GRAY12 :
> + AV_PIX_FMT_YUV420P12;
> + else if (subsampling == 2) ctx->format = AV_PIX_FMT_YUV422P12;
> + else ctx->format = AV_PIX_FMT_YUV444P12;
> + break;
> + }
I think I'd put mono_chrome outside the switch - while it does imply that subsampling_(x|y) are both 1, that is still slightly confusing.
Also, perhaps this would be nicer as a lookup table?
> + }
> +
> +end:
> + ff_cbs_fragment_uninit(s->cbc, td);
> +
> + return size;
> +}
> +
> +static const CodedBitstreamUnitType decompose_unit_types[] = {
> + AV1_OBU_TEMPORAL_DELIMITER,
> + AV1_OBU_SEQUENCE_HEADER,
> + AV1_OBU_FRAME_HEADER,
> + AV1_OBU_TILE_GROUP,
> + AV1_OBU_FRAME,
> +};
> +
> +static av_cold int av1_parser_init(AVCodecParserContext *ctx)
> +{
> + AV1ParseContext *s = ctx->priv_data;
> + int ret;
> +
> + ret = ff_cbs_init(&s->cbc, AV_CODEC_ID_AV1, NULL);
Can we forge a logging context here? Having NULL is not very nice if it does find errors.
(Or overwrite it with the passed AVCodecContext only during the parse call?)
> + if (ret < 0)
> + return ret;
> +
> + s->cbc->decompose_unit_types = (CodedBitstreamUnitType *)decompose_unit_types;
> + s->cbc->nb_decompose_unit_types = FF_ARRAY_ELEMS(decompose_unit_types);
> +
> + return 0;
> +}
> +
> +static void av1_parser_close(AVCodecParserContext *ctx)
> +{
> + AV1ParseContext *s = ctx->priv_data;
> +
> + ff_cbs_close(&s->cbc);
> +}
> +
> +static int av1_parser_split(AVCodecContext *avctx,
> + const uint8_t *buf, int buf_size)
> +{
> + AV1OBU obu;
> + const uint8_t *ptr = buf, *end = buf + buf_size;
> +
> + while (ptr < end) {
> + int len = ff_av1_extract_obu(&obu, ptr, buf_size, avctx);
> + if (len < 0)
> + break;
> +
> + if (obu.type == AV1_OBU_FRAME_HEADER ||
> + obu.type == AV1_OBU_FRAME) {
> + return ptr - buf;
> + }
> + ptr += len;
> + buf_size -= len;
> + }
> +
> + return 0;
> +}
> +
> +AVCodecParser ff_av1_parser = {
> + .codec_ids = { AV_CODEC_ID_AV1 },
> + .priv_data_size = sizeof(AV1ParseContext),
> + .parser_init = av1_parser_init,
> + .parser_close = av1_parser_close,
> + .parser_parse = av1_parser_parse,
> + .split = av1_parser_split,
> +};
> diff --git a/libavcodec/parsers.c b/libavcodec/parsers.c
> index cb86cceecc..f01cad4c84 100644
> --- a/libavcodec/parsers.c
> +++ b/libavcodec/parsers.c
> @@ -26,6 +26,7 @@ extern AVCodecParser ff_aac_parser;
> extern AVCodecParser ff_aac_latm_parser;
> extern AVCodecParser ff_ac3_parser;
> extern AVCodecParser ff_adx_parser;
> +extern AVCodecParser ff_av1_parser;
> extern AVCodecParser ff_avs2_parser;
> extern AVCodecParser ff_bmp_parser;
> extern AVCodecParser ff_cavsvideo_parser;
>
Looks good!
Thanks,
- Mark
More information about the ffmpeg-devel
mailing list