[FFmpeg-devel] [PATCH 5/6] avcodec: add h266 parser

Nuo Mi nuomi2021 at gmail.com
Tue Dec 22 12:47:15 EET 2020


On Mon, Dec 21, 2020 at 11:15 PM James Almer <jamrial at gmail.com> wrote:

> On 12/21/2020 3:07 AM, Nuo Mi wrote:
> > ---
> >   configure                |   3 +
> >   libavcodec/Makefile      |   1 +
> >   libavcodec/h2645_parse.c |  73 +++++++++-
> >   libavcodec/h266_parser.c | 284 +++++++++++++++++++++++++++++++++++++++
> >   libavcodec/parsers.c     |   1 +
> >   5 files changed, 360 insertions(+), 2 deletions(-)
> >   create mode 100644 libavcodec/h266_parser.c
> >
> > diff --git a/configure b/configure
> > index 90914752f1..77272948e3 100755
> > --- a/configure
> > +++ b/configure
> > @@ -2354,6 +2354,7 @@ CONFIG_EXTRA="
> >       cbs_av1
> >       cbs_h264
> >       cbs_h265
> > +    cbs_h266
> >       cbs_jpeg
> >       cbs_mpeg2
> >       cbs_vp9
> > @@ -2622,6 +2623,7 @@ threads_if_any="$THREADS_LIST"
> >   cbs_av1_select="cbs"
> >   cbs_h264_select="cbs"
> >   cbs_h265_select="cbs"
> > +cbs_h266_select="cbs"
>
> These two chunks belong to the previous patch adding cbs_h266. Otherwise
> CONFIG_CBS_H266 will not be defined for Makefile to use.
>
> >   cbs_jpeg_select="cbs"
> >   cbs_mpeg2_select="cbs"
> >   cbs_vp9_select="cbs"
> > @@ -3158,6 +3160,7 @@ av1_qsv_decoder_select="qsvdec"
> >   aac_parser_select="adts_header"
> >   av1_parser_select="cbs_av1"
> >   h264_parser_select="atsc_a53 golomb h264dsp h264parse"
> > +h266_parser_select="cbs_h266"
> >   hevc_parser_select="hevcparse"
> >   mpegaudio_parser_select="mpegaudioheader"
> >   mpegvideo_parser_select="mpegvideo"
> > diff --git a/libavcodec/Makefile b/libavcodec/Makefile
> > index 4045c002b7..82cc9b8b93 100644
> > --- a/libavcodec/Makefile
> > +++ b/libavcodec/Makefile
> > @@ -1098,6 +1098,7 @@ OBJS-$(CONFIG_GSM_PARSER)              +=
> gsm_parser.o
> >   OBJS-$(CONFIG_H261_PARSER)             += h261_parser.o
> >   OBJS-$(CONFIG_H263_PARSER)             += h263_parser.o
> >   OBJS-$(CONFIG_H264_PARSER)             += h264_parser.o h264_sei.o
> h264data.o
> > +OBJS-$(CONFIG_H266_PARSER)             += h266_parser.o
> >   OBJS-$(CONFIG_HEVC_PARSER)             += hevc_parser.o hevc_data.o
> >   OBJS-$(CONFIG_IPU_PARSER)              += ipu_parser.o
> >   OBJS-$(CONFIG_JPEG2000_PARSER)         += jpeg2000_parser.o
> > diff --git a/libavcodec/h2645_parse.c b/libavcodec/h2645_parse.c
> > index 0f98b49fbe..2600371d3c 100644
> > --- a/libavcodec/h2645_parse.c
> > +++ b/libavcodec/h2645_parse.c
>
> cbs_h266 needs the changes to this file you're adding here to split
> NALUs, so it should be done in a new separate patch applied before patch
> 4/6.
>
> > @@ -29,6 +29,7 @@
> >   #include "bytestream.h"
> >   #include "hevc.h"
> >   #include "h264.h"
> > +#include "h266.h"
> >   #include "h2645_parse.h"
> >
> >   int ff_h2645_extract_rbsp(const uint8_t *src, int length,
> > @@ -146,6 +147,47 @@ nsc:
> >       return si;
> >   }
> >
> > +static const char *h266_nal_type_name[64] = {
> > +    "TRAIL", //H266_NAL_TRAIL
> > +    "STSA", //H266_NAL_STSA
> > +    "RADL", //H266_NAL_RADL
> > +    "RASL", //H266_NAL_RASL
> > +    "RSV_VCL_4", //H266_NAL_RSV_VCL_4
> > +    "RSV_VCL_5", //H266_NAL_RSV_VCL_5
> > +    "RSV_VCL_6", //H266_NAL_RSV_VCL_6
> > +    "IDR_W_RAD", //H266_NAL_IDR_W_RADL
> > +    "IDR_N_LP", //H266_NAL_IDR_N_LP
> > +    "CRA_NUT", //H266_NAL_CRA_NUT
> > +    "GDR_NUT", //H266_NAL_GDR_NUT
> > +    "RSV_IRAP_11", //H266_NAL_RSV_IRAP_11
> > +    "OPI", //H266_NAL_OPI
> > +    "DCI", //H266_NAL_DCI
> > +    "VPS", //H266_NAL_VPS
> > +    "SPS", //H266_NAL_SPS
> > +    "PPS", //H266_NAL_PPS
> > +    "PREFIX_AP", //H266_NAL_PREFIX_APS
> > +    "SUFFIX_AP", //H266_NAL_SUFFIX_APS
> > +    "PH", //H266_NAL_PH
> > +    "AUD", //H266_NAL_AUD
> > +    "EOS_NUT", //H266_NAL_EOS_NUT
> > +    "EOB_NUT", //H266_NAL_EOB_NUT
> > +    "PREFIX_SE", //H266_NAL_PREFIX_SEI
> > +    "SUFFIX_SE", //H266_NAL_SUFFIX_SEI
> > +    "FD_NUT", //H266_NAL_FD_NUT
> > +    "RSV_NVCL_26", //H266_NAL_RSV_NVCL_26
> > +    "RSV_NVCL_27", //H266_NAL_RSV_NVCL_27
> > +    "UNSPEC_28", //H266_NAL_UNSPEC_28
> > +    "UNSPEC_29", //H266_NAL_UNSPEC_29
> > +    "UNSPEC_30", //H266_NAL_UNSPEC_30
> > +    "UNSPEC_31", //H266_NAL_UNSPEC_31
> > +};
> > +
> > +static const char *h266_nal_unit_name(int nal_type)
> > +{
> > +    av_assert0(nal_type >= 0 && nal_type < 32);
> > +    return h266_nal_type_name[nal_type];
> > +}
> > +
> >   static const char *hevc_nal_type_name[64] = {
> >       "TRAIL_N", // HEVC_NAL_TRAIL_N
> >       "TRAIL_R", // HEVC_NAL_TRAIL_R
> > @@ -289,6 +331,32 @@ static int get_bit_length(H2645NAL *nal, int
> skip_trailing_zeros)
> >    * @return AVERROR_INVALIDDATA if the packet is not a valid NAL unit,
> >    * 0 otherwise
> >    */
> > +static int h266_parse_nal_header(H2645NAL *nal, void *logctx)
> > +{
> > +    GetBitContext *gb = &nal->gb;
> > +
> > +    if (get_bits1(gb) != 0)     //forbidden_zero_bit
> > +        return AVERROR_INVALIDDATA;
> > +
> > +    if (get_bits1(gb) != 0)     //nuh_reserved_zero_bit
>
> This one should be ignored, otherwise bitstreams compliant with an
> hypothetical future revision of the spec that defines this bit will not
> work, when they should.
>
> > +        return AVERROR_INVALIDDATA;
> > +
> > +    nal->nuh_layer_id = get_bits(gb, 6);
> > +    nal->type = get_bits(gb, 5);
> > +    nal->temporal_id = get_bits(gb, 3) - 1;
> > +    if (nal->temporal_id < 0)
> > +        return AVERROR_INVALIDDATA;
> > +
> > +    if ((nal->type >= H266_NAL_IDR_W_RADL && nal->type <=
> H266_NAL_RSV_IRAP_11) && nal->temporal_id)
> > +        return AVERROR_INVALIDDATA;
> > +
> > +    av_log(logctx, AV_LOG_DEBUG,
> > +      "nal_unit_type: %d(%s), nuh_layer_id: %d, temporal_id: %d\n",
> > +           nal->type, h266_nal_unit_name(nal->type), nal->nuh_layer_id,
> nal->temporal_id);
> > +
> > +    return 0;
> > +}
> > +
> >   static int hevc_parse_nal_header(H2645NAL *nal, void *logctx)
> >   {
> >       GetBitContext *gb = &nal->gb;
> > @@ -503,8 +571,9 @@ int ff_h2645_packet_split(H2645Packet *pkt, const
> uint8_t *buf, int length,
> >
> >           /* Reset type in case it contains a stale value from a
> previously parsed NAL */
> >           nal->type = 0;
> > -
> > -        if (codec_id == AV_CODEC_ID_HEVC)
> > +        if (codec_id == AV_CODEC_ID_H266)
> > +            ret = h266_parse_nal_header(nal, logctx);
> > +        else if (codec_id == AV_CODEC_ID_HEVC)
> >               ret = hevc_parse_nal_header(nal, logctx);
> >           else
> >               ret = h264_parse_nal_header(nal, logctx);
> > diff --git a/libavcodec/h266_parser.c b/libavcodec/h266_parser.c
> > new file mode 100644
> > index 0000000000..e472e51d59
> > --- /dev/null
> > +++ b/libavcodec/h266_parser.c
> > @@ -0,0 +1,284 @@
> > +/*
> > + * h266 parser
> > + *
> > + * Copyright (C) 2020 Nuo Mi <nuomi2021 at gmail.com>
> > + *
> > + * This file is part of FFmpeg.
> > + *
> > + * FFmpeg is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU Lesser General Public
> > + * License as published by the Free Software Foundation; either
> > + * version 2.1 of the License, or (at your option) any later version.
> > + *
> > + * FFmpeg is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > + * Lesser General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU Lesser General Public
> > + * License along with FFmpeg; if not, write to the Free Software
> > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> 02110-1301 USA
> > + */
> > +
> > +#include "cbs.h"
> > +#include "cbs_h266.h"
> > +#include "internal.h"
> > +#include "parser.h"
> > +
> > +#define START_CODE 0x000001 ///< start_code_prefix_one_3bytes
> > +
> > +#define IS_SLICE(nut) (nut <= H266_NAL_RASL || (nut >=
> H266_NAL_IDR_W_RADL && nut <= H266_NAL_GDR_NUT))
> > +
> > +typedef struct H266ParserContext {
> > +    ParseContext pc;
> > +    CodedBitstreamContext *cbc;
> > +    CodedBitstreamFragment picture_unit;
> > +    int parsed_extradata;
> > +} H266ParserContext;
> > +
> > +static const enum AVPixelFormat pix_fmts_8bit[] = {
> > +    AV_PIX_FMT_GRAY8, AV_PIX_FMT_YUV420P,
> > +    AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV444P
> > +};
> > +
> > +static const enum AVPixelFormat pix_fmts_10bit[] = {
> > +    AV_PIX_FMT_GRAY10, AV_PIX_FMT_YUV420P10,
> > +    AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10
> > +};
> > +
> > +static int get_format(const H266RawSPS* sps)
> > +{
> > +    switch (sps->sps_bitdepth_minus8) {
> > +        case 0:
> > +            return pix_fmts_8bit[sps->sps_chroma_format_idc];
> > +        case 2:
> > +            return pix_fmts_10bit[sps->sps_chroma_format_idc];
> > +    }
> > +    return AV_PIX_FMT_NONE;
> > +}
> > +
> > +/**
> > + * Find the end of the current frame in the bitstream.
> > + * @return the position of the first byte of the next frame, or
> END_NOT_FOUND
> > + */
> > +static int find_frame_end(AVCodecParserContext *s, const uint8_t *buf,
> > +                               int buf_size)
> > +{
> > +    H266ParserContext *ctx = s->priv_data;
> > +    ParseContext       *pc = &ctx->pc;
> > +    int i;
> > +
> > +    for (i = 0; i < buf_size; i++) {
> > +        int nut;
> > +
> > +        pc->state64 = (pc->state64 << 8) | buf[i];
> > +
> > +        if (((pc->state64 >> 3 * 8) & 0xFFFFFF) != START_CODE)
> > +            continue;
> > +
> > +        nut = (pc->state64 >> (8 + 3)) & 0x1F;
> > +        // Beginning of access unit
> > +        if ((nut >= H266_NAL_OPI && nut <= H266_NAL_EOB_NUT && nut !=
> H266_NAL_PH) ||
> > +            nut == H266_NAL_PREFIX_SEI ||
> > +            (nut >= 26 && nut <= 31)) {
> > +            if (pc->frame_start_found) {
> > +                pc->frame_start_found = 0;
> > +                return i - 5;
> > +            }
> > +        } else if (nut == H266_NAL_PH  || IS_SLICE(nut)) {
> > +            int sh_picture_header_in_slice_header_flag = buf[i] >> 7;
> > +
> > +            if (nut == H266_NAL_PH ||
> sh_picture_header_in_slice_header_flag) {
> > +                if (!pc->frame_start_found) {
> > +                    pc->frame_start_found = 1;
> > +                } else { // First slice of next frame found
> > +                    pc->frame_start_found = 0;
> > +                    return i - 5;
> > +                }
> > +            }
> > +        }
> > +    }
> > +    return END_NOT_FOUND;
> > +}
> > +
> > +static int get_pict_type(const CodedBitstreamFragment *pu)
> > +{
> > +    int has_p = 0;
> > +    for (int i = 0; i < pu->nb_units; i++) {
> > +        CodedBitstreamUnit *unit = &pu->units[i];
> > +        if (IS_SLICE(unit->type)) {
> > +            const H266RawSlice *slice = unit->content;
> > +            uint8_t type = slice->header.sh_slice_type;
> > +            if (type == H266_SLICE_B) {
> > +                return AV_PICTURE_TYPE_B;
> > +            }
> > +            if (type == H266_SLICE_P) {
> > +                has_p = 1;
> > +            }
> > +        }
> > +    }
> > +    return has_p ? AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_I;
> > +}
> > +
> > +/**
> > + * Parse NAL units of found picture and decode some basic information.
> > + *
> > + * @param ctx parser context.
> > + * @param avctx codec context.
> > + * @param buf buffer with field/frame data.
> > + * @param buf_size size of the buffer.
> > + */
> > +static int parse_nal_units(AVCodecParserContext *ctx, const uint8_t
> *buf,
> > +                           int buf_size, AVCodecContext *avctx)
> > +{
> > +    H266ParserContext *s = ctx->priv_data;
> > +    CodedBitstreamFragment *pu = &s->picture_unit;
> > +    CodedBitstreamH266Context *h266 = s->cbc->priv_data;
> > +    const H266RawSPS *sps;
> > +    const H266RawPPS *pps;
> > +    const H266RawPH *ph;
> > +    int ret, num = 0, den = 0;
> > +
> > +    /* set some sane default values */
> > +    ctx->pict_type         = AV_PICTURE_TYPE_I;
> > +    ctx->key_frame         = 0;
> > +    ctx->picture_structure = AV_PICTURE_STRUCTURE_FRAME;
> > +
> > +    s->cbc->log_ctx = avctx;
> > +
> > +    if (avctx->extradata_size && !s->parsed_extradata) {
> > +        s->parsed_extradata = 1;
> > +
> > +        if ((ret = ff_cbs_read(s->cbc, pu, avctx->extradata,
> avctx->extradata_size)) < 0)
> > +            av_log(avctx, AV_LOG_WARNING, "Failed to parse
> extradata.\n");
>
> ff_cbs_read_extradata_from_codec()
>
> > +
> > +        ff_cbs_fragment_reset(pu);
> > +    }
> > +
> > +    if ((ret = ff_cbs_read(s->cbc, pu, buf, buf_size))< 0) {
> > +        av_log(avctx, AV_LOG_ERROR, "Failed to parse picture unit.\n");
> > +        goto end;
> > +    }
> > +
> > +    sps = h266->active_sps;
> > +    pps = h266->active_pps;
> > +    ph = h266->active_ph;
> > +    if (!h266->active_pps || !h266->active_ph) {
> > +        av_log(avctx, AV_LOG_ERROR, "No pss or ph header available\n");
> > +        goto end;
> > +    }
> > +
> > +    ctx->key_frame = ph->ph_gdr_or_irap_pic_flag;
>
> Should a GDR pic also be marked as a keyframe? If not, this should also
> ensure ph->ph_gdr_pic_flag is 0.

I guess a GDR pic (nal_unit_type 10) is a keyframe, because the NAL unit
types immediately before and after GDR are both IRAP types.
ph_gdr_or_irap_pic_flag means the picture is either a GDR or an IRAP pic.
In both cases, we have a keyframe, so there is no need to check
ph_gdr_pic_flag.


>
> +
> > +    ctx->coded_width  = pps->pps_pic_width_in_luma_samples;
> > +    ctx->coded_height = pps->pps_pic_height_in_luma_samples;
> > +    ctx->width        = pps->pps_pic_width_in_luma_samples  -
> pps->pps_conf_win_left_offset - pps->pps_conf_win_right_offset;
> > +    ctx->height       = pps->pps_pic_height_in_luma_samples -
> pps->pps_conf_win_top_offset  - pps->pps_conf_win_bottom_offset;
> > +    ctx->pict_type    = get_pict_type(pu);
> > +    ctx->format       = get_format(sps);
> > +    avctx->profile  = sps->profile_tier_level.general_profile_idc;
> > +    avctx->level    = sps->profile_tier_level.general_level_idc;
> > +
> > +
> > +
> > +    if(sps->sps_ptl_dpb_hrd_params_present_flag &&
> sps->sps_timing_hrd_params_present_flag) {
> > +        num = sps->sps_general_timing_hrd_parameters.num_units_in_tick;
> > +        den = sps->sps_general_timing_hrd_parameters.time_scale;
> > +    } else {
> > +        av_log(avctx, AV_LOG_INFO, "No
> sps_timing_hrd_params_present_flag in sps, the fps may not right.\n");
>
> No need for this warning.
>
> > +        goto end;
> > +    }
> > +    if (num != 0 && den != 0)
> > +        av_reduce(&avctx->framerate.den, &avctx->framerate.num,
> > +                  num, den, 1 << 30);
> > +end:
> > +
> > +    ff_cbs_fragment_reset(pu);
> > +    s->cbc->log_ctx = NULL;
> > +    return 0;
> > +}
> > +
> > +static int h266_parser_parse(AVCodecParserContext *s, AVCodecContext
> *avctx,
> > +                      const uint8_t **poutbuf, int *poutbuf_size,
> > +                      const uint8_t *buf, int buf_size)
> > +{
> > +    int next;
> > +    H266ParserContext *ctx = s->priv_data;
> > +    ParseContext *pc = &ctx->pc;
> > +    int is_dummy_buf = !buf_size;
> > +    const uint8_t *dummy_buf = buf;
> > +
> > +    if (avctx->extradata && !ctx->parsed_extradata) {
> > +        av_log(avctx, AV_LOG_INFO, "extra data is not supported
> yet.\n");
> > +        return AVERROR_PATCHWELCOME;
> > +    }
> > +
> > +    if (s->flags & PARSER_FLAG_COMPLETE_FRAMES) {
> > +        next = buf_size;
> > +    } else {
> > +        next = find_frame_end(s, buf, buf_size);
> > +        if (ff_combine_frame(pc, next, &buf, &buf_size) < 0) {
> > +            *poutbuf      = NULL;
> > +            *poutbuf_size = 0;
> > +            return buf_size;
> > +        }
> > +    }
> > +
> > +    is_dummy_buf &= (dummy_buf == buf);
> > +
> > +    if (!is_dummy_buf)
> > +        parse_nal_units(s, buf, buf_size, avctx);
> > +
> > +    *poutbuf      = buf;
> > +    *poutbuf_size = buf_size;
> > +    return next;
> > +
> > +}
> > +
> > +static const CodedBitstreamUnitType decompose_unit_types[] = {
> > +    H266_NAL_TRAIL,
> > +    H266_NAL_STSA,
> > +    H266_NAL_RADL,
> > +    H266_NAL_RASL,
> > +    H266_NAL_IDR_W_RADL,
> > +    H266_NAL_IDR_N_LP,
> > +    H266_NAL_CRA_NUT,
> > +    H266_NAL_GDR_NUT,
> > +    H266_NAL_VPS,
> > +    H266_NAL_SPS,
> > +    H266_NAL_PPS,
> > +    H266_NAL_PH,
> > +    H266_NAL_AUD,
> > +};
> > +
> > +static av_cold int h266_parser_init(AVCodecParserContext *ctx)
> > +{
> > +    H266ParserContext *s = ctx->priv_data;
> > +    int ret;
> > +
> > +    ret = ff_cbs_init(&s->cbc, AV_CODEC_ID_VVC, NULL);
> > +    if (ret < 0)
> > +        return ret;
> > +
> > +    s->cbc->decompose_unit_types    = (CodedBitstreamUnitType
> *)decompose_unit_types;
> > +    s->cbc->nb_decompose_unit_types =
> FF_ARRAY_ELEMS(decompose_unit_types);
> > +
> > +    return 0;
> > +}
> > +
> > +static void h266_parser_close(AVCodecParserContext *ctx)
> > +{
> > +    H266ParserContext *s = ctx->priv_data;
> > +
> > +    ff_cbs_fragment_free(&s->picture_unit);
> > +    ff_cbs_close(&s->cbc);
> > +    av_freep(&s->pc.buffer);
> > +}
> > +
> > +AVCodecParser ff_h266_parser = {
> > +    .codec_ids      = { AV_CODEC_ID_VVC },
> > +    .priv_data_size = sizeof(H266ParserContext),
> > +    .parser_init    = h266_parser_init,
> > +    .parser_close   = h266_parser_close,
> > +    .parser_parse   = h266_parser_parse,
> > +};
> > diff --git a/libavcodec/parsers.c b/libavcodec/parsers.c
> > index 83271d95a3..155aec71aa 100644
> > --- a/libavcodec/parsers.c
> > +++ b/libavcodec/parsers.c
> > @@ -49,6 +49,7 @@ extern AVCodecParser ff_gsm_parser;
> >   extern AVCodecParser ff_h261_parser;
> >   extern AVCodecParser ff_h263_parser;
> >   extern AVCodecParser ff_h264_parser;
> > +extern AVCodecParser ff_h266_parser;
> >   extern AVCodecParser ff_hevc_parser;
> >   extern AVCodecParser ff_ipu_parser;
> >   extern AVCodecParser ff_jpeg2000_parser;
> >
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request at ffmpeg.org with subject "unsubscribe".


More information about the ffmpeg-devel mailing list