[FFmpeg-devel] [PATCH v7 03/11] avcodec: add bitstream parser for H266/VVC

Andreas Rheinhardt andreas.rheinhardt at outlook.com
Thu Jun 29 23:35:47 EEST 2023


Thomas Siedel:
> From: Nuo Mi <nuomi2021 at gmail.com>
> 
> Add nal parser ff_vvc_parser to parse vvc elementary bitstreams.
> 
> Co-authored-by: Thomas Siedel <thomas.ff at spin-digital.com>
> ---
>  configure                |   1 +
>  libavcodec/Makefile      |   2 +
>  libavcodec/h2645_parse.c |  71 ++++-
>  libavcodec/h266_parser.c | 601 +++++++++++++++++++++++++++++++++++++++
>  libavcodec/parsers.c     |   1 +
>  5 files changed, 675 insertions(+), 1 deletion(-)
>  create mode 100644 libavcodec/h266_parser.c
> 
> diff --git a/configure b/configure
> index 91964edc8f..2fb167c7c3 100755
> --- a/configure
> +++ b/configure
> @@ -3266,6 +3266,7 @@ av1_amf_encoder_deps="amf"
>  aac_parser_select="adts_header mpeg4audio"
>  av1_parser_select="cbs_av1"
>  h264_parser_select="golomb h264dsp h264parse h264_sei"
> +h266_parser_select="cbs_h266"
>  hevc_parser_select="hevcparse hevc_sei"
>  mpegaudio_parser_select="mpegaudioheader"
>  mpeg4video_parser_select="h263dsp mpegvideodec qpeldsp"
> diff --git a/libavcodec/Makefile b/libavcodec/Makefile
> index abae4909d2..79c4363f3d 100644
> --- a/libavcodec/Makefile
> +++ b/libavcodec/Makefile
> @@ -76,6 +76,7 @@ OBJS-$(CONFIG_CBS)                     += cbs.o cbs_bsf.o
>  OBJS-$(CONFIG_CBS_AV1)                 += cbs_av1.o
>  OBJS-$(CONFIG_CBS_H264)                += cbs_h2645.o cbs_sei.o h2645_parse.o
>  OBJS-$(CONFIG_CBS_H265)                += cbs_h2645.o cbs_sei.o h2645_parse.o
> +OBJS-$(CONFIG_CBS_H266)                += cbs_h2645.o cbs_sei.o h2645_parse.o
>  OBJS-$(CONFIG_CBS_JPEG)                += cbs_jpeg.o
>  OBJS-$(CONFIG_CBS_MPEG2)               += cbs_mpeg2.o
>  OBJS-$(CONFIG_CBS_VP9)                 += cbs_vp9.o
> @@ -1166,6 +1167,7 @@ OBJS-$(CONFIG_GSM_PARSER)              += gsm_parser.o
>  OBJS-$(CONFIG_H261_PARSER)             += h261_parser.o
>  OBJS-$(CONFIG_H263_PARSER)             += h263_parser.o
>  OBJS-$(CONFIG_H264_PARSER)             += h264_parser.o h264data.o
> +OBJS-$(CONFIG_H266_PARSER)             += h266_parser.o
>  OBJS-$(CONFIG_HEVC_PARSER)             += hevc_parser.o hevc_data.o
>  OBJS-$(CONFIG_HDR_PARSER)              += hdr_parser.o
>  OBJS-$(CONFIG_IPU_PARSER)              += ipu_parser.o
> diff --git a/libavcodec/h2645_parse.c b/libavcodec/h2645_parse.c
> index 90944177c7..5261c3e568 100644
> --- a/libavcodec/h2645_parse.c
> +++ b/libavcodec/h2645_parse.c
> @@ -30,6 +30,7 @@
>  #include "hevc.h"
>  #include "h264.h"
>  #include "h2645_parse.h"
> +#include "h266.h"
>  
>  int ff_h2645_extract_rbsp(const uint8_t *src, int length,
>                            H2645RBSP *rbsp, H2645NAL *nal, int small_padding)
> @@ -145,6 +146,47 @@ nsc:
>      return si;
>  }
>  
> +static const char *const h266_nal_type_name[32] = {
> +    "TRAIL_NUT", // VVC_TRAIL_NUT
> +    "STSA_NUT", // VVC_STSA_NUT
> +    "RADL_NUT", // VVC_RADL_NUT
> +    "RASL_NUT", // VVC_RASL_NUT
> +    "RSV_VCL_4", // VVC_RSV_VCL_4
> +    "RSV_VCL_5", // VVC_RSV_VCL_5
> +    "RSV_VCL_6", // VVC_RSV_VCL_6
> +    "IDR_W_RADL", // VVC_IDR_W_RADL
> +    "IDR_N_LP", // VVC_IDR_N_LP
> +    "CRA_NUT", // VVC_CRA_NUT
> +    "GDR_NUT", // VVC_GDR_NUT
> +    "RSV_IRAP_11", // VVC_RSV_IRAP_11
> +    "OPI_NUT", // VVC_OPI_NUT
> +    "DCI_NUT", // VVC_DCI_NUT
> +    "VPS_NUT", // VVC_VPS_NUT
> +    "SPS_NUT", // VVC_SPS_NUT
> +    "PPS_NUT", // VVC_PPS_NUT
> +    "PREFIX_APS_NUT",// VVC_PREFIX_APS_NUT
> +    "SUFFIX_APS_NUT",// VVC_SUFFIX_APS_NUT
> +    "PH_NUT", // VVC_PH_NUT
> +    "AUD_NUT", // VVC_AUD_NUT
> +    "EOS_NUT", // VVC_EOS_NUT
> +    "EOB_NUT", // VVC_EOB_NUT
> +    "PREFIX_SEI_NUT",// VVC_PREFIX_SEI_NUT
> +    "SUFFIX_SEI_NUT",// VVC_SUFFIX_SEI_NUT
> +    "FD_NUT", // VVC_FD_NUT
> +    "RSV_NVCL_26", // VVC_RSV_NVCL_26
> +    "RSV_NVCL_27", // VVC_RSV_NVCL_27
> +    "UNSPEC_28", // VVC_UNSPEC_28
> +    "UNSPEC_29", // VVC_UNSPEC_29
> +    "UNSPEC_30", // VVC_UNSPEC_30
> +    "UNSPEC_31", // VVC_UNSPEC_31
> +};
> +
> +static const char *h266_nal_unit_name(int nal_type)
> +{
> +    av_assert0(nal_type >= 0 && nal_type < 32);
> +    return h266_nal_type_name[nal_type];
> +}
> +
>  static const char *const hevc_nal_type_name[64] = {
>      "TRAIL_N", // HEVC_NAL_TRAIL_N
>      "TRAIL_R", // HEVC_NAL_TRAIL_R
> @@ -293,6 +335,31 @@ static int get_bit_length(H2645NAL *nal, int min_size, int skip_trailing_zeros)
>   * @return AVERROR_INVALIDDATA if the packet is not a valid NAL unit,
>   * 0 otherwise
>   */
> +static int h266_parse_nal_header(H2645NAL *nal, void *logctx)
> +{
> +    GetBitContext *gb = &nal->gb;
> +
> +    if (get_bits1(gb) != 0)     //forbidden_zero_bit
> +        return AVERROR_INVALIDDATA;
> +
> +    skip_bits1(gb);             //nuh_reserved_zero_bit
> +
> +    nal->nuh_layer_id = get_bits(gb, 6);
> +    nal->type = get_bits(gb, 5);
> +    nal->temporal_id = get_bits(gb, 3) - 1;
> +    if (nal->temporal_id < 0)
> +        return AVERROR_INVALIDDATA;
> +
> +    if ((nal->type >= VVC_IDR_W_RADL && nal->type <= VVC_RSV_IRAP_11) && nal->temporal_id)
> +        return AVERROR_INVALIDDATA;
> +
> +    av_log(logctx, AV_LOG_DEBUG,
> +      "nal_unit_type: %d(%s), nuh_layer_id: %d, temporal_id: %d\n",
> +           nal->type, h266_nal_unit_name(nal->type), nal->nuh_layer_id, nal->temporal_id);
> +
> +    return 0;
> +}
> +
>  static int hevc_parse_nal_header(H2645NAL *nal, void *logctx)
>  {
>      GetBitContext *gb = &nal->gb;
> @@ -509,7 +576,9 @@ int ff_h2645_packet_split(H2645Packet *pkt, const uint8_t *buf, int length,
>          /* Reset type in case it contains a stale value from a previously parsed NAL */
>          nal->type = 0;
>  
> -        if (codec_id == AV_CODEC_ID_HEVC)
> +        if (codec_id == AV_CODEC_ID_VVC)
> +            ret = h266_parse_nal_header(nal, logctx);
> +        else if (codec_id == AV_CODEC_ID_HEVC)
>              ret = hevc_parse_nal_header(nal, logctx);
>          else
>              ret = h264_parse_nal_header(nal, logctx);
> diff --git a/libavcodec/h266_parser.c b/libavcodec/h266_parser.c
> new file mode 100644
> index 0000000000..fbf8177261
> --- /dev/null
> +++ b/libavcodec/h266_parser.c
> @@ -0,0 +1,601 @@
> +/*
> + * H.266 / VVC parser
> + *
> + * Copyright (C) 2021 Nuo Mi <nuomi2021 at gmail.com>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include "cbs.h"
> +#include "cbs_h266.h"
> +#include "internal.h"
> +#include "parser.h"
> +#include "decode.h"
> +
> +#define START_CODE 0x000001 ///< start_code_prefix_one_3bytes
> +#define IS_IDR(nut)   (nut == VVC_IDR_W_RADL || nut == VVC_IDR_N_LP)
> +
> +typedef struct PuInfo {
> +    AVBufferRef *sps_ref;
> +    AVBufferRef *pps_ref;
> +    AVBufferRef *slice_ref;
> +    AVBufferRef *ph_ref;
> +
> +    const H266RawPPS *pps;
> +    const H266RawSPS *sps;
> +    const H266RawPH *ph;
> +    const H266RawSlice *slice;
> +    int pic_type;
> +} PuInfo;
> +
> +typedef struct AuDetector {
> +    uint8_t prev_layer_id;
> +    int prev_tid0_poc;
> +    int prev_poc;
> +} AuDetector;
> +
> +typedef struct H266ParserContext {
> +    ParseContext pc;
> +    CodedBitstreamContext *cbc;
> +
> +    CodedBitstreamFragment picture_unit;
> +
> +    PuInfo   au_info;
> +    AVPacket au;
> +    AVPacket last_au;
> +
> +    AuDetector au_detector;
> +
> +    int parsed_extradata;
> +} H266ParserContext;
> +
> +static const enum AVPixelFormat pix_fmts_8bit[] = {
> +    AV_PIX_FMT_GRAY8, AV_PIX_FMT_YUV420P,
> +    AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV444P
> +};
> +
> +static const enum AVPixelFormat pix_fmts_10bit[] = {
> +    AV_PIX_FMT_GRAY10, AV_PIX_FMT_YUV420P10,
> +    AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10
> +};
> +
> +static int get_format(const H266RawSPS *sps)
> +{
> +    switch (sps->sps_bitdepth_minus8) {
> +    case 0:
> +        return pix_fmts_8bit[sps->sps_chroma_format_idc];
> +    case 2:
> +        return pix_fmts_10bit[sps->sps_chroma_format_idc];
> +    }
> +    return AV_PIX_FMT_NONE;
> +}
> +
> +/**
> + * Find the end of the current frame in the bitstream.
> + * @return the position of the first byte of the next frame, or END_NOT_FOUND
> + */
> +static int find_frame_end(AVCodecParserContext *s, const uint8_t *buf,
> +                          int buf_size)
> +{
> +    H266ParserContext *ctx = s->priv_data;
> +    ParseContext *pc = &ctx->pc;
> +    int i;
> +
> +    for (i = 0; i < buf_size; i++) {
> +        int nut, code_len;
> +
> +        pc->state64 = (pc->state64 << 8) | buf[i];
> +
> +        if (((pc->state64 >> 3 * 8) & 0xFFFFFF) != START_CODE)
> +            continue;

Reading one byte at a time is the slowest way to search for a startcode.
We have special functions for that.
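
Roughly what I have in mind (untested sketch from memory; note that,
unlike the pc->state64 approach, it does not handle start codes split
across buffer boundaries, which the real code would still need to):

    uint32_t state = -1;
    const uint8_t *p   = buf;
    const uint8_t *end = buf + buf_size;

    while (p < end) {
        /* returns a pointer just past a 00 00 01 xx pattern; on success
         * state == 0x000001xx, xx being the first NAL unit header byte */
        p = avpriv_find_start_code(p, end, &state);
        if ((state & 0xFFFFFF00) != 0x100)
            break;              /* no further start code in this buffer */
        if (p >= end)
            break;              /* NAL header cut off at the buffer end */
        /* the H.266 NAL unit header is two bytes; nal_unit_type sits in
         * bits 3..7 of the second byte, i.e. of *p */
        int nut = (*p >> 3) & 0x1F;
        /* ... the existing frame_start_found logic on nut (including the
         * sh_picture_header_in_slice_header_flag peek at p[1]) goes here ... */
    }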

> +
> +        code_len = ((pc->state64 >> 3 * 8) & 0xFFFFFFFF) == 0x01 ? 4 : 3;
> +
> +        nut = (pc->state64 >> (8 + 3)) & 0x1F;
> +        // 7.4.2.4.3 and 7.4.2.4.4
> +        if ((nut >= VVC_OPI_NUT && nut <= VVC_PREFIX_APS_NUT &&
> +             nut != VVC_PH_NUT) || nut == VVC_AUD_NUT
> +            || (nut == VVC_PREFIX_SEI_NUT && !pc->frame_start_found)
> +            || nut == VVC_RSV_NVCL_26 || nut == VVC_UNSPEC_28
> +            || nut == VVC_UNSPEC_29) {
> +            if (pc->frame_start_found) {
> +                pc->frame_start_found = 0;
> +                return i - (code_len + 2);
> +            }
> +        } else if (nut == VVC_PH_NUT || IS_H266_SLICE(nut)) {
> +            int sh_picture_header_in_slice_header_flag = buf[i] >> 7;
> +
> +            if (nut == VVC_PH_NUT || sh_picture_header_in_slice_header_flag) {
> +                if (!pc->frame_start_found) {
> +                    pc->frame_start_found = 1;
> +                } else {        // First slice of next frame found
> +                    pc->frame_start_found = 0;
> +                    return i - (code_len + 2);
> +                }
> +            }
> +        }
> +    }
> +    return END_NOT_FOUND;
> +}
> +
> +static int get_pict_type(const CodedBitstreamFragment *pu)
> +{
> +    int has_p = 0;
> +    for (int i = 0; i < pu->nb_units; i++) {
> +        CodedBitstreamUnit *unit = &pu->units[i];
> +        if (IS_H266_SLICE(unit->type)) {
> +            const H266RawSlice *slice = unit->content;
> +            uint8_t type = slice->header.sh_slice_type;
> +            if (type == VVC_SLICE_TYPE_B) {
> +                return AV_PICTURE_TYPE_B;
> +            }
> +            if (type == VVC_SLICE_TYPE_P) {
> +                has_p = 1;
> +            }
> +        }
> +    }
> +    return has_p ? AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_I;
> +}
> +
> +static void pu_info_unref(PuInfo *info)
> +{
> +    av_buffer_unref(&info->slice_ref);
> +    av_buffer_unref(&info->ph_ref);
> +    av_buffer_unref(&info->pps_ref);
> +    av_buffer_unref(&info->sps_ref);
> +    info->slice = NULL;
> +    info->ph = NULL;
> +    info->pps = NULL;
> +    info->sps = NULL;
> +    info->pic_type = AV_PICTURE_TYPE_NONE;
> +}
> +
> +static int pu_info_ref(PuInfo *dest, const PuInfo *src)
> +{
> +    pu_info_unref(dest);
> +    dest->sps_ref = av_buffer_ref(src->sps_ref);
> +    dest->pps_ref = av_buffer_ref(src->pps_ref);
> +    if (src->ph_ref)
> +        dest->ph_ref = av_buffer_ref(src->ph_ref);
> +    dest->slice_ref = av_buffer_ref(src->slice_ref);
> +    if (!dest->sps_ref || !dest->pps_ref || (src->ph_ref && !dest->ph_ref)
> +        || !dest->slice_ref) {
> +        pu_info_unref(dest);
> +        return AVERROR(ENOMEM);
> +    }
> +
> +    dest->sps = src->sps;
> +    dest->pps = src->pps;
> +    dest->ph = src->ph;
> +    dest->slice = src->slice;
> +    dest->pic_type = src->pic_type;
> +    return 0;
> +}
> +
> +static int set_parser_ctx(AVCodecParserContext *s, AVCodecContext *avctx,
> +                          const PuInfo *pu)
> +{
> +    int ret, num = 0, den = 0;
> +    static const uint8_t h266_sub_width_c[] = {
> +        1, 2, 2, 1
> +    };
> +    static const uint8_t h266_sub_height_c[] = {
> +        1, 2, 1, 1
> +    };
> +    const H266RawSPS *sps = pu->sps;
> +    const H266RawPPS *pps = pu->pps;
> +    //const H266RawPH  *ph  = pu->ph;
> +    const H266RawNALUnitHeader *nal = &pu->slice->header.nal_unit_header;
> +
> +    /* set some sane default values */
> +    s->pict_type         = AV_PICTURE_TYPE_I;
> +    s->key_frame         = 0;
> +    s->picture_structure = AV_PICTURE_STRUCTURE_FRAME;
> +
> +    s->key_frame = nal->nal_unit_type == VVC_IDR_W_RADL ||
> +                   nal->nal_unit_type == VVC_IDR_N_LP ||
> +                   nal->nal_unit_type == VVC_CRA_NUT ||
> +                   nal->nal_unit_type == VVC_GDR_NUT;
> +
> +    s->coded_width  = pps->pps_pic_width_in_luma_samples;
> +    s->coded_height = pps->pps_pic_height_in_luma_samples;
> +    s->width = pps->pps_pic_width_in_luma_samples -
> +        (pps->pps_conf_win_left_offset + pps->pps_conf_win_right_offset) *
> +        h266_sub_width_c[sps->sps_chroma_format_idc];
> +    s->height = pps->pps_pic_height_in_luma_samples -
> +        (pps->pps_conf_win_top_offset + pps->pps_conf_win_bottom_offset) *
> +        h266_sub_height_c[sps->sps_chroma_format_idc];
> +    s->pict_type = pu->pic_type;
> +    s->format = get_format(sps);
> +
> +    avctx->profile = sps->profile_tier_level.general_profile_idc;
> +    avctx->level = sps->profile_tier_level.general_level_idc;
> +
> +    avctx->colorspace = (enum AVColorSpace) sps->vui.vui_matrix_coeffs;
> +    avctx->color_primaries = (enum AVColorPrimaries) sps->vui.vui_colour_primaries;
> +    avctx->color_trc = (enum AVColorTransferCharacteristic) sps->vui.vui_transfer_characteristics;
> +    avctx->color_range =
> +        sps->vui.vui_full_range_flag ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
> +
> +    if (s->width != avctx->width || s->height != avctx->height) {
> +        ret = ff_set_dimensions(avctx, s->width, s->height);
> +        if (ret < 0)
> +            return ret;
> +    }
> +    avctx->pix_fmt = s->format;
> +    avctx->has_b_frames = (sps->sps_max_sublayers_minus1 + 1) > 2 ? 2 :
> +                           sps->sps_max_sublayers_minus1;
> +    avctx->max_b_frames = sps->sps_max_sublayers_minus1;
> +
> +    if (sps->sps_ptl_dpb_hrd_params_present_flag &&
> +        sps->sps_timing_hrd_params_present_flag) {
> +        num = sps->sps_general_timing_hrd_parameters.num_units_in_tick;
> +        den = sps->sps_general_timing_hrd_parameters.time_scale;
> +    } else {
> +        return 1;
> +    }
> +    if (num != 0 && den != 0)
> +        av_reduce(&avctx->framerate.den, &avctx->framerate.num,
> +                  num, den, 1 << 30);
> +
> +    if (avctx->framerate.num)
> +        avctx->time_base = av_inv_q(av_mul_q(avctx->framerate, (AVRational) {
> +                                             avctx->ticks_per_frame, 1} ));
> +
> +    return 1;
> +}
> +
> +static int set_ctx(AVCodecParserContext *s, AVCodecContext *avctx,
> +                   const PuInfo *next_pu)
> +{
> +    H266ParserContext *ctx = s->priv_data;
> +    int ret;
> +    if (ctx->au_info.slice) {
> +        if ((ret = set_parser_ctx(s, avctx, &ctx->au_info)) < 0)
> +            return ret;
> +    }
> +    ret = pu_info_ref(&ctx->au_info, next_pu);
> +    return ret;
> +}
> +
> +//8.3.1 Decoding process for picture order count.
> +//VTM did not follow the spec, and it's much simpler than spec.
> +//We follow the VTM.
> +static void get_slice_poc(H266ParserContext *s, int *poc,
> +                          const H266RawSPS *sps,
> +                          const H266RawPH *ph,
> +                          const H266RawSliceHeader *slice, void *log_ctx)
> +{
> +    int poc_msb, max_poc_lsb, poc_lsb;
> +    AuDetector *d = &s->au_detector;
> +    max_poc_lsb = 1 << (sps->sps_log2_max_pic_order_cnt_lsb_minus4 + 4);
> +    poc_lsb = ph->ph_pic_order_cnt_lsb;
> +    if (IS_IDR(slice->nal_unit_header.nal_unit_type)) {
> +        if (ph->ph_poc_msb_cycle_present_flag)
> +            poc_msb = ph->ph_poc_msb_cycle_val * max_poc_lsb;
> +        else
> +            poc_msb = 0;
> +    } else {
> +        int prev_poc = d->prev_tid0_poc;
> +        int prev_poc_lsb = prev_poc & (max_poc_lsb - 1);
> +        int prev_poc_msb = prev_poc - prev_poc_lsb;
> +        if (ph->ph_poc_msb_cycle_present_flag) {
> +            poc_msb = ph->ph_poc_msb_cycle_val * max_poc_lsb;
> +        } else {
> +            if ((poc_lsb < prev_poc_lsb) && ((prev_poc_lsb - poc_lsb) >=
> +                (max_poc_lsb / 2)))
> +                poc_msb = prev_poc_msb + max_poc_lsb;
> +            else if ((poc_lsb > prev_poc_lsb) && ((poc_lsb - prev_poc_lsb) >
> +                     (max_poc_lsb / 2)))
> +                poc_msb = prev_poc_msb - max_poc_lsb;
> +            else
> +                poc_msb = prev_poc_msb;
> +        }
> +    }
> +
> +    *poc = poc_msb + poc_lsb;
> +}
> +
> +static void au_detector_init(AuDetector *d)
> +{
> +    d->prev_layer_id = UINT8_MAX;
> +    d->prev_poc = INT_MAX;
> +    d->prev_tid0_poc = INT_MAX;
> +}
> +
> +static int is_au_start(H266ParserContext *s, const PuInfo *pu, void *log_ctx)
> +{
> +    //7.4.2.4.3
> +    AuDetector *d = &s->au_detector;
> +    const H266RawSPS *sps = pu->sps;
> +    const H266RawNALUnitHeader *nal = &pu->slice->header.nal_unit_header;
> +    const H266RawPH *ph = pu->ph;
> +    const H266RawSlice *slice = pu->slice;
> +    int ret, poc, nut;
> +
> +    get_slice_poc(s, &poc, sps, ph, &slice->header, log_ctx);
> +
> +    ret = (nal->nuh_layer_id <= d->prev_layer_id) || (poc != d->prev_poc);
> +
> +    nut = nal->nal_unit_type;
> +    d->prev_layer_id = nal->nuh_layer_id;
> +    d->prev_poc = poc;
> +    if (nal->nuh_temporal_id_plus1 == 1 &&
> +        !ph->ph_non_ref_pic_flag && nut != VVC_RADL_NUT
> +        && nut != VVC_RASL_NUT) {
> +        d->prev_tid0_poc = poc;
> +    }
> +    return ret;
> +}
> +
> +static int get_pu_info(PuInfo *info, const CodedBitstreamH266Context *h266,
> +                       const CodedBitstreamFragment *pu, void *logctx)
> +{
> +    const H266RawNALUnitHeader *nal;
> +    int ret;
> +
> +    memset(info, 0, sizeof(*info));
> +    for (int i = 0; i < pu->nb_units; i++) {
> +        nal = pu->units[i].content;
> +        if (!nal)
> +            continue;
> +        if ( nal->nal_unit_type == VVC_PH_NUT ) {
> +            info->ph = pu->units[i].content;
> +            info->ph_ref = pu->units[i].content_ref;
> +        } else if (IS_H266_SLICE(nal->nal_unit_type)) {
> +            info->slice = pu->units[i].content;
> +            info->slice_ref = pu->units[i].content_ref;
> +            if (info->slice->header.sh_picture_header_in_slice_header_flag)
> +                info->ph = &info->slice->header.sh_picture_header;
> +            if (!info->ph) {
> +                av_log(logctx, AV_LOG_ERROR,
> +                       "can't find picture header in picture unit.\n");
> +                ret = AVERROR_INVALIDDATA;
> +                goto error;
> +            }
> +            break;
> +        }
> +    }
> +    if (!info->slice) {
> +        av_log(logctx, AV_LOG_ERROR, "can't find slice in picture unit.\n");
> +        ret = AVERROR_INVALIDDATA;
> +        goto error;
> +    }
> +    info->pps = h266->pps[info->ph->ph_pic_parameter_set_id];
> +    if (!info->pps) {
> +        av_log(logctx, AV_LOG_ERROR, "PPS id %d is not available.\n",
> +               info->ph->ph_pic_parameter_set_id);
> +        ret = AVERROR_INVALIDDATA;
> +        goto error;
> +    }
> +    info->pps_ref = h266->pps_ref[info->ph->ph_pic_parameter_set_id];

Copying AVBufferRef* is not nice, although I see that it is correct here.
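
I.e. the nicer pattern would be to take a reference explicitly, along
the lines of the following (just a sketch; the error path and the
caller would then have to unref what they now own instead of simply
memset()ing the struct):

    info->pps_ref =
        av_buffer_ref(h266->pps_ref[info->ph->ph_pic_parameter_set_id]);
    if (!info->pps_ref) {
        ret = AVERROR(ENOMEM);
        goto error;     /* and 'error' must then unref, not just memset */
    }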

> +    info->sps = h266->sps[info->pps->pps_seq_parameter_set_id];
> +    if (!info->sps) {
> +        av_log(logctx, AV_LOG_ERROR, "SPS id %d is not available.\n",
> +               info->pps->pps_seq_parameter_set_id);
> +        ret = AVERROR_INVALIDDATA;
> +        goto error;
> +    }
> +    info->sps_ref = h266->sps_ref[info->pps->pps_seq_parameter_set_id];
> +    info->pic_type = get_pict_type(pu);
> +    return 0;
> +  error:
> +    memset(info, 0, sizeof(*info));
> +    return ret;
> +}
> +
> +static int append_au(AVPacket *pkt, const uint8_t *buf, int buf_size)
> +{
> +    int offset = pkt->size;
> +    int ret;
> +    if ((ret = av_grow_packet(pkt, buf_size)) < 0)
> +        goto end;
> +    memcpy(pkt->data + offset, buf, buf_size);
> +  end:
> +    return ret;
> +}
> +
> +/**
> + * Parse NAL units of found picture and decode some basic information.
> + *
> + * @param s parser context.
> + * @param avctx codec context.
> + * @param buf buffer with field/frame data.
> + * @param buf_size size of the buffer.
> + * @return < 0 on error, == 0 for a complete AU, > 0 if the AU is not yet complete.
> + */
> +static int parse_nal_units(AVCodecParserContext *s, const uint8_t *buf,
> +                           int buf_size, AVCodecContext *avctx)
> +{
> +    H266ParserContext *ctx = s->priv_data;
> +    const CodedBitstreamH266Context *h266 = ctx->cbc->priv_data;
> +
> +    CodedBitstreamFragment *pu = &ctx->picture_unit;
> +    int ret;
> +    PuInfo info;
> +
> +    if (!buf_size) {
> +        if (ctx->au.size) {
> +            if ((ret = av_packet_ref(&ctx->last_au, &ctx->au)) < 0)
> +                goto end;
> +            av_packet_unref(&ctx->au);
> +            return 0;
> +        }
> +        return 1;
> +    }
> +
> +    if ((ret = ff_cbs_read(ctx->cbc, pu, buf, buf_size)) < 0) {
> +        av_log(avctx, AV_LOG_ERROR, "Failed to parse picture unit.\n");
> +        goto end;
> +    }
> +    if ((ret = get_pu_info(&info, h266, pu, avctx)) < 0)
> +        goto end;
> +    if (append_au(&ctx->au, buf, buf_size) < 0){

Why is this done unconditionally, even with PARSER_FLAG_COMPLETE_FRAMES?

> +        ret = AVERROR(ENOMEM);
> +        goto end;
> +    }
> +    if (is_au_start(ctx, &info, avctx)) {

Why are you checking again? Does this mean that find_frame_end() is not
correct?

> +        if ((ret = set_ctx(s, avctx, &info)) < 0)
> +            goto end;
> +        if ((ret = av_packet_ref(&ctx->last_au, &ctx->au)) < 0)
> +            goto end;
> +        av_packet_unref(&ctx->au);
> +    } else {
> +        ret = 1; //not a completed au
> +    }
> +  end:
> +    ff_cbs_fragment_reset(pu);
> +    return ret;
> +}
> +
> +/**
> + * Combine PU to AU
> + *
> + * @param s parser context.
> + * @param avctx codec context.
> + * @param buf buffer to a PU.
> + * @param buf_size size of the buffer.
> + * @return < 0 on error, == 0 for a complete AU, > 0 if the AU is not yet complete.
> + */
> +static int combine_au(AVCodecParserContext *s, AVCodecContext *avctx,
> +                      const uint8_t **buf, int *buf_size)
> +{
> +    H266ParserContext *ctx = s->priv_data;
> +    int ret;
> +
> +    ctx->cbc->log_ctx = avctx;
> +
> +    av_packet_unref(&ctx->last_au);
> +    ret = parse_nal_units(s, *buf, *buf_size, avctx);
> +    if (ret == 0) {
> +        if (ctx->last_au.size) {
> +            *buf = ctx->last_au.data;
> +            *buf_size = ctx->last_au.size;
> +        } else {
> +            ret = 1; //no output
> +        }
> +    }
> +    ctx->cbc->log_ctx = NULL;
> +    return ret;
> +}
> +
> +static int h266_parser_parse(AVCodecParserContext *s, AVCodecContext *avctx,
> +                             const uint8_t **poutbuf, int *poutbuf_size,
> +                             const uint8_t *buf, int buf_size)
> +{
> +    int next, ret;
> +    H266ParserContext *ctx = s->priv_data;
> +    ParseContext *pc = &ctx->pc;
> +    CodedBitstreamFragment *pu = &ctx->picture_unit;
> +
> +    int is_dummy_buf = !buf_size;
> +    int flush = !buf_size;
> +    const uint8_t *dummy_buf = buf;

Your dummy_buf might not be a dummy_buf at all; better call it
"orig_buf" or so.

> +
> +    if (avctx->extradata_size && !ctx->parsed_extradata) {
> +        ctx->parsed_extradata = 1;
> +
> +        ret = ff_cbs_read_extradata_from_codec(ctx->cbc, pu, avctx);
> +        if (ret < 0)
> +            av_log(avctx, AV_LOG_WARNING, "Failed to parse extradata.\n");
> +
> +        ff_cbs_fragment_reset(pu);
> +    }
> +
> +    if (s->flags & PARSER_FLAG_COMPLETE_FRAMES) {
> +        next = buf_size;
> +    } else {
> +        next = find_frame_end(s, buf, buf_size);
> +        if (ff_combine_frame(pc, next, &buf, &buf_size) < 0)
> +            goto no_out;
> +    }
> +
> +    is_dummy_buf &= (dummy_buf == buf);
> +
> +    if (!is_dummy_buf) {

I don't really understand this. The above check is only relevant if we
are flushing; but if we are flushing, then we should always analyze
everything we have. It seems to me that the above check is actually
meant to be equivalent to "if (buf_size)": if PARSER_FLAG_COMPLETE_FRAMES
is set, then we have no leftover data from last time and is_dummy_buf
stays 1 when flushing; if PARSER_FLAG_COMPLETE_FRAMES is not set, then
find_frame_end() won't find an end in our 0-sized buffer, but
ff_combine_frame() will (as a special case when flushing) return a value
>= 0 and the line above will make is_dummy_buf 0, so we analyze the
leftover.
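
If that reading is right, the whole dummy_buf bookkeeping could be
written as just the following (untested; this merely spells out the
equivalence claimed above, it is not a verified refactor):

    if (buf_size) {
        ret = combine_au(s, avctx, &buf, &buf_size);
        if (ret > 0 && flush) {
            buf_size = 0;
            ret = combine_au(s, avctx, &buf, &buf_size);
        }
        if (ret != 0) {
            buf_size = next;
            goto no_out;
        }
    }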

> +        ret = combine_au(s, avctx, &buf, &buf_size);
> +        if (ret > 0 && flush) {
> +            buf_size = 0;
> +            ret = combine_au(s, avctx, &buf, &buf_size);
> +        }
> +        if (ret != 0) {
> +            buf_size = next;
> +            goto no_out;
> +        }
> +    }
> +
> +    *poutbuf = buf;
> +    *poutbuf_size = buf_size;
> +    return next;
> +  no_out:
> +    *poutbuf = NULL;
> +    *poutbuf_size = 0;
> +    return buf_size;
> +}
> +
> +static const CodedBitstreamUnitType decompose_unit_types[] = {
> +    VVC_TRAIL_NUT,
> +    VVC_STSA_NUT,
> +    VVC_RADL_NUT,
> +    VVC_RASL_NUT,
> +    VVC_IDR_W_RADL,
> +    VVC_IDR_N_LP,
> +    VVC_CRA_NUT,
> +    VVC_GDR_NUT,
> +    VVC_VPS_NUT,
> +    VVC_SPS_NUT,
> +    VVC_PPS_NUT,
> +    VVC_PH_NUT,
> +    VVC_AUD_NUT,
> +};
> +
> +static av_cold int h266_parser_init(AVCodecParserContext *s)
> +{
> +    H266ParserContext *ctx = s->priv_data;
> +    int ret;
> +
> +    ret = ff_cbs_init(&ctx->cbc, AV_CODEC_ID_VVC, NULL);
> +    if (ret < 0)
> +        return ret;
> +    au_detector_init(&ctx->au_detector);
> +
> +    ctx->cbc->decompose_unit_types    = decompose_unit_types;
> +    ctx->cbc->nb_decompose_unit_types = FF_ARRAY_ELEMS(decompose_unit_types);
> +
> +    return ret;
> +}
> +
> +static void h266_parser_close(AVCodecParserContext *s)
> +{
> +    H266ParserContext *ctx = s->priv_data;
> +
> +    pu_info_unref(&ctx->au_info);
> +    av_packet_unref(&ctx->au);
> +    av_packet_unref(&ctx->last_au);
> +    ff_cbs_fragment_free(&ctx->picture_unit);
> +
> +    ff_cbs_close(&ctx->cbc);
> +    av_freep(&ctx->pc.buffer);
> +}
> +
> +AVCodecParser ff_h266_parser = {
> +    .codec_ids      = { AV_CODEC_ID_VVC },
> +    .priv_data_size = sizeof(H266ParserContext),
> +    .parser_init    = h266_parser_init,
> +    .parser_close   = h266_parser_close,
> +    .parser_parse   = h266_parser_parse,
> +};
> diff --git a/libavcodec/parsers.c b/libavcodec/parsers.c
> index d355808018..aa2043ee1b 100644
> --- a/libavcodec/parsers.c
> +++ b/libavcodec/parsers.c
> @@ -50,6 +50,7 @@ extern const AVCodecParser ff_gsm_parser;
>  extern const AVCodecParser ff_h261_parser;
>  extern const AVCodecParser ff_h263_parser;
>  extern const AVCodecParser ff_h264_parser;
> +extern const AVCodecParser ff_h266_parser;
>  extern const AVCodecParser ff_hevc_parser;
>  extern const AVCodecParser ff_hdr_parser;
>  extern const AVCodecParser ff_ipu_parser;


