[FFmpeg-devel] [PATCH v2 06/11] avcodec: add cbs for h266/vvc

Mon Jan 11 22:45:11 EET 2021

On 10/01/2021 09:10, Nuo Mi wrote:
> On Sun, Jan 10, 2021 at 5:34 AM Mark Thompson <sw at jkqxz.net> wrote:
>> On 09/01/2021 07:34, Nuo Mi wrote:
>>> ---
>>>    configure                             |    2 +
>>>    libavcodec/Makefile                   |    1 +
>>>    libavcodec/cbs.c                      |    6 +
>>>    libavcodec/cbs_h2645.c                |  373 ++++
>>>    libavcodec/cbs_h266.h                 |  840 ++++++++
>>>    libavcodec/cbs_h266_syntax_template.c | 2761 +++++++++++++++++++++++++
>>>    libavcodec/cbs_internal.h             |    3 +-
>>>    7 files changed, 3985 insertions(+), 1 deletion(-)
>>>    create mode 100644 libavcodec/cbs_h266.h
>>>    create mode 100644 libavcodec/cbs_h266_syntax_template.c
>>>
>>> ...
>>> @@ -920,6 +934,135 @@ static int
>> cbs_h265_read_nal_unit(CodedBitstreamContext *ctx,
>>>        return 0;
>>>    }
>>>
>>> +static int cbs_h266_replace_ph(CodedBitstreamContext *ctx,
>>> +                               CodedBitstreamUnit *unit)
>>> +{
>>> +    CodedBitstreamH266Context *priv = ctx->priv_data;
>>> +    int err;
>>> +    err = ff_cbs_make_unit_refcounted(ctx, unit);
>>> +    if (err < 0)
>>> +        return err;
>>> +    av_buffer_unref(&priv->ph_ref);
>>> +    av_assert0(unit->content_ref);
>>> +    priv->ph_ref = av_buffer_ref(unit->content_ref);
>>> +    if (!priv->ph_ref)
>>> +        return AVERROR(ENOMEM);
>>> +    priv->active_ph = priv->ph = (H266RawPH *)priv->ph_ref->data;
>>
>> Why are there too variables here?  They seem to always be the same.
>>
>    priv->active_ph is read-only, priv->ph is writeable pointer. I can change
> to priv->ph only if you prefer.

Reading more carefully, H.266 doesn't appear to have the concept of a parameter set being "active" any more (the term never appears).

Does it actually follow the same rules as H.26[45]?  Is there some newer terminology which we should be using in all of this?

>>> +    return 0;
>>> +}
>>> +
>>> ...
>>> diff --git a/libavcodec/cbs_h266_syntax_template.c
>> b/libavcodec/cbs_h266_syntax_template.c
>>> new file mode 100644
>>> index 0000000000..6a6defc8a5
>>> --- /dev/null
>>> +++ b/libavcodec/cbs_h266_syntax_template.c
>>> @@ -0,0 +1,2761 @@
>>> +/*
>>> + * This file is part of FFmpeg.
>>> + *
>>> + * FFmpeg is free software; you can redistribute it and/or
>>> + * modify it under the terms of the GNU Lesser General Public
>>> + * License as published by the Free Software Foundation; either
>>> + * version 2.1 of the License, or (at your option) any later version.
>>> + *
>>> + * FFmpeg is distributed in the hope that it will be useful,
>>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>>> + * Lesser General Public License for more details.
>>> + *
>>> + * You should have received a copy of the GNU Lesser General Public
>>> + * License along with FFmpeg; if not, write to the Free Software
>>> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
>> 02110-1301 USA
>>> + */
>>> +
>>> +#ifndef H266_CEIL
>>> +#define H266_CEIL(v, a) (((v) + (a) - 1) / (a))
>>> +#endif
>>
>> If it's useful to have this separately then put it in cbs_h2645.c rather
>> than doubled in the template.  (See for example cbs_av1_tile_log2().)
>>
> It's protected by ifdef, not doubled.

It would still make more sense to make it a(n inline?) function in cbs_h2645.c.  The template file is intended to contain the standard template code and not other stuff.

>>> +
>>> ...
>>> +
>>> +static int FUNC(vui_parameters)(CodedBitstreamContext *ctx, RWContext
>> *rw,
>>> +                                H266RawVUI *current)
>>> +{
>>> +    int err;
>>> +
>>> +    flag(vui_progressive_source_flag);
>>> +    flag(vui_interlaced_source_flag);
>>> +    flag(vui_non_packed_constraint_flag);
>>> +    flag(vui_non_projected_constraint_flag);
>>> +    flag(vui_aspect_ratio_info_present_flag);
>>> +    if (current->vui_aspect_ratio_info_present_flag) {
>>> +        flag(vui_aspect_ratio_constant_flag);
>>> +        ub(8, vui_aspect_ratio_idc);
>>> +        if (current->vui_aspect_ratio_idc == 255) {
>>> +            ub(16, vui_sar_width);
>>> +            ub(16, vui_sar_height);
>>> +        }
>>> +    } else {
>>> +        infer(vui_aspect_ratio_constant_flag, 0);
>>> +        infer(vui_aspect_ratio_idc, 0);
>>> +    }
>>> +    flag(vui_overscan_info_present_flag);
>>> +    if (current->vui_overscan_info_present_flag)
>>> +        flag(vui_overscan_appropriate_flag);
>>> +    flag(vui_colour_description_present_flag);
>>> +    if (current->vui_colour_description_present_flag) {
>>> +        ub(8, vui_colour_primaries);
>>> +        ub(8, vui_transfer_characteristics);
>>> +        ub(8, vui_matrix_coeffs);
>>> +        flag(vui_full_range_flag);
>>> +    } else {
>>> +        infer(vui_colour_primaries, 2);
>>> +        infer(vui_transfer_characteristics, 2);
>>> +        infer(vui_matrix_coeffs, 2);
>>> +        infer(vui_full_range_flag, 0);
>>> +    }
>>> +    flag(vui_chroma_loc_info_present_flag);
>>> +    if (current->vui_chroma_loc_info_present_flag) {
>>> +        if (current->vui_progressive_source_flag &&
>>> +            !current->vui_interlaced_source_flag) {
>>> +            ue(vui_chroma_sample_loc_type_frame, 0, 6);
>>> +        } else {
>>> +            ue(vui_chroma_sample_loc_type_top_field, 0, 6);
>>> +            ue(vui_chroma_sample_loc_type_bottom_field,  0, 6);
>>> +        }
>>> +    }
>>
>> These are inferred as 6 when not present?
>>
> 6 only happened when ChromaFormatIdc = 1, others are not defined.
> and 6 is the unspecific value in the spec...
> Do we really need to infer it :)

To match the colour description cases probably yes?

>>> +
>>> +    return 0;
>>> +}
>>> +
>>> ...
>>> +
>>> +static int FUNC(sps_subpic)(CodedBitstreamContext *ctx, RWContext *rw,
>>> +                     H266RawSPS *current)
>>
>> This function doesn't actually exist, and all splitting it out seems to be
>> doing is making the template look less like the standard?
>>
> VVC is more complex than hevc. If we do not use some sub-functions. It will
> be too large. not friendly for reader and indent.

Feel free to ask the standard committee to split up their large functions.

Until that happens, please keep the functions the same as they are in the current standard.

>>> +{
>>> ...
>>> +}
>>> +
>>> +static int FUNC(sps)(CodedBitstreamContext *ctx, RWContext *rw,
>>> +                     H266RawSPS *current)
>>> +{
>>> ...
>>> +
>>> +    u(2, sps_num_extra_ph_bytes, 0, 2);
>>> +    for (i = 0; i < (current->sps_num_extra_ph_bytes * 8); i++) {
>>> +        flags(sps_extra_ph_bit_present_flag[i], 1, i);
>>> +    }
>>> +
>>> +    u(2, sps_num_extra_sh_bytes, 0, 0);
>>
>> Between zero and zero?
>>
> For the current version of spec, yes.
> Spec always says we need to open for some bits/values.
> But, from my personal experience, when we have other values in the reserved
> one. We usually cant correctly decode a stream.
> So, instead of ignoring the value, another approach is to report the error
> for it, so we can get the bitstream earlier and fix it.

If that's always the case then the standard people would have failed at their jobs.  What would the point of including these blocks even be if it were going to be incompatible anyway?  They could just introduce new syntax elements wherever they like in that case.

(Also the text for this is identical to sps_num_extra_ph_bytes, which is [0, 2].)

>>> +    for (i = 0; i < (current->sps_num_extra_sh_bytes * 8); i++) {
>>> +        flags(sps_extra_sh_bit_present_flag[i], 1, i);
>>> +    }
>>> +
>>> +    ...
>>> +
>>> +    flag(sps_affine_enabled_flag);
>>> +    if (current->sps_affine_enabled_flag) {
>>> +        ue(sps_five_minus_max_num_subblock_merge_cand,
>>> +           0, 5 - current->sps_sbtmvp_enabled_flag);
>>> +        flag(sps_6param_affine_enabled_flag);
>>> +        if (current->sps_amvr_enabled_flag)
>>> +            flag(sps_affine_amvr_enabled_flag);
>>> +        else
>>> +            infer(sps_affine_amvr_enabled_flag, 0);
>>> +        flag(sps_affine_prof_enabled_flag);
>>> +        if (current->sps_affine_prof_enabled_flag)
>>> +            flag(sps_prof_control_present_in_ph_flag);
>>> +        else
>>> +            infer(sps_prof_control_present_in_ph_flag, 0);
>>> +    } else {
>>> +        infer(sps_five_minus_max_num_subblock_merge_cand, 0);
>>
>> This inference isn't right - there is also a temporal subblock merge
>> candidate.
>>
> How to get it? Spec did not define how to infer
> sps_five_minus_max_num_subblock_merge_cand,
> I just guess it from sps_6param_affine_enabled_flag

The text doesn't offer an inference, and MaxNumSubblockMergeCand is defined without needing any inferred value in 7.4.3.8 (85).

>>> +        infer(sps_6param_affine_enabled_flag, 0);
>>> +        infer(sps_affine_amvr_enabled_flag, 0);
>>> +        infer(sps_affine_prof_enabled_flag, 0);
>>> +        infer(sps_prof_control_present_in_ph_flag, 0);
>>> +    }
>>> +
>>> ...
>>> +
>>> +static int FUNC(slice_header)(CodedBitstreamContext *ctx, RWContext *rw,
>>> +                              H266RawSliceHeader *current)
>>> +{
>>> +    CodedBitstreamH266Context *h266 = ctx->priv_data;
>>> +    const H266RawSPS *sps;
>>> +    const H266RawPPS *pps;
>>> +    const H266RawPH  *ph;
>>> +    const H266RefPicLists *ref_pic_lists;
>>> +    int      err, i;
>>> +    uint8_t  nal_unit_type, qp_bd_offset;
>>> +    uint16_t curr_subpic_idx;
>>> +    uint16_t num_slices_in_subpic;
>>> +
>>> +    HEADER("Slice Header");
>>> +
>>> +    CHECK(FUNC(nal_unit_header)(ctx, rw, &current->nal_unit_header,
>> -1));
>>> +
>>> +    flag(sh_picture_header_in_slice_header_flag);
>>> +    if (current->sh_picture_header_in_slice_header_flag){
>>> +        CHECK(FUNC(picture_header)(ctx, rw,
>> &current->sh_picture_header));
>>> +        if (!h266->ph_ref) {
>>> +            h266->ph_ref = av_buffer_allocz(sizeof(H266RawPH));
>>> +            if (!h266->ph_ref)
>>> +                return AVERROR(ENOMEM);
>>> +            h266->active_ph = h266->ph = (H266RawPH*)h266->ph_ref->data;
>>> +        }
>>> +        memcpy(h266->ph, &current->sh_picture_header,
>> sizeof(H266RawPH));
>>> +    }
>>> +    sps = h266->active_sps;
>>> +    pps = h266->active_pps;
>>> +    ph  = h266->active_ph;
>>
>> This is going to do something horrible if you have a frame where you lose
>> the NAL unit containing the picture header but it still has other slices.
>>
>> Can we detect and avoid that case?
>>
> In this case, we will have a parser to split the frames, the parser code
> will make sure it always has a picture header in every frame.

It has to work anyway, though.  The parser need not be run by the user, and indeed they won't if the input comes from something like RTP.

>>> +
>>> ...

- Mark