[FFmpeg-devel] [PATCH v9 4/6] lavc: Implement Dolby Vision RPU parsing

Sun Jan 2 14:45:46 EET 2022

On 1/2/2022 1:18 AM, Andreas Rheinhardt wrote:
> Niklas Haas:
>> From: Niklas Haas <git at haasn.dev>
>>
>> Based on a mixture of guesswork, partial documentation in patents, and
>> reverse engineering of real-world samples. Confirmed working for all the
>> samples I've thrown at it.
>>
>> Contains some annoying machinery to persist these values in between
>> frames, which is needed in theory even though I've never actually seen a
>> sample that relies on it in practice. May or may not work.
>>
>> Since the distinction matters greatly for parsing the color matrix
>> values, this includes a small helper function to guess the right profile
>> from the RPU itself in case the user has forgotten to forward the dovi
>> configuration record to the decoder. (Which in practice, only ffmpeg.c
>> and ffplay do..)
>>
>> Notable omissions / deviations:
>> - CRC32 verification. This is based on the MPEG2 CRC32 type, which does
>>    not seem to be implemented in lavu. (And I don't care enough to do so)
>> - Linear interpolation support. Nothing documents this (beyond its
>>    existence) and no samples use it, so impossible to implement.
>> - All of the extension metadata blocks, but these contain values that
>>    seem largely congruent with ST2094, HDR10, or other existing forms of
>>    side data, so I will defer parsing/attaching them to a future commit.
>> - The patent describes a mechanism for predicting coefficients from
>>    previous RPUs, but the bit for the flag whether to use the
>>    prediction deltas or signal entirely new coefficients does not seem to
>>    be present in actual RPUs, so we ignore this subsystem entirely.
>> - In the patent's spec, the NLQ subsystem also loops over
>>    num_nlq_pivots, but even in the patent the number is hard-coded to one
>>    iteration rather than signalled. So we only store one set of coefs.
>>
>> Heavily influenced by https://github.com/quietvoid/dovi_tool
>> Documentation drawn from US Patent 10,701,399 B2 and ETSI GS CCM 001
>>
>> Signed-off-by: Niklas Haas <git at haasn.dev>
>> ---
>>   configure             |   2 +
>>   libavcodec/Makefile   |   1 +
>>   libavcodec/dovi_rpu.c | 430 ++++++++++++++++++++++++++++++++++++++++++
>>   libavcodec/dovi_rpu.h |  71 +++++++
>>   4 files changed, 504 insertions(+)
>>   create mode 100644 libavcodec/dovi_rpu.c
>>   create mode 100644 libavcodec/dovi_rpu.h
>>
>> diff --git a/configure b/configure
>> index 0ccd3bda11..68658a847f 100755
>> --- a/configure
>> +++ b/configure
>> @@ -2434,6 +2434,7 @@ CONFIG_EXTRA="
>>       cbs_vp9
>>       dirac_parse
>>       dnn
>> +    dovi_rpu
>>       dvprofile
>>       exif
>>       faandct
>> @@ -2706,6 +2707,7 @@ cbs_mpeg2_select="cbs"
>>   cbs_vp9_select="cbs"
>>   dct_select="rdft"
>>   dirac_parse_select="golomb"
>> +dovi_rpu_select="golomb"
>>   dnn_suggest="libtensorflow libopenvino"
>>   dnn_deps="avformat swscale"
>>   error_resilience_select="me_cmp"
>> diff --git a/libavcodec/Makefile b/libavcodec/Makefile
>> index fb90ecea84..7364c7a91f 100644
>> --- a/libavcodec/Makefile
>> +++ b/libavcodec/Makefile
>> @@ -77,6 +77,7 @@ OBJS-$(CONFIG_CBS_MPEG2)               += cbs_mpeg2.o
>>   OBJS-$(CONFIG_CBS_VP9)                 += cbs_vp9.o
>>   OBJS-$(CONFIG_CRYSTALHD)               += crystalhd.o
>>   OBJS-$(CONFIG_DCT)                     += dct.o dct32_fixed.o dct32_float.o
>> +OBJS-$(CONFIG_DOVI_RPU)                += dovi_rpu.o
>>   OBJS-$(CONFIG_ERROR_RESILIENCE)        += error_resilience.o
>>   OBJS-$(CONFIG_EXIF)                    += exif.o tiff_common.o
>>   OBJS-$(CONFIG_FAANDCT)                 += faandct.o
>> diff --git a/libavcodec/dovi_rpu.c b/libavcodec/dovi_rpu.c
>> new file mode 100644
>> index 0000000000..fc2e1fb2a1
>> --- /dev/null
>> +++ b/libavcodec/dovi_rpu.c
>> @@ -0,0 +1,430 @@
>> +/*
>> + * Dolby Vision RPU decoder
>> + *
>> + * Copyright (C) 2021 Jan Ekström
>> + * Copyright (C) 2021 Niklas Haas
>> + *
>> + * This file is part of FFmpeg.
>> + *
>> + * FFmpeg is free software; you can redistribute it and/or
>> + * modify it under the terms of the GNU Lesser General Public
>> + * License as published by the Free Software Foundation; either
>> + * version 2.1 of the License, or (at your option) any later version.
>> + *
>> + * FFmpeg is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> + * Lesser General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU Lesser General Public
>> + * License along with FFmpeg; if not, write to the Free Software
>> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
>> + */
>> +
>> +#include "libavutil/buffer.h"
>> +
>> +#include "dovi_rpu.h"
>> +#include "golomb.h"
>> +#include "get_bits.h"
>> +
>> +enum {
>> +    RPU_COEFF_FIXED = 0,
>> +    RPU_COEFF_FLOAT = 1,
>> +};
>> +
>> +/**
>> + * Private contents of vdr_ref.
>> + */
>> +typedef struct DOVIVdrRef {
>> +    AVDOVIDataMapping mapping;
>> +    AVDOVIColorMetadata color;
>> +} DOVIVdrRef;
>> +
>> +void ff_dovi_ctx_unref(DOVIContext *s)
>> +{
>> +    for (int i = 0; i < FF_ARRAY_ELEMS(s->vdr_ref); i++)
>> +        av_buffer_unref(&s->vdr_ref[i]);
>> +
>> +    /* Preserve the user-provided fields explicitly, reset everything else */
>> +    *s = (DOVIContext) {
>> +        .logctx = s->logctx,
>> +        .config = s->config,
>> +    };
>> +}
>> +
>> +int ff_dovi_ctx_replace(DOVIContext *s, const DOVIContext *s0)
>> +{
>> +    int ret;
>> +    s->logctx = s0->logctx;
>> +    s->config = s0->config;
>> +    s->mapping = s0->mapping;
>> +    s->color = s0->color;
>> +    for (int i = 0; i < DOVI_MAX_DM_ID; i++) {
>> +        if ((ret = av_buffer_replace(&s->vdr_ref[i], s0->vdr_ref[i])) < 0)
>> +            goto fail;
>> +    }
>> +
>> +    return 0;
>> +
>> +fail:
>> +    ff_dovi_ctx_unref(s);
>> +    return ret;
>> +}
>> +
>> +int ff_dovi_attach_side_data(DOVIContext *s, AVFrame *frame)
>> +{
>> +    AVFrameSideData *sd;
>> +    AVBufferRef *buf;
>> +    AVDOVIMetadata *dovi;
>> +    size_t dovi_size;
>> +
>> +    if (!s->mapping || !s->color)
>> +        return 0; /* incomplete dovi metadata */
>> +
>> +    dovi = av_dovi_metadata_alloc(&dovi_size);
>> +    if (!dovi)
>> +        return AVERROR(ENOMEM);
>> +
>> +    buf = av_buffer_create((uint8_t *) dovi, dovi_size, NULL, NULL, 0);
>> +    if (!buf) {
>> +        av_free(dovi);
>> +        return AVERROR(ENOMEM);
>> +    }
>> +
>> +    sd = av_frame_new_side_data_from_buf(frame, AV_FRAME_DATA_DOVI_METADATA, buf);
>> +    if (!sd) {
>> +        av_buffer_unref(&buf);
>> +        return AVERROR(ENOMEM);
>> +    }
>> +
>> +    memcpy(av_dovi_get_header(dovi), &s->header, sizeof(s->header));
>> +    memcpy(av_dovi_get_mapping(dovi), s->mapping, sizeof(*s->mapping));
>> +    memcpy(av_dovi_get_color(dovi), s->color, sizeof(*s->color));
> 
> These are potentially problematic due to trailing padding in the
> structures. This trailing padding might become valid fields in newer
> versions of this structure. E.g. av_dovi_metadata_alloc() might
> intentionally set the default values of these new fields to invalid
> values. Yet lavc (with the old structure) sees them as padding, allowing
> the compiler to trash these values on every write to these structures.
> More likely though is that they will never be touched at all and
> therefore retain their default (zero) initialization. Which might
> conflict with the default value of these fields.
> This can be fixed by only copying everything up to and including the
> last field that we know of for every of these structures. This will
> unfortunately increase the maintainence burden a bit.

I mean, both sizeof(AVDOVIRpuDataHeader) and sizeof(AVDOVIDataMapping) 
are defined as not being part of the ABI (but 
sizeof(AVDOVIColorMetadata) is? Is that intended?), so their usage in 
this patch is wrong to begin with.

> 
>> +    return 0;
>> +}
>> +
>> +static int guess_profile(const AVDOVIRpuDataHeader *hdr)
>> +{
>> +    switch (hdr->vdr_rpu_profile) {
>> +    case 0:
>> +        if (hdr->bl_video_full_range_flag)
>> +            return 5;
>> +        break;
>> +    case 1:
>> +        if (hdr->el_spatial_resampling_filter_flag && !hdr->disable_residual_flag) {
>> +            if (hdr->vdr_bit_depth == 12) {
>> +                return 7;
>> +            } else {
>> +                return 4;
>> +            }
>> +        } else {
>> +            return 8;
>> +        }
>> +    }
>> +
>> +    return 0; /* unknown */
>> +}
>> +
>> +static inline uint64_t get_ue_coef(GetBitContext *gb, const AVDOVIRpuDataHeader *hdr)
>> +{
>> +    uint64_t ipart;
>> +    union { uint32_t u32; float f32; } fpart;
>> +
>> +    switch (hdr->coef_data_type) {
>> +    case RPU_COEFF_FIXED:
>> +        ipart = get_ue_golomb_long(gb);
>> +        fpart.u32 = get_bits_long(gb, hdr->coef_log2_denom);
>> +        return (ipart << hdr->coef_log2_denom) + fpart.u32;
> 
> Here and in get_se_coef below the f32 member of the union is not used
> with RPU_COEFF_FIXED, so using the union seems unnecessary. It is also
> slightly confusing, because upon seeing this and seeing that the union
> is used with RPU_COEFF_FIXED, I expect it to be used for type-punning.
> 
>> +
>> +    case RPU_COEFF_FLOAT:
>> +        fpart.u32 = get_bits_long(gb, 32);
>> +        return fpart.f32 * (1 << hdr->coef_log2_denom);
> 
> You could just use av_int2float() here instead of adding the union yourself.
> 
>> +    }
>> +
>> +    return 0; /* unreachable */
>> +}
>> +
>> +static inline int64_t get_se_coef(GetBitContext *gb, const AVDOVIRpuDataHeader *hdr)
>> +{
>> +    int64_t ipart;
>> +    union { uint32_t u32; float f32; } fpart;
>> +
>> +    switch (hdr->coef_data_type) {
>> +    case RPU_COEFF_FIXED:
>> +        ipart = get_se_golomb_long(gb);
>> +        fpart.u32 = get_bits_long(gb, hdr->coef_log2_denom);
>> +        return (ipart << hdr->coef_log2_denom) + fpart.u32;
>> +
>> +    case RPU_COEFF_FLOAT:
>> +        fpart.u32 = get_bits_long(gb, 32);
>> +        return fpart.f32 * (1 << hdr->coef_log2_denom);
>> +    }
>> +
>> +    return 0; /* unreachable */
>> +}
>> +
>> +#define VALIDATE(VAR, MIN, MAX)                                                 \
>> +    do {                                                                        \
>> +        if (VAR < MIN || VAR > MAX) {                                           \
>> +            av_log(s->logctx, AV_LOG_ERROR, "RPU validation failed: "           \
>> +                   #MIN" <= "#VAR" = %d <= "#MAX"\n", (int) VAR);               \
>> +            goto fail;                                                          \
>> +        }                                                                       \
>> +    } while (0)
>> +
>> +int ff_dovi_rpu_parse(DOVIContext *s, const uint8_t *rpu, size_t rpu_size)
>> +{
>> +    AVDOVIRpuDataHeader *hdr = &s->header;
>> +    GetBitContext *gb = &(GetBitContext){0};
>> +    DOVIVdrRef *vdr;
>> +    int ret;
>> +
>> +    uint8_t nal_prefix;
>> +    uint8_t rpu_type;
>> +    uint8_t vdr_seq_info_present;
>> +    uint8_t vdr_dm_metadata_present;
>> +    uint8_t use_prev_vdr_rpu;
>> +    uint8_t use_nlq;
>> +    uint8_t profile;
>> +    if ((ret = init_get_bits8(gb, rpu, rpu_size)) < 0)
> 
> Notice that rpu_size is converted to int before init_get_bits8() is
> called. (It is not problematic with the use in 6/6 because the size has
> already been checked in this case.)
> 
>> +        return ret;
>> +
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request at ffmpeg.org with subject "unsubscribe".