[FFmpeg-devel] [PATCH] Update HDR10+ metadata structure.
Mark Thompson
sw at jkqxz.net
Sun Feb 9 18:31:13 EET 2020
On 07/02/2020 17:46, Mohammad Izadi wrote:
> From: Mohammad Izadi <izadi at google.com>
>
> Trying to read HDR10+ metadata from HEVC/SEI and pass it to packet side data in the follow-up CLs.
> ---
> libavutil/hdr_dynamic_metadata.c | 165 +++++++++++++++++++++++++++++++
> libavutil/hdr_dynamic_metadata.h | 12 ++-
> libavutil/version.h | 2 +-
> 3 files changed, 177 insertions(+), 2 deletions(-)
>
> diff --git a/libavutil/hdr_dynamic_metadata.c b/libavutil/hdr_dynamic_metadata.c
> index 0fa1ee82de..a988bcd2d5 100644
> --- a/libavutil/hdr_dynamic_metadata.c
> +++ b/libavutil/hdr_dynamic_metadata.c
> @@ -19,8 +19,17 @@
> */
>
> #include "hdr_dynamic_metadata.h"
> +#include "libavcodec/get_bits.h"
> #include "mem.h"
>
> +static const int64_t luminance_den = 1;
The int64_t looks very shady - am I missing some special integer promotion behaviour here? (Note that AVRational num/den are int.)
> +static const int32_t peak_luminance_den = 15;
> +static const int64_t rgb_den = 100000;
> +static const int32_t fraction_pixel_den = 1000;
> +static const int32_t knee_point_den = 4095;
> +static const int32_t bezier_anchor_den = 1023;
> +static const int32_t saturation_weight_den = 8;
It would probably be clearer just to put these constants inline; there isn't really any use to having the values standalone.
> +
> AVDynamicHDRPlus *av_dynamic_hdr_plus_alloc(size_t *size)
> {
> AVDynamicHDRPlus *hdr_plus = av_mallocz(sizeof(AVDynamicHDRPlus));
> @@ -45,3 +54,159 @@ AVDynamicHDRPlus *av_dynamic_hdr_plus_create_side_data(AVFrame *frame)
>
> return (AVDynamicHDRPlus *)side_data->data;
> }
> +
> +int av_dynamic_hdr_plus_decode(const uint8_t *data, size_t size,
> + AVDynamicHDRPlus *s)
Why is this function being added to libavutil? It looks like it's meant for decoding UDR SEI messages only, so it should probably be in the relevant place in libavcodec.
> +{
> + int w, i, j;
> + GetBitContext gb;
> + if (!data)
> + return AVERROR_INVALIDDATA;
> +
> + int ret = init_get_bits8(&gb, data, size);
> + if (ret < 0)
> + return AVERROR_INVALIDDATA;
> +
> + if (get_bits_left(&gb) < 2)
> + return AVERROR_INVALIDDATA;
> + s->num_windows = get_bits(&gb, 2);
> +
> + if (s->num_windows < 1 || s->num_windows > 3)
> + return AVERROR_INVALIDDATA;
> +
> + if (get_bits_left(&gb) < ((19 * 8 + 1) * (s->num_windows - 1)))
> + return AVERROR_INVALIDDATA;
> + for (w = 1; w < s->num_windows; w++) {
> + s->params[w].window_upper_left_corner_x.num = get_bits(&gb, 16);
> + s->params[w].window_upper_left_corner_y.num = get_bits(&gb, 16);
> + s->params[w].window_lower_right_corner_x.num = get_bits(&gb, 16);
> + s->params[w].window_lower_right_corner_y.num = get_bits(&gb, 16);
> + // The corners are set to absolute coordinates here. They should be
> + // converted to the relative coordinates (in [0, 1]) in the decoder.
> + s->params[w].window_upper_left_corner_x.den = 1;
> + s->params[w].window_upper_left_corner_y.den = 1;
> + s->params[w].window_lower_right_corner_x.den = 1;
> + s->params[w].window_lower_right_corner_y.den = 1;
> +
> + s->params[w].center_of_ellipse_x = get_bits(&gb, 16);
> + s->params[w].center_of_ellipse_y = get_bits(&gb, 16);
> + s->params[w].rotation_angle = get_bits(&gb, 8);
You're range-checking some fields here but not others. Should everything be verified against the constraints so that the comments on the structure in the header file are actually true?
> + s->params[w].semimajor_axis_internal_ellipse = get_bits(&gb, 16);
> + s->params[w].semimajor_axis_external_ellipse = get_bits(&gb, 16);
> + s->params[w].semiminor_axis_external_ellipse = get_bits(&gb, 16);
> + s->params[w].overlap_process_option = get_bits1(&gb);
> + }
> +
> + if (get_bits_left(&gb) < 28)
> + return AVERROR(EINVAL);
I think these should consistently be INVALIDDATA (the data is incorrect, not the caller's use of the function).
> + s->targeted_system_display_maximum_luminance.num = get_bits(&gb, 27);
> + s->targeted_system_display_maximum_luminance.den = luminance_den;
> + s->targeted_system_display_actual_peak_luminance_flag = get_bits1(&gb);
> +
> + if (s->targeted_system_display_actual_peak_luminance_flag) {
> + int rows, cols;
> + if (get_bits_left(&gb) < 10)
> + return AVERROR(EINVAL);
> + rows = get_bits(&gb, 5);
> + cols = get_bits(&gb, 5);
> + if (((rows < 2) || (rows > 25)) || ((cols < 2) || (cols > 25)))
> + return AVERROR_INVALIDDATA;
> +
> + s->num_rows_targeted_system_display_actual_peak_luminance = rows;
> + s->num_cols_targeted_system_display_actual_peak_luminance = cols;
> +
> + if (get_bits_left(&gb) < (rows * cols * 4))
> + return AVERROR(EINVAL);
> +
> + for (i = 0; i < rows; i++) {
> + for (j = 0; j < cols; j++) {
> + s->targeted_system_display_actual_peak_luminance[i][j].num = get_bits(&gb, 4);
> + s->targeted_system_display_actual_peak_luminance[i][j].den = peak_luminance_den;
> + }
> + }
> + }
> + for (w = 0; w < s->num_windows; w++) {
> + if (get_bits_left(&gb) < (3 * 17 + 17 + 4))
> + return AVERROR(EINVAL);
> + for (i = 0; i < 3; i++) {
> + s->params[w].maxscl[i].num = get_bits(&gb, 17);
> + s->params[w].maxscl[i].den = rgb_den;
> + }
> + s->params[w].average_maxrgb.num = get_bits(&gb, 17);
> + s->params[w].average_maxrgb.den = rgb_den;
> + s->params[w].num_distribution_maxrgb_percentiles = get_bits(&gb, 4);
> +
> + if (get_bits_left(&gb) <
> + (s->params[w].num_distribution_maxrgb_percentiles * 24))
> + return AVERROR(EINVAL);
> + for (i = 0; i < s->params[w].num_distribution_maxrgb_percentiles; i++) {
> + s->params[w].distribution_maxrgb[i].percentage = get_bits(&gb, 7);
> + s->params[w].distribution_maxrgb[i].percentile.num = get_bits(&gb, 17);
> + s->params[w].distribution_maxrgb[i].percentile.den = rgb_den;
> + }
> +
> + if (get_bits_left(&gb) < 10)
> + return AVERROR(EINVAL);
> + s->params[w].fraction_bright_pixels.num = get_bits(&gb, 10);
> + s->params[w].fraction_bright_pixels.den = fraction_pixel_den;
> + }
> + if (get_bits_left(&gb) < 1)
> + return AVERROR(EINVAL);
> + s->mastering_display_actual_peak_luminance_flag = get_bits1(&gb);
> + if (s->mastering_display_actual_peak_luminance_flag) {
> + int rows, cols;
> + if (get_bits_left(&gb) < 10)
> + return AVERROR(EINVAL);
> + rows = get_bits(&gb, 5);
> + cols = get_bits(&gb, 5);
> + if (((rows < 2) || (rows > 25)) || ((cols < 2) || (cols > 25)))
> + return AVERROR_INVALIDDATA;
> +
> + s->num_rows_mastering_display_actual_peak_luminance = rows;
> + s->num_cols_mastering_display_actual_peak_luminance = cols;
> +
> + if (get_bits_left(&gb) < (rows * cols * 4))
> + return AVERROR(EINVAL);
> +
> + for (i = 0; i < rows; i++) {
> + for (j = 0; j < cols; j++) {
> + s->mastering_display_actual_peak_luminance[i][j].num = get_bits(&gb, 4);
> + s->mastering_display_actual_peak_luminance[i][j].den = peak_luminance_den;
> + }
> + }
> + }
> +
> + for (w = 0; w < s->num_windows; w++) {
> + if (get_bits_left(&gb) < 1)
> + return AVERROR(EINVAL);
> + s->params[w].tone_mapping_flag = get_bits1(&gb);
> + if (s->params[w].tone_mapping_flag) {
> + if (get_bits_left(&gb) < 28)
> + return AVERROR(EINVAL);
> + s->params[w].knee_point_x.num = get_bits(&gb, 12);
> + s->params[w].knee_point_x.den = knee_point_den;
> + s->params[w].knee_point_y.num = get_bits(&gb, 12);
> + s->params[w].knee_point_y.den = knee_point_den;
> + s->params[w].num_bezier_curve_anchors = get_bits(&gb, 4);
> +
> + if (get_bits_left(&gb) < (s->params[w].num_bezier_curve_anchors * 10))
> + return AVERROR(EINVAL);
> + for (i = 0; i < s->params[w].num_bezier_curve_anchors; i++) {
> + s->params[w].bezier_curve_anchors[i].num = get_bits(&gb, 10);
> + s->params[w].bezier_curve_anchors[i].den = bezier_anchor_den;
> + }
> + }
> +
> + if (get_bits_left(&gb) < 1)
> + return AVERROR(EINVAL);
> + s->params[w].color_saturation_mapping_flag = get_bits1(&gb);
> + if (s->params[w].color_saturation_mapping_flag) {
> + if (get_bits_left(&gb) < 6)
> + return AVERROR(EINVAL);
> + s->params[w].color_saturation_weight.num = get_bits(&gb, 6);
> + s->params[w].color_saturation_weight.den = saturation_weight_den;
> + }
> + }
> +
It might be sensible to set the unused members of the structure to something fixed rather than leaving them uninitialised, so that attempting to copy the structure isn't UB.
> + return 0;
> +}
> diff --git a/libavutil/hdr_dynamic_metadata.h b/libavutil/hdr_dynamic_metadata.h
> index 2d72de56ae..28c657481f 100644
> --- a/libavutil/hdr_dynamic_metadata.h
> +++ b/libavutil/hdr_dynamic_metadata.h
> @@ -265,7 +265,7 @@ typedef struct AVDynamicHDRPlus {
>
> /**
> * The nominal maximum display luminance of the targeted system display,
> - * in units of 0.0001 candelas per square metre. The value shall be in
> + * in units of 1 candelas per square metre. The value shall be in
This was a error in the spec itself, I think? It should probably be isolated into its own commit with suitable references explaining what happened.
> * the range of 0 to 10000, inclusive.
> */
> AVRational targeted_system_display_maximum_luminance;
> @@ -340,4 +340,14 @@ AVDynamicHDRPlus *av_dynamic_hdr_plus_alloc(size_t *size);
> */
> AVDynamicHDRPlus *av_dynamic_hdr_plus_create_side_data(AVFrame *frame);
>
> +/**
> + * Decode SMPTE ST 2094-40 (the user data registered ITU-T T.35) to AVDynamicHDRPlus.
> + * @param data The user data registered ITU-T T.35 (SMPTE ST 2094-40).
I think you need to be a bit clearer which part of the data this wants. Is this the entirety of the user_data_registered_itu_t_t35() block, the itu_t_t35_payload_byte[] array, or something else?
> + * @param size The size of the user data registered ITU-T T.35 (SMPTE ST 2094-40).
> + * @param s The decoded AVDynamicHDRPlus structure.
> + *
> + * @return 0 if succeed. Otherwise, returns the appropriate AVERROR.
> + */
> +int av_dynamic_hdr_plus_decode(const uint8_t *data, size_t size, AVDynamicHDRPlus *s);
> +
> ...
It would help to review if this were included in the decoder so that it can actually be tested.
- Mark
More information about the ffmpeg-devel
mailing list