[FFmpeg-devel] [PATCH] Support HDR10+ metadata for HEVC.

Jan Ekström jeebjp at gmail.com
Thu Nov 12 02:46:29 EET 2020


On 14.10.2020 2:53, Mohammad Izadi wrote:
> From: Mohammad Izadi <moh.izadi at gmail.com>
>
> HDR10+ is dynamic metadata (A/341 Amendment - SMPTE2094-40) that needs to be decoded from ITU-T T.35 in HEVC bitstream. The HDR10+ is transferred to side data packet to be used or passed through.
>
> The fate test file can be found here: https://drive.google.com/file/d/1Hadzc24-RsgnRqS-B0bxLmzDeTwEOhtE/view?usp=sharing
>
> The video file needs to be copied to fate-suite/mov/
> ---

Long overdue initial attempt at a review follows...

>  fftools/ffprobe.c                      |  16 +++
>  libavcodec/avpacket.c                  |   1 +
>  libavcodec/decode.c                    |   1 +
>  libavcodec/hevc_sei.c                  |  39 ++++--
>  libavcodec/hevc_sei.h                  |   5 +
>  libavcodec/hevcdec.c                   |  16 +++
>  libavcodec/internal.h                  |   9 ++
>  libavcodec/packet.h                    |   8 ++
>  libavcodec/utils.c                     | 180 +++++++++++++++++++++++++
>  libavcodec/version.h                   |   2 +-
>  libavfilter/vf_showinfo.c              |  28 ++++
>  tests/fate/mov.mak                     |   3 +
>  tests/ref/fate/mov-hdr10-plus-metadata |  55 ++++++++
>  13 files changed, 350 insertions(+), 13 deletions(-)
>  create mode 100644 tests/ref/fate/mov-hdr10-plus-metadata
>
> diff --git a/fftools/ffprobe.c b/fftools/ffprobe.c
> index d4e494f11f..0b80edf842 100644
> --- a/fftools/ffprobe.c
> +++ b/fftools/ffprobe.c
> @@ -35,6 +35,7 @@
>  #include "libavutil/bprint.h"
>  #include "libavutil/display.h"
>  #include "libavutil/hash.h"
> +#include "libavutil/hdr_dynamic_metadata.h"
>  #include "libavutil/mastering_display_metadata.h"
>  #include "libavutil/dovi_meta.h"
>  #include "libavutil/opt.h"
> @@ -1925,6 +1926,13 @@ static void print_pkt_side_data(WriterContext *w,
>                  print_q("min_luminance", metadata->min_luminance, '/');
>                  print_q("max_luminance", metadata->max_luminance, '/');
>              }
> +        } else if (sd->type == AV_PKT_DATA_DYNAMIC_HDR_PLUS) {
> +            AVDynamicHDRPlus *metadata = (AVDynamicHDRPlus *)sd->data;
> +            // Partially print HDR10+ metadata.
> +            print_int("num_windows", metadata->num_windows);
> +            print_q("targeted_system_display_maximum_luminance", metadata->targeted_system_display_maximum_luminance, '/');
> +            print_int("targeted_system_display_actual_peak_luminance_flag", metadata->targeted_system_display_actual_peak_luminance_flag);
> +            print_int("mastering_display_actual_peak_luminance_flag", metadata->mastering_display_actual_peak_luminance_flag);
>          } else if (sd->type == AV_PKT_DATA_CONTENT_LIGHT_LEVEL) {
>              AVContentLightMetadata *metadata = (AVContentLightMetadata *)sd->data;
>              print_int("max_content", metadata->MaxCLL);
> @@ -2250,6 +2258,14 @@ static void show_frame(WriterContext *w, AVFrame *frame, AVStream *stream,
>                      print_q("min_luminance", metadata->min_luminance, '/');
>                      print_q("max_luminance", metadata->max_luminance, '/');
>                  }
> +            } else if (sd->type == AV_FRAME_DATA_DYNAMIC_HDR_PLUS) {
> +                AVDynamicHDRPlus *metadata = (AVDynamicHDRPlus *)sd->data;
> +                // Partially print HDR10+ metadata.
> +		print_int("num_windows", metadata->num_windows);
> +		print_q("targeted_system_display_maximum_luminance", metadata->targeted_system_display_maximum_luminance, '/');
> +		print_int("targeted_system_display_actual_peak_luminance_flag", metadata->targeted_system_display_actual_peak_luminance_flag);
> +		print_int("mastering_display_actual_peak_luminance_flag", metadata->mastering_display_actual_peak_luminance_flag);
> +

Some really minor stuff:
- Empty line at the end
- Tabs vs space, seemingly some tabs ended up here. Copy and paste can make that happen. I know this has been done in this duplicated fashion so far, but maybe this should just be in a static function that takes in a AVDynamicHDRPlus pointer and could just be called from two places?
- I am not sure of the whole history of this patch set, but was there any explanation on why these specific values should be the ones from which printing should be started since we are not printing the whole (complicated) structure? I think so far our practice has been to dump all values from the structures, except for the flags for whether a thing exists or not (see the example with "has_primaries"/"has_luminance" for mastering display metadata, and the loop within S12M timecode printing for lists of things), so even if we limit initial printing to certain values, printing out flags instead of actual possible values is an interesting selection.

>              } else if (sd->type == AV_FRAME_DATA_CONTENT_LIGHT_LEVEL) {
>                  AVContentLightMetadata *metadata = (AVContentLightMetadata *)sd->data;
>                  print_int("max_content", metadata->MaxCLL);
> diff --git a/libavcodec/avpacket.c b/libavcodec/avpacket.c
> index dce26cb31a..8307032335 100644
> --- a/libavcodec/avpacket.c
> +++ b/libavcodec/avpacket.c
> @@ -394,6 +394,7 @@ const char *av_packet_side_data_name(enum AVPacketSideDataType type)
>      case AV_PKT_DATA_CONTENT_LIGHT_LEVEL:        return "Content light level metadata";
>      case AV_PKT_DATA_SPHERICAL:                  return "Spherical Mapping";
>      case AV_PKT_DATA_A53_CC:                     return "A53 Closed Captions";
> +    case AV_PKT_DATA_DYNAMIC_HDR_PLUS:           return "HDR10+ Dynamic Metadata (SMPTE 2094-40)";
>      case AV_PKT_DATA_ENCRYPTION_INIT_INFO:       return "Encryption initialization data";
>      case AV_PKT_DATA_ENCRYPTION_INFO:            return "Encryption info";
>      case AV_PKT_DATA_AFD:                        return "Active Format Description data"; diff --git a/libavcodec/decode.c b/libavcodec/decode.c index de9c079f9d..cd3286f7fb 100644 --- a/libavcodec/decode.c +++ b/libavcodec/decode.c @@ -1698,6 +1698,7 @@ int ff_decode_frame_props(AVCodecContext *avctx, AVFrame *frame) { AV_PKT_DATA_MASTERING_DISPLAY_METADATA, AV_FRAME_DATA_MASTERING_DISPLAY_METADATA }, { AV_PKT_DATA_CONTENT_LIGHT_LEVEL, AV_FRAME_DATA_CONTENT_LIGHT_LEVEL }, { AV_PKT_DATA_A53_CC, AV_FRAME_DATA_A53_CC }, + { AV_PKT_DATA_DYNAMIC_HDR_PLUS, AV_FRAME_DATA_DYNAMIC_HDR_PLUS }, { AV_PKT_DATA_ICC_PROFILE, AV_FRAME_DATA_ICC_PROFILE }, }; diff --git a/libavcodec/hevc_sei.c b/libavcodec/hevc_sei.c index a4ec65dc1a..f2fe47f7f3 100644 --- a/libavcodec/hevc_sei.c +++ b/libavcodec/hevc_sei.c @@ -25,6 +25,7 @@ #include "golomb.h"
>  #include "hevc_ps.h"
>  #include "hevc_sei.h"
> +#include "internal.h"
>  
>  static int decode_nal_sei_decoded_picture_hash(HEVCSEIPictureHash *s, GetBitContext *gb)
>  {
> @@ -242,8 +243,8 @@ static int decode_nal_sei_user_data_unregistered(HEVCSEIUnregistered *s, GetBitC
>  static int decode_nal_sei_user_data_registered_itu_t_t35(HEVCSEI *s, GetBitContext *gb,
>                                                           int size)
>  {
> -    uint32_t country_code;
> -    uint32_t user_identifier;
> +    uint8_t country_code;
> +    uint16_t provider_code;
>  
>      if (size < 7)
>          return AVERROR(EINVAL);
> @@ -255,18 +256,31 @@ static int decode_nal_sei_user_data_registered_itu_t_t35(HEVCSEI *s, GetBitConte
>          size--;
>      }
>  
> -    skip_bits(gb, 8);
> -    skip_bits(gb, 8);
> -
> -    user_identifier = get_bits_long(gb, 32);
> -
> -    switch (user_identifier) {
> -        case MKBETAG('G', 'A', '9', '4'):
> +    provider_code = get_bits(gb, 16);
> +
> +    const uint8_t usa_country_code = 0xB5;
> +    const uint16_t smpte_provider_code = 0x003C;
> +    if (country_code == usa_country_code &&
> +        provider_code == smpte_provider_code) {
> +        // A/341 Amendment – 2094-40
> +        uint16_t provider_oriented_code = get_bits(gb, 16);
> +        uint8_t application_identifier = get_bits(gb, 8);
> +        const uint16_t smpte2094_40_provider_oriented_code = 0x0001;
> +        const uint16_t smpte2094_40_application_identifier = 0x04;

I do not know what the general rule about const integers as an alternative for #defines is in FFmpeg,
but at the very least I applied the following diff during review, which did the following things:

- Alignment of things.
- Move the static defines to be first, separate them a bit from the actual parsed values.
- application_identifier seems to be uint8_t so made smpte2094_40_application_identifier such as well.
- I prefer always specifically initializing values to something known, although since the values are always written to first by get_bits
  this is relatively minor and in some cases might be considered an opinion thing.

--- a/libavcodec/hevc_sei.c
+++ b/libavcodec/hevc_sei.c
@@ -210,8 +210,11 @@ static int decode_nal_sei_user_data_unregistered(HEVCSEIUnregistered *s, GetBitC
 static int decode_nal_sei_user_data_registered_itu_t_t35(HEVCSEI *s, GetBitContext *gb,
                                                          int size)
 {
-    uint8_t country_code;
-    uint16_t provider_code;
+    const uint8_t  usa_country_code    = 0xB5;
+    const uint16_t smpte_provider_code = 0x003C;
+
+    uint8_t  country_code  = 0;
+    uint16_t provider_code = 0;
 
     if (size < 7)
         return AVERROR(EINVAL);
@@ -225,15 +228,15 @@ static int decode_nal_sei_user_data_registered_itu_t_t35(HEVCSEI *s, GetBitConte
 
     provider_code = get_bits(gb, 16);
 
-    const uint8_t usa_country_code = 0xB5;
-    const uint16_t smpte_provider_code = 0x003C;
     if (country_code == usa_country_code &&
         provider_code == smpte_provider_code) {
         // A/341 Amendment – 2094-40
-        uint16_t provider_oriented_code = get_bits(gb, 16);
-        uint8_t application_identifier = get_bits(gb, 8);
         const uint16_t smpte2094_40_provider_oriented_code = 0x0001;
-        const uint16_t smpte2094_40_application_identifier = 0x04;
+        const uint8_t  smpte2094_40_application_identifier = 0x04;
+
+        uint16_t provider_oriented_code = get_bits(gb, 16);
+        uint8_t  application_identifier = get_bits(gb, 8);
+
         if (provider_oriented_code == smpte2094_40_provider_oriented_code &&
             application_identifier == smpte2094_40_application_identifier) {
             int err = ff_read_itu_t_t35_to_dynamic_hdr_plus(gb, &s->dynamic_hdr_plus.info);
@@ -243,7 +246,7 @@ static int decode_nal_sei_user_data_registered_itu_t_t35(HEVCSEI *s, GetBitConte
             return err;
         }
     } else {
-        uint32_t  user_identifier = get_bits_long(gb, 32);
+        uint32_t user_identifier = get_bits_long(gb, 32);
         if(user_identifier == MKBETAG('G', 'A', '9', '4'))
             return decode_registered_user_data_closed_caption(&s->a53_caption, gb, size);
     }

> +        if (provider_oriented_code == smpte2094_40_provider_oriented_code &&
> +            application_identifier == smpte2094_40_application_identifier) {
> +            int err = ff_read_itu_t_t35_to_dynamic_hdr_plus(gb, &s->dynamic_hdr_plus.info);
> +            if (err < 0 && s->dynamic_hdr_plus.info) {
> +                av_buffer_unref(&s->dynamic_hdr_plus.info);
> +            }

Generally such functions (such as ff_parse_a53_cc) are defined such as that in case of error, the AVBufferRef pointer is left untouched.

That is done for exactly the reason that in case of failure, you do not want to be doing such clean-up on the calling side.

> +            return err;
> +        }
> +    } else {
> +        uint32_t  user_identifier = get_bits_long(gb, 32);
> +        if(user_identifier == MKBETAG('G', 'A', '9', '4'))
>              return decode_registered_user_data_closed_caption(&s->a53_caption, gb, size);
> -        default:
> -            skip_bits_long(gb, size * 8);
> -            break;
>      }
> +    skip_bits_long(gb, size * 8);
>      return 0;
>  }
>  
> @@ -453,4 +467,5 @@ void ff_hevc_reset_sei(HEVCSEI *s)
>          av_buffer_unref(&s->unregistered.buf_ref[i]);
>      s->unregistered.nb_buf_ref = 0;
>      av_freep(&s->unregistered.buf_ref);
> +    av_buffer_unref(&s->dynamic_hdr_plus.info);
>  }
> diff --git a/libavcodec/hevc_sei.h b/libavcodec/hevc_sei.h
> index 5ee7a4796d..e9e2d46ed4 100644
> --- a/libavcodec/hevc_sei.h
> +++ b/libavcodec/hevc_sei.h
> @@ -104,6 +104,10 @@ typedef struct HEVCSEIMasteringDisplay {
>      uint32_t min_luminance;
>  } HEVCSEIMasteringDisplay;
>  
> +typedef struct HEVCSEIDynamicHDRPlus {
> +    AVBufferRef *info;
> +} HEVCSEIDynamicHDRPlus;
> +
>  typedef struct HEVCSEIContentLight {
>      int present;
>      uint16_t max_content_light_level;
> @@ -143,6 +147,7 @@ typedef struct HEVCSEI {
>      HEVCSEIA53Caption a53_caption;
>      HEVCSEIUnregistered unregistered;
>      HEVCSEIMasteringDisplay mastering_display;
> +    HEVCSEIDynamicHDRPlus dynamic_hdr_plus;
>      HEVCSEIContentLight content_light;
>      int active_seq_parameter_set_id;
>      HEVCSEIAlternativeTransfer alternative_transfer;
> diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c
> index b77df8d89f..cd794231e8 100644
> --- a/libavcodec/hevcdec.c
> +++ b/libavcodec/hevcdec.c
> @@ -2849,6 +2849,16 @@ static int set_side_data(HEVCContext *s)
>  
>          s->sei.timecode.num_clock_ts = 0;
>      }
> +    if (s->sei.dynamic_hdr_plus.info){
> +      AVBufferRef *info_ref = av_buffer_ref(s->sei.dynamic_hdr_plus.info);
> +      if (!info_ref)
> +	return AVERROR(ENOMEM);
> +
> +      if(!av_frame_new_side_data_from_buf(out, AV_FRAME_DATA_DYNAMIC_HDR_PLUS, info_ref)){
> +        av_buffer_unref(&info_ref);
> +        return AVERROR(ENOMEM);
> +      }
> +    }

Seems like more mixed tabs and spaces, also 2-space indent instead of 4?

Also while I do see what is being done with the AVBufferRefs, this is still fascinating that none of the AVBufferRef utilizing stuff that does av_frame_new_side_data_from_buf utilizes av_buffer_ref in this fashion in this function. :)

Also for this and the update_thread_context part, just add an empty line before starting the HDR10+ stuff to ease parsing where that bit starts.

>  
>      return 0;
>  }
> @@ -3530,6 +3540,12 @@ static int hevc_update_thread_context(AVCodecContext *dst,
>          if (!s->sei.a53_caption.buf_ref)
>              return AVERROR(ENOMEM);
>      }
> +    av_buffer_unref(&s->sei.dynamic_hdr_plus.info);
> +    if (s0->sei.dynamic_hdr_plus.info) {
> +        s->sei.dynamic_hdr_plus.info = av_buffer_ref(s0->sei.dynamic_hdr_plus.info);
> +        if (!s->sei.dynamic_hdr_plus.info)
> +            return AVERROR(ENOMEM);
> +    }
>  
>      s->sei.frame_packing        = s0->sei.frame_packing;
>      s->sei.display_orientation  = s0->sei.display_orientation;
> diff --git a/libavcodec/internal.h b/libavcodec/internal.h
> index 0a1c0a17ec..7ebbc5e4f9 100644
> --- a/libavcodec/internal.h
> +++ b/libavcodec/internal.h
> @@ -413,6 +413,15 @@ int ff_int_from_list_or_default(void *ctx, const char * val_name, int val,
>  
>  void ff_dvdsub_parse_palette(uint32_t *palette, const char *p);
>  
> +/**
> + * Reads and decode the user data registered ITU-T T.35 to AVbuffer (AVDynamicHDRPlus).
> + * @param gbc The bit content to be decoded.
> + * @param output A buffer containing the decoded AVDynamicHDRPlus structure.
> + *
> + * @return 0 if succeed. Otherwise, returns the appropriate AVERROR.
> + */
> +int ff_read_itu_t_t35_to_dynamic_hdr_plus(void *gbc, AVBufferRef **output);
> +

Taking a look at atsc_a53.{c,h} , is there any reason why this stuff is in the primary internal.h instead of being in a specific helper file? Also, why is the GBC being passed on as a void pointer?

Also for the doxy, I think examples for a bit better wording could be looked at from the atsc_a53.h header (also as I noted before, I would prefer such functions to not touch the AVBufferRef * until a structure was parsed).

>  #if defined(_WIN32) && CONFIG_SHARED && !defined(BUILDING_avcodec)
>  #    define av_export_avcodec __declspec(dllimport)
>  #else
> diff --git a/libavcodec/packet.h b/libavcodec/packet.h
> index 96f237f091..60fbf2c1e8 100644
> --- a/libavcodec/packet.h
> +++ b/libavcodec/packet.h
> @@ -282,6 +282,14 @@ enum AVPacketSideDataType {
>       */
>      AV_PKT_DATA_DOVI_CONF,
>  
> +    /**
> +     * HDR10+ dynamic metadata associated with a video frame. The metadata is in
> +     * the form of the AVDynamicHDRPlus struct and contains
> +     * information for color volume transform - application 4 of
> +     * SPMTE 2094-40:2016 standard.
> +     */
> +    AV_PKT_DATA_DYNAMIC_HDR_PLUS,
> +
>      /**
>       * The number of side data types.
>       * This is not part of the public API/ABI in the sense that it may
> diff --git a/libavcodec/utils.c b/libavcodec/utils.c
> index 2ece34f921..c9435da3b4 100644
> --- a/libavcodec/utils.c
> +++ b/libavcodec/utils.c
> @@ -42,8 +42,10 @@
>  #include "libavutil/samplefmt.h"
>  #include "libavutil/dict.h"
>  #include "libavutil/thread.h"
> +#include "libavutil/hdr_dynamic_metadata.h"
>  #include "avcodec.h"
>  #include "decode.h"
> +#include "get_bits.h"
>  #include "hwconfig.h"
>  #include "libavutil/opt.h"
>  #include "mpegvideo.h"
> @@ -67,6 +69,17 @@
>  const char av_codec_ffversion[] = "FFmpeg version " FFMPEG_VERSION;
>  
>  static AVMutex codec_mutex = AV_MUTEX_INITIALIZER;
> +static const uint8_t usa_country_code = 0xB5;
> +static const uint16_t smpte_provider_code = 0x003C;
> +static const uint16_t smpte2094_40_provider_oriented_code = 0x0001;
> +static const uint16_t smpte2094_40_application_identifier = 0x04;
> +static const int64_t luminance_den = 1;
> +static const int32_t peak_luminance_den = 15;
> +static const int64_t rgb_den = 100000;
> +static const int32_t fraction_pixel_den = 1000;
> +static const int32_t knee_point_den = 4095;
> +static const int32_t bezier_anchor_den = 1023;
> +static const int32_t saturation_weight_den = 8;
>  
>  void av_fast_padded_malloc(void *ptr, unsigned int *size, size_t min_size)
>  {
> @@ -2346,3 +2359,170 @@ int ff_int_from_list_or_default(void *ctx, const char * val_name, int val,
>             "%s %d are not supported. Set to default value : %d\n", val_name, val, default_value);
>      return default_value;
>  }
> +
> +int ff_read_itu_t_t35_to_dynamic_hdr_plus(void *gbc,  AVBufferRef **output)
> +{
> +    GetBitContext *gb = (GetBitContext *)gbc;
> +    uint8_t application_version  = get_bits(gbc, 8);
> +    size_t size;
> +    int w, i, j;
> +    AVDynamicHDRPlus *s = av_dynamic_hdr_plus_alloc(&size);
> +    if (!s)
> +        return AVERROR(ENOMEM);
> +
> +    if (*output)
> +        av_buffer_unref(output);
> +
> +    *output = av_buffer_create(
> +        (uint8_t *)s, size, av_buffer_default_free, NULL, 0);
> +    if (!(*output)) {
> +        av_freep(&s);
> +        return AVERROR(ENOMEM);
> +    }

Poking the output pointer should probably be left at the very end, where the structure "s" is known to be in a good state. That way if the parsing fails for one reason or another, the given pointer is not touched.

> +    s->application_version = application_version;

I think application version can be checked against the value 0 as per the spec?

> +    if (get_bits_left(gb) < 2)
> +        return AVERROR_INVALIDDATA;
> +    s->num_windows = get_bits(gb, 2);
> +
> +    if (s->num_windows < 1 || s->num_windows > 3) {
> +        return AVERROR_INVALIDDATA;
> +    }
> +
> +    if (get_bits_left(gb) < ((19 * 8 + 1) * (s->num_windows - 1)))
> +        return AVERROR_INVALIDDATA;

You can utilize empty lines between the get_bits_left checks and the following parsing to split the context better.

> +    for (w = 1; w < s->num_windows; w++) {

Thankfully, you can utilize variables initialized within loops, so you do not have to pre-define a variable that is only utilized within a loop. Most likely w, i and j here in this function are such :) .

> +        s->params[w].window_upper_left_corner_x.num = get_bits(gb, 16);
> +        s->params[w].window_upper_left_corner_y.num = get_bits(gb, 16);
> +        s->params[w].window_lower_right_corner_x.num = get_bits(gb, 16);
> +        s->params[w].window_lower_right_corner_y.num = get_bits(gb, 16);
> +        // The corners are set to absolute coordinates here. They should be
> +        // converted to the relative coordinates (in [0, 1]) in the decoder.
> +        s->params[w].window_upper_left_corner_x.den = 1;
> +        s->params[w].window_upper_left_corner_y.den = 1;
> +        s->params[w].window_lower_right_corner_x.den = 1;
> +        s->params[w].window_lower_right_corner_y.den = 1;
> +

These seem to be just X/Y co-ordinates, I'm probably missing something here but why are these AVRationals? Of course the struct is already defined and been available since 2018 so me commenting on this is kind of late, I guess? :D

I think such structs are usually set in a single set with something like "(AVRational){get_bits(gb, 16), 1};", that way you do not have to separately set the numerator and the denumerator.

> +        s->params[w].center_of_ellipse_x = get_bits(gb, 16);
> +        s->params[w].center_of_ellipse_y = get_bits(gb, 16);
> +        s->params[w].rotation_angle = get_bits(gb, 8);
> +        s->params[w].semimajor_axis_internal_ellipse = get_bits(gb, 16);
> +        s->params[w].semimajor_axis_external_ellipse = get_bits(gb, 16);
> +        s->params[w].semiminor_axis_external_ellipse = get_bits(gb, 16);
> +        s->params[w].overlap_process_option = get_bits1(gb);
> +    }
> +
> +    if (get_bits_left(gb) < 28)
> +        return AVERROR(EINVAL);
> +    s->targeted_system_display_maximum_luminance.num = get_bits(gb, 27);
> +    s->targeted_system_display_maximum_luminance.den = luminance_den;

Ditto regarding AVRational initialization, "(AVRational){get_bits(gb, 27), luminance_den};" .

Although once again, I must wonder how useful a rational is for this if the denumerator is specified to be 1...

> +    s->targeted_system_display_actual_peak_luminance_flag = get_bits1(gb);
> +
> +    if (s->targeted_system_display_actual_peak_luminance_flag) {
> +        int rows, cols;
> +        if (get_bits_left(gb) < 10)
> +            return AVERROR(EINVAL);
> +        rows = get_bits(gb, 5);
> +        cols = get_bits(gb, 5);
> +        if (((rows < 2) || (rows > 25)) || ((cols < 2) || (cols > 25))) {
> +            return AVERROR_INVALIDDATA;
> +        }
> +        s->num_rows_targeted_system_display_actual_peak_luminance = rows;
> +        s->num_cols_targeted_system_display_actual_peak_luminance = cols;
> +
> +        if (get_bits_left(gb) < (rows * cols * 4))
> +            return AVERROR(EINVAL);
> +
> +        for (i = 0; i < rows; i++) {
> +            for (j = 0; j < cols; j++) {
> +                s->targeted_system_display_actual_peak_luminance[i][j].num = get_bits(gb, 4);
> +                s->targeted_system_display_actual_peak_luminance[i][j].den = peak_luminance_den;

Once again, regarding the AVRational initialization :) . Now at least the denumerator is is not 1, yay :) (although it seems like the coded value is a [0,1] float, just represented in values 0-15).

> +            }
> +        }
> +    }
> +    for (w = 0; w < s->num_windows; w++) {
> +        if (get_bits_left(gb) < (3 * 17 + 17 + 4))
> +            return AVERROR(EINVAL);
> +        for (i = 0; i < 3; i++) {
> +            s->params[w].maxscl[i].num = get_bits(gb, 17);
> +            s->params[w].maxscl[i].den = rgb_den;
> +        }
> +        s->params[w].average_maxrgb.num = get_bits(gb, 17);
> +        s->params[w].average_maxrgb.den = rgb_den;
> +        s->params[w].num_distribution_maxrgb_percentiles = get_bits(gb, 4);
> +
> +        if (get_bits_left(gb) <
> +            (s->params[w].num_distribution_maxrgb_percentiles * 24))
> +            return AVERROR(EINVAL);
> +        for (i = 0; i < s->params[w].num_distribution_maxrgb_percentiles; i++) {
> +            s->params[w].distribution_maxrgb[i].percentage = get_bits(gb, 7);
> +            s->params[w].distribution_maxrgb[i].percentile.num = get_bits(gb, 17);
> +            s->params[w].distribution_maxrgb[i].percentile.den = rgb_den;
> +        }
> +
> +        if (get_bits_left(gb) < 10)
> +            return AVERROR(EINVAL);
> +        s->params[w].fraction_bright_pixels.num = get_bits(gb, 10);
> +        s->params[w].fraction_bright_pixels.den = fraction_pixel_den;
> +    }

Generally the same comment with regards to AVRationals here as well.

Additionally, since you can define variables it might probably make sense to add a AVHDRPlusColorTransformParams *transform_params = &params[w] just not to duplicate the indexing all the time? Same for the AVHDRPlusPercentile list being under &s->params[w].distribution_maxrgb[i].

And sorry, at this point it's 02:30 local time and I need to still do some work tomorrow. I will attempt to do another look at all of this soon.

Jan

> +    if (get_bits_left(gb) < 1)
> +        return AVERROR(EINVAL);
> +    s->mastering_display_actual_peak_luminance_flag = get_bits1(gb);
> +    if (s->mastering_display_actual_peak_luminance_flag) {
> +        int rows, cols;
> +        if (get_bits_left(gb) < 10)
> +            return AVERROR(EINVAL);
> +        rows = get_bits(gb, 5);
> +        cols = get_bits(gb, 5);
> +        if (((rows < 2) || (rows > 25)) || ((cols < 2) || (cols > 25))) {
> +            return AVERROR_INVALIDDATA;
> +        }
> +        s->num_rows_mastering_display_actual_peak_luminance = rows;
> +        s->num_cols_mastering_display_actual_peak_luminance = cols;
> +
> +        if (get_bits_left(gb) < (rows * cols * 4))
> +            return AVERROR(EINVAL);
> +
> +        for (i = 0; i < rows; i++) {
> +            for (j = 0; j < cols; j++) {
> +                s->mastering_display_actual_peak_luminance[i][j].num = get_bits(gb, 4);
> +                s->mastering_display_actual_peak_luminance[i][j].den = peak_luminance_den;
> +            }
> +        }
> +    }
> +
> +    for (w = 0; w < s->num_windows; w++) {
> +        if (get_bits_left(gb) < 1)
> +            return AVERROR(EINVAL);
> +        s->params[w].tone_mapping_flag = get_bits1(gb);
> +        if (s->params[w].tone_mapping_flag) {
> +            if (get_bits_left(gb) < 28)
> +                return AVERROR(EINVAL);
> +            s->params[w].knee_point_x.num = get_bits(gb, 12);
> +            s->params[w].knee_point_x.den = knee_point_den;
> +            s->params[w].knee_point_y.num = get_bits(gb, 12);
> +            s->params[w].knee_point_y.den = knee_point_den;
> +            s->params[w].num_bezier_curve_anchors = get_bits(gb, 4);
> + 
> +            if (get_bits_left(gb) < (s->params[w].num_bezier_curve_anchors * 10))
> +                return AVERROR(EINVAL);
> +            for (i = 0; i < s->params[w].num_bezier_curve_anchors; i++) {
> +                s->params[w].bezier_curve_anchors[i].num = get_bits(gb, 10);
> +                s->params[w].bezier_curve_anchors[i].den = bezier_anchor_den;
> +            }
> +        }
> +
> +        if (get_bits_left(gb) < 1)
> +            return AVERROR(EINVAL);
> +        s->params[w].color_saturation_mapping_flag = get_bits1(gb);
> +        if (s->params[w].color_saturation_mapping_flag) {
> +            if (get_bits_left(gb) < 6)
> +                return AVERROR(EINVAL);
> +            s->params[w].color_saturation_weight.num = get_bits(gb, 6);
> +            s->params[w].color_saturation_weight.den = saturation_weight_den;
> +        }
> +    }
> +
> +    skip_bits(gb, get_bits_left(gb));
> +
> +    return 0;
> +}
> diff --git a/libavcodec/version.h b/libavcodec/version.h
> index e75891d463..ad0bfd619d 100644
> --- a/libavcodec/version.h
> +++ b/libavcodec/version.h
> @@ -28,7 +28,7 @@
>  #include "libavutil/version.h"
>  
>  #define LIBAVCODEC_VERSION_MAJOR  58
> -#define LIBAVCODEC_VERSION_MINOR  95
> +#define LIBAVCODEC_VERSION_MINOR  96
>  #define LIBAVCODEC_VERSION_MICRO 100
>  
>  #define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
> diff --git a/libavfilter/vf_showinfo.c b/libavfilter/vf_showinfo.c
> index 1be939614d..f55df520ca 100644
> --- a/libavfilter/vf_showinfo.c
> +++ b/libavfilter/vf_showinfo.c
> @@ -37,6 +37,7 @@
>  #include "libavutil/timecode.h"
>  #include "libavutil/mastering_display_metadata.h"
>  #include "libavutil/video_enc_params.h"
> +#include "libavutil/hdr_dynamic_metadata.h"
>  
>  #include "avfilter.h"
>  #include "internal.h"
> @@ -178,6 +179,30 @@ static void dump_mastering_display(AVFilterContext *ctx, AVFrameSideData *sd)
>             av_q2d(mastering_display->min_luminance), av_q2d(mastering_display->max_luminance));
>  }
>  
> +static void dump_dynamic_hdr_plus(AVFilterContext *ctx, AVFrameSideData *sd)
> +{
> +    AVDynamicHDRPlus *hdr_plus;
> +
> +    av_log(ctx, AV_LOG_INFO, "HDR10+ metadata: ");
> +    if (sd->size < sizeof(*hdr_plus)) {
> +        av_log(ctx, AV_LOG_ERROR, "invalid data\n");
> +        return;
> +    }
> +
> +    hdr_plus = (AVDynamicHDRPlus *)sd->data;
> +    av_log(ctx, AV_LOG_INFO, "num_windows: %d, ",
> +           hdr_plus->num_windows);
> +    av_log(ctx, AV_LOG_INFO,
> +           "targeted_system_display_maximum_luminance: %8.4f, ",
> +           av_q2d(hdr_plus->targeted_system_display_maximum_luminance));
> +    av_log(ctx, AV_LOG_INFO,
> +           "targeted_system_display_actual_peak_luminance_flag: %d, ",
> +           hdr_plus->targeted_system_display_actual_peak_luminance_flag);
> +    av_log(ctx, AV_LOG_INFO,
> +           "mastering_display_actual_peak_luminance_flag: %d",
> +           hdr_plus->mastering_display_actual_peak_luminance_flag);
> +}
> +
>  static void dump_content_light_metadata(AVFilterContext *ctx, AVFrameSideData *sd)
>  {
>      AVContentLightMetadata* metadata = (AVContentLightMetadata*)sd->data;
> @@ -396,6 +421,9 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
>          case AV_FRAME_DATA_MASTERING_DISPLAY_METADATA:
>              dump_mastering_display(ctx, sd);
>              break;
> +        case AV_FRAME_DATA_DYNAMIC_HDR_PLUS:
> +            dump_dynamic_hdr_plus(ctx, sd);
> +            break;
>          case AV_FRAME_DATA_CONTENT_LIGHT_LEVEL:
>              dump_content_light_metadata(ctx, sd);
>              break;
> diff --git a/tests/fate/mov.mak b/tests/fate/mov.mak
> index 7a721d7c95..7cd62041b8 100644
> --- a/tests/fate/mov.mak
> +++ b/tests/fate/mov.mak
> @@ -29,6 +29,7 @@ FATE_MOV_FFPROBE = fate-mov-neg-firstpts-discard \
>                     fate-mov-guess-delay-2 \
>                     fate-mov-guess-delay-3 \
>                     fate-mov-mp4-with-mov-in24-ver \
> +                   fate-mov-hdr10-plus-metadata \
>  
>  FATE_MOV_FASTSTART = fate-mov-faststart-4gb-overflow \
>  
> @@ -124,3 +125,5 @@ fate-mov-faststart-4gb-overflow: CMP = oneline
>  fate-mov-faststart-4gb-overflow: REF = bc875921f151871e787c4b4023269b29
>  
>  fate-mov-mp4-with-mov-in24-ver: CMD = run ffprobe$(PROGSSUF)$(EXESUF) -show_entries stream=codec_name -select_streams 1 $(TARGET_SAMPLES)/mov/mp4-with-mov-in24-ver.mp4
> +
> +fate-mov-hdr10-plus-metadata: CMD = run ffprobe$(PROGSSUF)$(EXESUF) -show_frames -read_intervals 0%+0.01 -select_streams v -v 0 $(TARGET_SAMPLES)/mov/hdr10_plus_h265_sample.mp4
> \ No newline at end of file
> diff --git a/tests/ref/fate/mov-hdr10-plus-metadata b/tests/ref/fate/mov-hdr10-plus-metadata
> new file mode 100644
> index 0000000000..8444f77883
> --- /dev/null
> +++ b/tests/ref/fate/mov-hdr10-plus-metadata
> @@ -0,0 +1,55 @@
> +[FRAME]
> +media_type=video
> +stream_index=0
> +key_frame=1
> +pkt_pts=0
> +pkt_pts_time=0.000000
> +pkt_dts=0
> +pkt_dts_time=0.000000
> +best_effort_timestamp=0
> +best_effort_timestamp_time=0.000000
> +pkt_duration=4119
> +pkt_duration_time=0.045767
> +pkt_pos=33496
> +pkt_size=340318
> +width=3840
> +height=2160
> +pix_fmt=yuv420p10le
> +sample_aspect_ratio=1:1
> +pict_type=I
> +coded_picture_number=0
> +display_picture_number=0
> +interlaced_frame=0
> +top_field_first=0
> +repeat_pict=0
> +color_range=tv
> +color_space=bt2020nc
> +color_primaries=bt2020
> +color_transfer=smpte2084
> +chroma_location=left
> +[SIDE_DATA]
> +side_data_type=Mastering display metadata
> +red_x=34000/50000
> +red_y=16000/50000
> +green_x=13250/50000
> +green_y=34500/50000
> +blue_x=7500/50000
> +blue_y=3000/50000
> +white_point_x=15635/50000
> +white_point_y=16450/50000
> +min_luminance=50/10000
> +max_luminance=10000000/10000
> +[/SIDE_DATA]
> +[SIDE_DATA]
> +side_data_type=Content light level metadata
> +max_content=0
> +max_average=0
> +[/SIDE_DATA]
> +[SIDE_DATA]
> +side_data_type=HDR Dynamic Metadata SMPTE2094-40 (HDR10+)
> +num_windows=1
> +targeted_system_display_maximum_luminance=400/1
> +targeted_system_display_actual_peak_luminance_flag=0
> +mastering_display_actual_peak_luminance_flag=0
> +[/SIDE_DATA]
> +[/FRAME]
> \ No newline at end of file




More information about the ffmpeg-devel mailing list