[FFmpeg-devel] [PATCH] lavfi/af_ebur128: update filter to use new ebur128 API

Sun Jan 1 21:35:16 EET 2017

On Wed, Dec 28, 2016 at 9:51 AM, Kyle Swanson <k at ylo.ph> wrote:

> On Mon, Nov 28, 2016 at 10:59 PM, Marton Balint <cus at passwd.hu> wrote:
>
>>
>> On Mon, 28 Nov 2016, Kyle Swanson wrote:
>>
>> On Thu, Nov 17, 2016 at 11:04 AM, Kyle Swanson <k at ylo.ph> wrote:
>>>
>>>> Hi,
>>>>
>>>> Here's a couple of patches which update the ebur128 filter to use the
>>>> recently added ebur128 API. This updated filter allows fine-tuned
>>>> control over which EBU R128 parameters are measured, and provides
>>>> modest speed increases over the previous ebur128 filter. Also
>>>> noteworthy: this removes the video output option of the ebur128
>>>> filter. This is extraneous for an ebur128 measurement filter IMHO, but
>>>> if we wanted to keep similar functionality in FFmpeg, we'd be better
>>>> served by a new video source filter where custom meters could be
>>>> created via exported frame metadata.
>>>>
>>>> The first patch adds true peak functionality to the ebur128 API using
>>>> swresample (this was already discussed a little bit:
>>>> http://ffmpeg.org/pipermail/ffmpeg-devel/2016-November/202583.html)
>>>> The second patch is an update to the ebur128 filter.
>>>>
>>>> Kyle
>>>>
>>>
>>> Does anyone have any problems with the first patch?
>>>
>>
>> From 6912ed3a03cd19f46e96f1f4b9eb3aa69b7ce4df Mon Sep 17 00:00:00 2001
>>> From: Kyle Swanson <k at ylo.ph>
>>> Date: Thu, 17 Nov 2016 10:32:45 -0600
>>> Subject: [PATCH 1/2] lavfi/ebur128: add ebur128_check_true_peak()
>>>
>>> Signed-off-by: Kyle Swanson <k at ylo.ph>
>>> ---
>>>  libavfilter/ebur128.c | 162 ++++++++++++++++++++++++++++++
>>> +++++++++++++++++++-
>>>  libavfilter/ebur128.h |  17 ++++++
>>>  2 files changed, 177 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/libavfilter/ebur128.c b/libavfilter/ebur128.c
>>> index a46692e..dc16647 100644
>>> --- a/libavfilter/ebur128.c
>>> +++ b/libavfilter/ebur128.c
>>> @@ -50,6 +50,9 @@
>>>  #include "libavutil/common.h"
>>>  #include "libavutil/mem.h"
>>>  #include "libavutil/thread.h"
>>> +#include "libavutil/channel_layout.h"
>>> +#include "libswresample/swresample.h"
>>>
>>
>> Isn't this include needs an ifdef as well?
>>
>> +#include "libavutil/opt.h"
>>>
>>>  #define CHECK_ERROR(condition, errorcode, goto_point)
>>>         \
>>>      if ((condition)) {
>>>        \
>>> @@ -91,6 +94,16 @@ struct FFEBUR128StateInternal {
>>>      size_t short_term_frame_counter;
>>>      /** Maximum sample peak, one per channel */
>>>      double *sample_peak;
>>> +    /** Maximum true peak, one per channel */
>>> +    double* true_peak;
>>> +#if CONFIG_SWRESAMPLE
>>> +    SwrContext *resampler;
>>> +    size_t oversample_factor;
>>> +    float* resampler_buffer_input;
>>> +    size_t resampler_buffer_input_frames;
>>> +    float* resampler_buffer_output;
>>> +    size_t resampler_buffer_output_frames;
>>> +#endif
>>>      /** The maximum window duration in ms. */
>>>      unsigned long window;
>>>      /** Data pointer array for interleaved data */
>>> @@ -214,12 +227,78 @@ static inline void init_histogram(void)
>>>      }
>>>  }
>>>
>>> +#if CONFIG_SWRESAMPLE
>>> +static int ebur128_init_resampler(FFEBUR128State* st) {
>>> +    int64_t channel_layout;
>>> +    int errcode;
>>> +
>>> +    if (st->samplerate < 96000) {
>>> +        st->d->oversample_factor = 4;
>>> +    } else if (st->samplerate < 192000) {
>>> +        st->d->oversample_factor = 2;
>>> +    } else {
>>> +        st->d->oversample_factor = 1;
>>> +        st->d->resampler_buffer_input = NULL;
>>> +        st->d->resampler_buffer_output = NULL;
>>> +        st->d->resampler = NULL;
>>> +    }
>>> +
>>> +    st->d->resampler_buffer_input_frames = st->d->samples_in_100ms * 4;
>>> +    st->d->resampler_buffer_input = av_malloc(st->d->resampler_buffer_input_frames
>>> *
>>> +                                              st->channels *
>>> +                                              sizeof(float));
>>>
>>
>> av_malloc_array
>>
>> +    CHECK_ERROR(!st->d->resampler_buffer_input, 0, exit)
>>> +
>>> +    st->d->resampler_buffer_output_frames =
>>> +    st->d->resampler_buffer_input_frames *
>>> +    st->d->oversample_factor;
>>> +    st->d->resampler_buffer_output = av_malloc(st->d->resampler_buffer_output_frames
>>> *
>>> +                                               st->channels *
>>> +                                               sizeof(float));
>>>
>>
>> av_malloc_array
>>
>> +    CHECK_ERROR(!st->d->resampler_buffer_output, 0, free_input)
>>> +
>>> +    st->d->resampler = swr_alloc();
>>> +    CHECK_ERROR(!st->d->resampler, 0, free_output)
>>> +
>>> +    channel_layout = av_get_default_channel_layout(st->channels);
>>> +
>>> +    av_opt_set_int(st->d->resampler, "in_channel_layout",
>>> channel_layout, 0);
>>> +    av_opt_set_int(st->d->resampler, "in_sample_rate", st->samplerate,
>>> 0);
>>> +    av_opt_set_sample_fmt(st->d->resampler, "in_sample_fmt",
>>> AV_SAMPLE_FMT_FLT, 0);
>>> +    av_opt_set_int(st->d->resampler, "out_channel_layout",
>>> channel_layout, 0);
>>> +    av_opt_set_int(st->d->resampler, "out_sample_rate", st->samplerate
>>> * st->d->oversample_factor, 0);
>>> +    av_opt_set_sample_fmt(st->d->resampler, "out_sample_fmt",
>>> AV_SAMPLE_FMT_FLT, 0);
>>> +
>>> +    swr_init(st->d->resampler);
>>> +    return 0;
>>> +
>>> +free_output:
>>> +    av_free(st->d->resampler_buffer_output);
>>> +    st->d->resampler_buffer_output = NULL;
>>>
>>
>> av_freep
>>
>> +free_input:
>>> +    av_free(st->d->resampler_buffer_input);
>>> +    st->d->resampler_buffer_input = NULL;
>>>
>>
>> av_freep
>>
>> +exit:
>>> +    return AVERROR(ENOMEM);
>>> +}
>>> +
>>> +static void ebur128_destroy_resampler(FFEBUR128State* st) {
>>> +    av_free(st->d->resampler_buffer_input);
>>> +    st->d->resampler_buffer_input = NULL;
>>>
>>
>> av_freep
>>
>> +    av_free(st->d->resampler_buffer_output);
>>> +    st->d->resampler_buffer_output = NULL;
>>>
>>
>> av_freep
>>
>> +    swr_free(&st->d->resampler);
>>> +    st->d->resampler = NULL;
>>>
>>
>> swr_free already sets resampler to NULL.
>>
>> +}
>>> +#endif
>>> +
>>>  FFEBUR128State *ff_ebur128_init(unsigned int channels,
>>>                                  unsigned long samplerate,
>>>                                  unsigned long window, int mode)
>>>  {
>>>      int errcode;
>>>      FFEBUR128State *st;
>>> +    unsigned int i;
>>>
>>>      st = (FFEBUR128State *) av_malloc(sizeof(FFEBUR128State));
>>>      CHECK_ERROR(!st, 0, exit)
>>> @@ -233,6 +312,14 @@ FFEBUR128State *ff_ebur128_init(unsigned int
>>> channels,
>>>      st->d->sample_peak =
>>>          (double *) av_mallocz_array(channels, sizeof(double));
>>>      CHECK_ERROR(!st->d->sample_peak, 0, free_channel_map)
>>> +    st->d->true_peak =
>>> +        (double*) malloc(channels * sizeof(double));
>>>
>>
>> av_mallocz_array
>>
>> +    CHECK_ERROR(!st->d->true_peak, 0, free_sample_peak)
>>> +
>>> +    for (i = 0; i < channels; ++i) {
>>> +        st->d->sample_peak[i] = 0.0;
>>> +        st->d->true_peak[i] = 0.0;
>>> +    }
>>>
>>
>> Technically not portable (AFAIK), but we assume in a lot of places that a
>> mallocz-ed double is 0.0, so this initialization is unneeded.
>>
>>
>>>      st->samplerate = samplerate;
>>>      st->d->samples_in_100ms = (st->samplerate + 5) / 10;
>>> @@ -242,7 +329,7 @@ FFEBUR128State *ff_ebur128_init(unsigned int
>>> channels,
>>>      } else if ((mode & FF_EBUR128_MODE_M) == FF_EBUR128_MODE_M) {
>>>          st->d->window = FFMAX(window, 400);
>>>      } else {
>>> -        goto free_sample_peak;
>>> +        goto free_true_peak;
>>>      }
>>>      st->d->audio_data_frames = st->samplerate * st->d->window / 1000;
>>>      if (st->d->audio_data_frames % st->d->samples_in_100ms) {
>>> @@ -254,7 +341,7 @@ FFEBUR128State *ff_ebur128_init(unsigned int
>>> channels,
>>>      st->d->audio_data =
>>>          (double *) av_mallocz_array(st->d->audio_data_frames,
>>>                                      st->channels * sizeof(double));
>>> -    CHECK_ERROR(!st->d->audio_data, 0, free_sample_peak)
>>> +    CHECK_ERROR(!st->d->audio_data, 0, free_true_peak)
>>>
>>>      ebur128_init_filter(st);
>>>
>>> @@ -267,6 +354,11 @@ FFEBUR128State *ff_ebur128_init(unsigned int
>>> channels,
>>>                  free_block_energy_histogram)
>>>      st->d->short_term_frame_counter = 0;
>>>
>>> +#if CONFIG_SWRESAMPLE
>>> +    unsigned int result = ebur128_init_resampler(st);
>>>
>>
>> Why unsigned?
>>
>> +    CHECK_ERROR(result, 0, free_short_term_block_energy_histogram)
>>> +#endif
>>> +
>>>      /* the first block needs 400ms of audio data */
>>>      st->d->needed_frames = st->d->samples_in_100ms * 4;
>>>      /* start at the beginning of the buffer */
>>> @@ -287,6 +379,8 @@ free_block_energy_histogram:
>>>      av_free(st->d->block_energy_histogram);
>>>  free_audio_data:
>>>      av_free(st->d->audio_data);
>>> +free_true_peak:
>>> +    av_free(st->d->true_peak);
>>>  free_sample_peak:
>>>      av_free(st->d->sample_peak);
>>>  free_channel_map:
>>> @@ -306,12 +400,53 @@ void ff_ebur128_destroy(FFEBUR128State ** st)
>>>      av_free((*st)->d->audio_data);
>>>      av_free((*st)->d->channel_map);
>>>      av_free((*st)->d->sample_peak);
>>> +    av_free((*st)->d->true_peak);
>>>      av_free((*st)->d->data_ptrs);
>>> +#if CONFIG_SWRESAMPLE
>>> +  ebur128_destroy_resampler(*st);
>>> +#endif
>>>      av_free((*st)->d);
>>>      av_free(*st);
>>>      *st = NULL;
>>>  }
>>>
>>> +static int ebur128_use_swresample(FFEBUR128State* st) {
>>> +#if CONFIG_SWRESAMPLE
>>> +    return ((st->mode & FF_EBUR128_MODE_TRUE_PEAK) ==
>>> FF_EBUR128_MODE_TRUE_PEAK);
>>> +#else
>>> +    (void) st;
>>> +    return 0;
>>> +#endif
>>> +}
>>> +
>>> +static void ebur128_check_true_peak(FFEBUR128State* st, size_t frames)
>>> {
>>> +#if CONFIG_SWRESAMPLE
>>> +    size_t c, i;
>>> +
>>> +    const int in_len  = frames;
>>> +    const int out_len = st->d->resampler_buffer_output_frames;
>>> +    swr_convert(st->d->resampler, (uint8_t
>>> **)&st->d->resampler_buffer_output, out_len,
>>> +                (const uint8_t **)&st->d->resampler_buffer_input,
>>> in_len);
>>> +
>>> +    for (c = 0; c < st->channels; ++c) {
>>> +        for (i = 0; i < out_len; ++i) {
>>> +            if (st->d->resampler_buffer_output[i * st->channels + c] >
>>> +
>>>  st->d->true_peak[c]) {
>>> +              st->d->true_peak[c] =
>>> +                  st->d->resampler_buffer_output[i * st->channels + c];
>>> +            } else if (-st->d->resampler_buffer_output[i *
>>> st->channels + c] >
>>> +
>>>  st->d->true_peak[c]) {
>>> +              st->d->true_peak[c] =
>>> +                 -st->d->resampler_buffer_output[i * st->channels + c];
>>> +            }
>>> +        }
>>> +    }
>>> +#else
>>> +    (void) st; (void) frames;
>>> +#endif
>>> +}
>>> +
>>> +
>>>  #define EBUR128_FILTER(type, scaling_factor)
>>>            \
>>>  static void ebur128_filter_##type(FFEBUR128State* st, const type**
>>> srcs,           \
>>>                                    size_t src_index, size_t frames,
>>>            \
>>> @@ -334,6 +469,15 @@ static void ebur128_filter_##type(FFEBUR128State*
>>> st, const type** srcs,
>>>              if (max > st->d->sample_peak[c]) st->d->sample_peak[c] =
>>> max;          \
>>>          }
>>>             \
>>>      }
>>>             \
>>> +    if (ebur128_use_swresample(st)) {
>>>             \
>>> +        for (c = 0; c < st->channels; ++c) {
>>>            \
>>> +            for (i = 0; i < frames; ++i) {
>>>            \
>>> +                st->d->resampler_buffer_input[i * st->channels + c] =
>>>             \
>>> +                    (float) (srcs[c][src_index + i * stride] /
>>> scaling_factor);    \
>>> +            }
>>>             \
>>> +        }
>>>             \
>>> +        ebur128_check_true_peak(st, frames);
>>>            \
>>> +    }
>>>             \
>>>      for (c = 0; c < st->channels; ++c) {
>>>            \
>>>          int ci = st->d->channel_map[c] - 1;
>>>             \
>>>          if (ci < 0) continue;
>>>             \
>>> @@ -781,3 +925,17 @@ int ff_ebur128_sample_peak(FFEBUR128State * st,
>>>      *out = st->d->sample_peak[channel_number];
>>>      return 0;
>>>
>>
>> Hmm, okay, I got a bit of a problem with this performance-wise. The way I
>> see it first we convert everything to float, then we resample, then we find
>> the maximum in an interleaved output. I'd say performance-wise it would be
>> alot better if we could resample directly the input data to a planar float,
>> and then measure the maximum there, so no intermediate conversions.
>>
>> This can be a second step, if you are not interested in this now.
>>
>> }
>>
>>> +
>>> +int ff_ebur128_true_peak(FFEBUR128State * st,
>>> +                         unsigned int channel_number,
>>> +                         double* out) {
>>> +  if ((st->mode & FF_EBUR128_MODE_TRUE_PEAK) !=
>>> FF_EBUR128_MODE_TRUE_PEAK) {
>>> +      return AVERROR(EINVAL);
>>> +  } else if (channel_number >= st->channels) {
>>> +      return AVERROR(EINVAL);
>>> +  }
>>> +  *out = st->d->true_peak[channel_number] >
>>> st->d->sample_peak[channel_number]
>>> +       ? st->d->true_peak[channel_number]
>>> +       : st->d->sample_peak[channel_number];
>>> +  return 0;
>>> +}
>>> diff --git a/libavfilter/ebur128.h b/libavfilter/ebur128.h
>>> index b94cd24..ca6dd62 100644
>>> --- a/libavfilter/ebur128.h
>>> +++ b/libavfilter/ebur128.h
>>> @@ -91,6 +91,9 @@ enum mode {
>>>      FF_EBUR128_MODE_LRA = (1 << 3) | FF_EBUR128_MODE_S,
>>>    /** can call ff_ebur128_sample_peak */
>>>      FF_EBUR128_MODE_SAMPLE_PEAK = (1 << 4) | FF_EBUR128_MODE_M,
>>> +  /** can call ff_ebur128_true_peak */
>>> +    FF_EBUR128_MODE_TRUE_PEAK   = (1 << 5) | FF_EBUR128_MODE_M
>>> +                                           | FF_EBUR128_MODE_SAMPLE_PEAK
>>>
>>
>> I'd rather not set implicitly MODE_M, because I don't want to give
>> loudness measurement to the user, who only wants true peak.
>>
>>  };
>>>
>>>  /** forward declaration of FFEBUR128StateInternal */
>>> @@ -283,6 +286,20 @@ int ff_ebur128_loudness_range_multiple(FFEBUR128State
>>> ** sts,
>>>  int ff_ebur128_sample_peak(FFEBUR128State * st,
>>>                             unsigned int channel_number, double *out);
>>>
>>> +/** \brief Get maximum true peak from all frames that have been
>>> processed.
>>> + *
>>> + *  @param st library state
>>> + *  @param channel_number channel to analyse
>>> + *  @param out maximum true peak in float format (1.0 is 0 dBTP)
>>> + *  @return
>>> + *    - 0 on success.
>>> + *    - AVERROR(EINVAL) if mode "FF_EBUR128_MODE_TRUE_PEAK" has not
>>> + *      been set.
>>> + *    - AVERROR(EINVAL) if invalid channel index.
>>> + */
>>> +int ff_ebur128_true_peak(FFEBUR128State* st,
>>> +                      unsigned int channel_number, double* out);
>>> +
>>>  /** \brief Get relative threshold in LUFS.
>>>   *
>>>   *  @param st library state
>>> --
>>> 2.10.1
>>>
>>>
>> Regards,
>> Marton
>
>
> Finally had a minute to look at this again. Attached patch addresses
> Michael's and Marton's comments.
>

If no one has anything else, I'll push this in the next couple of days.

>
>
>>
>> _______________________________________________
>> ffmpeg-devel mailing list
>> ffmpeg-devel at ffmpeg.org
>> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>>
>
>