[FFmpeg-cvslog] lavfi: add volumedetect filter.
Nicolas George
git at videolan.org
Sun Aug 19 11:37:54 CEST 2012
ffmpeg | branch: master | Nicolas George <nicolas.george at normalesup.org> | Sat Aug 18 13:49:47 2012 +0200| [5980e57cf9a08ca284bc1b5ffba2363f9eff8ca8] | committer: Nicolas George
lavfi: add volumedetect filter.
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=5980e57cf9a08ca284bc1b5ffba2363f9eff8ca8
---
Changelog | 1 +
doc/filters.texi | 40 +++++++++++
libavfilter/Makefile | 1 +
libavfilter/af_volumedetect.c | 159 +++++++++++++++++++++++++++++++++++++++++
libavfilter/allfilters.c | 1 +
5 files changed, 202 insertions(+)
diff --git a/Changelog b/Changelog
index cd73c6d..14e01f3 100644
--- a/Changelog
+++ b/Changelog
@@ -50,6 +50,7 @@ version next:
- edge detection filter
- framestep filter
- ffmpeg -shortest option is now per-output file
+- volume measurement filter
version 0.11:
diff --git a/doc/filters.texi b/doc/filters.texi
index 5793100..8847990 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -690,6 +690,46 @@ volume=-12dB
@end example
@end itemize
+@section volumedetect
+
+Detect the volume of the input audio.
+
+The filter has no parameters. The input is not modified. Statistics about
+the volume will be printed in the log when the input stream end is reached.
+
+In particular it will show the mean volume (root mean square), maximum
+volume (on a per-sample basis), and the beginning of a histogram of the
+registered volume values (from the maximum value to a cumulative 1/1000 of
+the samples).
+
+All volumes are in decibels relative to the maximum PCM value.
+
+Here is an excerpt of the output:
+@example
+[Parsed_volumedetect_0 @ 0xa23120] mean_volume: -27 dB
+[Parsed_volumedetect_0 @ 0xa23120] max_volume: -4 dB
+[Parsed_volumedetect_0 @ 0xa23120] histogram_4db: 6
+[Parsed_volumedetect_0 @ 0xa23120] histogram_5db: 62
+[Parsed_volumedetect_0 @ 0xa23120] histogram_6db: 286
+[Parsed_volumedetect_0 @ 0xa23120] histogram_7db: 1042
+[Parsed_volumedetect_0 @ 0xa23120] histogram_8db: 2551
+[Parsed_volumedetect_0 @ 0xa23120] histogram_9db: 4609
+[Parsed_volumedetect_0 @ 0xa23120] histogram_10db: 8409
+@end example
+
+It means that:
+@itemize
+@item
+The mean square energy is approximately -27 dB, or 10^-2.7.
+@item
+The largest sample is at -4 dB, or more precisely between -4 dB and -5 dB.
+@item
+There are 6 samples at -4 dB, 62 at -5 dB, 286 at -6 dB, etc.
+@end itemize
+
+In other words, raising the volume by +4 dB does not cause any clipping,
+raising it by +5 dB causes clipping for 6 samples, etc.
+
@section asyncts
Synchronize audio data with timestamps by squeezing/stretching it and/or
dropping samples/adding silence when needed.
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 916e54a..af4fde6 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -67,6 +67,7 @@ OBJS-$(CONFIG_PAN_FILTER) += af_pan.o
OBJS-$(CONFIG_RESAMPLE_FILTER) += af_resample.o
OBJS-$(CONFIG_SILENCEDETECT_FILTER) += af_silencedetect.o
OBJS-$(CONFIG_VOLUME_FILTER) += af_volume.o
+OBJS-$(CONFIG_VOLUMEDETECT_FILTER) += af_volumedetect.o
OBJS-$(CONFIG_AEVALSRC_FILTER) += asrc_aevalsrc.o
OBJS-$(CONFIG_ANULLSRC_FILTER) += asrc_anullsrc.o
diff --git a/libavfilter/af_volumedetect.c b/libavfilter/af_volumedetect.c
new file mode 100644
index 0000000..caf8559
--- /dev/null
+++ b/libavfilter/af_volumedetect.c
@@ -0,0 +1,159 @@
+/*
+ * Copyright (c) 2012 Nicolas George
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/audioconvert.h"
+#include "libavutil/avassert.h"
+#include "audio.h"
+#include "avfilter.h"
+#include "internal.h"
+
+/**
+ * Private state of the volumedetect filter: a histogram of every sample
+ * value seen so far.
+ */
+typedef struct {
+ /**
+ * Number of samples at each PCM value.
+ * histogram[0x8000 + i] is the number of samples at value i.
+ * The extra element (index 0x10000, i.e. value +0x8000) is there for
+ * symmetry with the slot for value -0x8000.
+ */
+ uint64_t histogram[0x10001];
+} VolDetectContext;
+
+/**
+ * Declare the sample formats the filter accepts: signed 16-bit, packed
+ * (S16) or planar (S16P).
+ *
+ * @param ctx filter context the format list is attached to
+ * @return 0 on success, AVERROR(ENOMEM) if the format list cannot be built
+ */
+static int query_formats(AVFilterContext *ctx)
+{
+    enum AVSampleFormat sample_fmts[] = {
+        AV_SAMPLE_FMT_S16,
+        AV_SAMPLE_FMT_S16P,
+        AV_SAMPLE_FMT_NONE      /* list terminator */
+    };
+    AVFilterFormats *fmt_list = ff_make_format_list(sample_fmts);
+
+    if (!fmt_list)
+        return AVERROR(ENOMEM);
+
+    ff_set_common_formats(ctx, fmt_list);
+    return 0;
+}
+
+/**
+ * Input-pad callback: count every sample of the buffer in the histogram,
+ * then hand the buffer to the output link.
+ *
+ * Each plane is read as int16_t; query_formats() restricts the filter to
+ * the S16 and S16P sample formats.
+ *
+ * @param inlink  input link the buffer arrived on
+ * @param samples audio buffer to account for and forward
+ * @return whatever ff_filter_samples() returns for the output link
+ */
+static int filter_samples(AVFilterLink *inlink, AVFilterBufferRef *samples)
+{
+    AVFilterContext *ctx = inlink->dst;
+    VolDetectContext *vd = ctx->priv;
+    int64_t layout  = samples->audio->channel_layout;
+    int nb_samples  = samples->audio->nb_samples;
+    int nb_channels = av_get_channel_layout_nb_channels(layout);
+    /* Planar case: one plane per channel, nb_samples values in each. */
+    int nb_planes   = nb_channels;
+    int plane, i;
+    int16_t *pcm;
+
+    if (!av_sample_fmt_is_planar(samples->format)) {
+        /* Packed case: a single plane holding the samples of all channels. */
+        nb_samples *= nb_channels;
+        nb_planes = 1;
+    }
+    for (plane = 0; plane < nb_planes; plane++) {
+        pcm = (int16_t *)samples->extended_data[plane];
+        for (i = 0; i < nb_samples; i++)
+            /* Bias by 0x8000 so that -32768..32767 maps to 0..0xFFFF. */
+            vd->histogram[pcm[i] + 0x8000]++;
+    }
+
+    return ff_filter_samples(inlink->dst->outputs[0], samples);
+}
+
+/* Value logdb() returns for a zero input, and the highest index of the
+   per-dB histogram built in print_stats(). The quietest non-zero 16-bit
+   sample (+-1) is about 90.3 dB below full scale, so 91 lies just past
+   every bucket a non-zero sample can fall into. */
+#define MAX_DB 91
+
+/**
+ * Express a squared sample value (a power) in decibels below full scale,
+ * full scale being 0x8000 * 0x8000.
+ *
+ * @param v squared sample value, or a mean of such values
+ * @return the attenuation in dB (0 at full scale, growing as v shrinks);
+ *         MAX_DB when v is 0, where the logarithm is undefined
+ */
+static inline double logdb(uint64_t v)
+{
+    /* -10/log(10) */
+    const double neg_db_per_log_unit = -4.3429448190325182765112891891660508229;
+    double ratio;
+
+    if (v == 0)
+        return MAX_DB;
+
+    ratio = v / (double)(0x8000 * 0x8000);
+    return log(ratio) * neg_db_per_log_unit;
+}
+
+/**
+ * Log, at AV_LOG_INFO level, the statistics gathered in the histogram:
+ * the sample count, the mean volume, the maximum volume and the loud end
+ * of the per-decibel histogram.
+ *
+ * Only the sample count is printed when no sample was recorded.
+ *
+ * @param ctx filter context whose private data holds the histogram
+ */
+static void print_stats(AVFilterContext *ctx)
+{
+    VolDetectContext *vd = ctx->priv;
+    int i, max_volume, shift;
+    uint64_t nb_samples = 0, power = 0, nb_samples_shift = 0, sum = 0;
+    uint64_t histdb[MAX_DB + 1] = { 0 };
+
+    for (i = 0; i < 0x10000; i++)
+        nb_samples += vd->histogram[i];
+    /* The counters are uint64_t, hence the unsigned conversion. */
+    av_log(ctx, AV_LOG_INFO, "n_samples: %"PRIu64"\n", nb_samples);
+    if (!nb_samples)
+        return;
+
+    /* If nb_samples > 1<<34, there is a risk of overflow in the
+       multiplication or the sum: shift all histogram values to avoid that.
+       The total number of samples must be recomputed to avoid rounding
+       errors. */
+    shift = av_log2(nb_samples >> 33);
+    for (i = 0; i < 0x10000; i++) {
+        nb_samples_shift += vd->histogram[i] >> shift;
+        power += (i - 0x8000) * (i - 0x8000) * (vd->histogram[i] >> shift);
+    }
+    if (!nb_samples_shift)
+        return;
+    /* Mean of the squared samples, rounded to nearest. */
+    power = (power + nb_samples_shift / 2) / nb_samples_shift;
+    av_assert0(power <= 0x8000 * 0x8000);
+    av_log(ctx, AV_LOG_INFO, "mean_volume: %.1f dB\n", -logdb(power));
+
+    /* Largest absolute sample value present, looking at both signs. */
+    max_volume = 0x8000;
+    while (max_volume > 0 && !vd->histogram[0x8000 + max_volume] &&
+                             !vd->histogram[0x8000 - max_volume])
+        max_volume--;
+    av_log(ctx, AV_LOG_INFO, "max_volume: %.1f dB\n", -logdb(max_volume * max_volume));
+
+    /* Regroup the samples by whole decibels below full scale; samples of
+       value 0 end up in bucket MAX_DB. */
+    for (i = 0; i < 0x10000; i++)
+        histdb[(int)logdb((i - 0x8000) * (i - 0x8000))] += vd->histogram[i];
+    /* Skip the empty buckets at the loud end... */
+    for (i = 0; i <= MAX_DB && !histdb[i]; i++);
+    /* ...then print buckets until the ones already printed add up to
+       1/1000 of all samples. */
+    for (; i <= MAX_DB && sum < nb_samples / 1000; i++) {
+        av_log(ctx, AV_LOG_INFO, "histogram_%ddb: %"PRIu64"\n", i, histdb[i]);
+        sum += histdb[i];
+    }
+}
+
+/**
+ * Output-pad callback: forward the frame request to the input and print
+ * the statistics once the input reports end of stream.
+ *
+ * @param outlink output link the request was made on
+ * @return the value returned by ff_request_frame() on the input link
+ */
+static int request_frame(AVFilterLink *outlink)
+{
+    AVFilterContext *filter = outlink->src;
+    int status = ff_request_frame(filter->inputs[0]);
+
+    if (status != AVERROR_EOF)
+        return status;
+
+    print_stats(filter);
+    return status;
+}
+
+/**
+ * Definition of the "volumedetect" filter: one audio input pad and one
+ * audio output pad.
+ */
+AVFilter avfilter_af_volumedetect = {
+ .name = "volumedetect",
+ .description = NULL_IF_CONFIG_SMALL("Detect audio volume."),
+
+ .priv_size = sizeof(VolDetectContext),
+ .query_formats = query_formats,
+
+ /* Input pad: filter_samples() is the callback for incoming buffers. */
+ .inputs = (const AVFilterPad[]) {
+ { .name = "default",
+ .type = AVMEDIA_TYPE_AUDIO,
+ .get_audio_buffer = ff_null_get_audio_buffer,
+ .filter_samples = filter_samples,
+ .min_perms = AV_PERM_READ, },
+ { .name = NULL }
+ },
+ /* Output pad: request_frame() prints the statistics at end of stream. */
+ .outputs = (const AVFilterPad[]) {
+ { .name = "default",
+ .type = AVMEDIA_TYPE_AUDIO,
+ .request_frame = request_frame, },
+ { .name = NULL }
+ },
+};
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index a9344c2..6defed4 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -57,6 +57,7 @@ void avfilter_register_all(void)
REGISTER_FILTER (PAN, pan, af);
REGISTER_FILTER (SILENCEDETECT, silencedetect, af);
REGISTER_FILTER (VOLUME, volume, af);
+ REGISTER_FILTER (VOLUMEDETECT,volumedetect,af);
REGISTER_FILTER (RESAMPLE, resample, af);
REGISTER_FILTER (AEVALSRC, aevalsrc, asrc);
More information about the ffmpeg-cvslog
mailing list