[FFmpeg-devel] [PATCH] avfilter: add declick audio filter
Paul B Mahol
onemda at gmail.com
Sat Mar 24 15:41:24 EET 2018
Signed-off-by: Paul B Mahol <onemda at gmail.com>
---
doc/filters.texi | 35 +++
libavfilter/Makefile | 1 +
libavfilter/af_declick.c | 586 +++++++++++++++++++++++++++++++++++++++++++++++
libavfilter/allfilters.c | 1 +
4 files changed, 623 insertions(+)
create mode 100644 libavfilter/af_declick.c
diff --git a/doc/filters.texi b/doc/filters.texi
index 1620ae1cfa..9a067ba9ea 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -2562,6 +2562,41 @@ Optional. It should have a value much less than 1 (e.g. 0.05 or 0.02) and is
used to prevent clipping.
@end table
+ at section declick
+Remove impulsive noise from input audio.
+
+Samples detected as impulsive noise are replaced by interpolated samples using
+autoregressive modeling.
+
+ at table @option
+ at item w
+Set window size, in milliseconds. Allowed range is from @code{10} to @code{100}.
+Default value is @code{54} milliseconds.
+This sets size of window which will be processed at once.
+
+ at item o
+Set window overlap, in percentage of window size. Allowed range is from @code{50}
+to @code{95}. Default value is @code{75} percent.
+Setting this to very high value increases impulsive noise removal but makes whole
+processs much slower.
+
+ at item a
+Set autoregression order, in percentage of window size. Allowed range is from
+ at code{1} to @code{50}. Default value is @code{2} percent. This option also controls
+quality of interpolated samples using neighbour good samples.
+
+ at item t
+Set threshold value. Allowed range is from @code{1} to @code{10}.
+Default value is @code{2}.
+This controls the strength of impulse noise which is going to be removed.
+
+ at item b
+Set burst fusion, in percentage of window size. Allowed range is @code{0} to
+ at code{40}. Default value is @code{10} percent.
+This controls between how much samples, which are detected as impulsive noise,
+any sample between 2 detected noise samples is considered also as noise sample.
+ at end table
+
@section drmeter
Measure audio dynamic range.
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 1043b41d80..978751d2a0 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -87,6 +87,7 @@ OBJS-$(CONFIG_COMPENSATIONDELAY_FILTER) += af_compensationdelay.o
OBJS-$(CONFIG_CROSSFEED_FILTER) += af_crossfeed.o
OBJS-$(CONFIG_CRYSTALIZER_FILTER) += af_crystalizer.o
OBJS-$(CONFIG_DCSHIFT_FILTER) += af_dcshift.o
+OBJS-$(CONFIG_DECLICK_FILTER) += af_declick.o
OBJS-$(CONFIG_DRMETER_FILTER) += af_drmeter.o
OBJS-$(CONFIG_DYNAUDNORM_FILTER) += af_dynaudnorm.o
OBJS-$(CONFIG_EARWAX_FILTER) += af_earwax.o
diff --git a/libavfilter/af_declick.c b/libavfilter/af_declick.c
new file mode 100644
index 0000000000..0de4c35c95
--- /dev/null
+++ b/libavfilter/af_declick.c
@@ -0,0 +1,586 @@
+/*
+ * Copyright (c) 2018 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/audio_fifo.h"
+#include "libavutil/opt.h"
+#include "avfilter.h"
+#include "audio.h"
+#include "formats.h"
+
+typedef struct DeclickContext {
+ const AVClass *class;
+ double w;
+ double overlap;
+ double threshold;
+ double ar;
+ double burst;
+
+ int ar_order;
+ int nb_burst_samples;
+ int window_size;
+ int hop_size;
+
+ AVFrame *in;
+ AVFrame *out;
+ AVFrame *buffer;
+ AVFrame *is;
+ double *auxiliary;
+ double *detection;
+ double *acoefficients;
+ double *acorrelation;
+ double *tmp;
+ double *interpolated;
+ double *matrix;
+ int matrix_size;
+ double *vector;
+ int vector_size;
+ uint8_t *click;
+ int *index;
+
+ double *ltriangular;
+ int ltriangular_size;
+ double *diagonal;
+ int d_size;
+ double *y;
+ int y_size;
+
+ int64_t pts;
+ uint64_t nb_samples;
+ uint64_t detected_clicks;
+ int samples_left;
+
+ AVAudioFifo *fifo;
+ double *window_func_lut;
+} DeclickContext;
+
+#define OFFSET(x) offsetof(DeclickContext, x)
+#define AF AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
+
+static const AVOption declick_options[] = {
+ { "w", "set window size", OFFSET(w), AV_OPT_TYPE_DOUBLE, {.dbl=54}, 10, 100, AF },
+ { "o", "set window overlap", OFFSET(overlap), AV_OPT_TYPE_DOUBLE, {.dbl=75}, 50, 95, AF },
+ { "a", "set autoregression order", OFFSET(ar), AV_OPT_TYPE_DOUBLE, {.dbl=2}, 1, 50, AF },
+ { "t", "set threshold", OFFSET(threshold), AV_OPT_TYPE_DOUBLE, {.dbl=2}, 1, 10, AF },
+ { "b", "set burst fusion", OFFSET(burst), AV_OPT_TYPE_DOUBLE, {.dbl=10}, 0, 40, AF },
+ { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(declick);
+
+static int query_formats(AVFilterContext *ctx)
+{
+ AVFilterFormats *formats = NULL;
+ AVFilterChannelLayouts *layouts = NULL;
+ static const enum AVSampleFormat sample_fmts[] = {
+ AV_SAMPLE_FMT_DBLP,
+ AV_SAMPLE_FMT_NONE
+ };
+ int ret;
+
+ formats = ff_make_format_list(sample_fmts);
+ if (!formats)
+ return AVERROR(ENOMEM);
+ ret = ff_set_common_formats(ctx, formats);
+ if (ret < 0)
+ return ret;
+
+ layouts = ff_all_channel_counts();
+ if (!layouts)
+ return AVERROR(ENOMEM);
+
+ ret = ff_set_common_channel_layouts(ctx, layouts);
+ if (ret < 0)
+ return ret;
+
+ formats = ff_all_samplerates();
+ return ff_set_common_samplerates(ctx, formats);
+}
+
+static int config_input(AVFilterLink *inlink)
+{
+ AVFilterContext *ctx = inlink->dst;
+ DeclickContext *s = ctx->priv;
+ int i;
+
+ s->pts = AV_NOPTS_VALUE;
+ s->window_size = inlink->sample_rate * s->w / 1000.;
+ if (s->window_size < 100)
+ return AVERROR(EINVAL);
+ s->ar_order = s->window_size * s->ar / 100.;
+ s->nb_burst_samples = s->window_size * s->burst / 1000.;
+ s->hop_size = s->window_size * (1. - (s->overlap / 100.));
+ if (s->hop_size < 1)
+ return AVERROR(EINVAL);
+
+ s->fifo = av_audio_fifo_alloc(inlink->format, inlink->channels, s->window_size);
+ if (!s->fifo)
+ return AVERROR(ENOMEM);
+
+ s->window_func_lut = av_realloc_f(s->window_func_lut, s->window_size,
+ sizeof(*s->window_func_lut));
+ if (!s->window_func_lut)
+ return AVERROR(ENOMEM);
+ for (i = 0; i < s->window_size; i++)
+ s->window_func_lut[i] = sin(M_PI * i / s->window_size) * (1. - (s->overlap / 100.)) * M_PI_2;
+
+ av_frame_free(&s->in);
+ av_frame_free(&s->out);
+ s->in = ff_get_audio_buffer(inlink, s->window_size);
+ s->out = ff_get_audio_buffer(inlink, s->window_size);
+ s->buffer = ff_get_audio_buffer(inlink, s->window_size * 2);
+ s->is = ff_get_audio_buffer(inlink, s->window_size);
+ if (!s->in || !s->out || !s->buffer || !s->is)
+ return AVERROR(ENOMEM);
+
+ s->detection = av_calloc(s->window_size, sizeof(*s->detection));
+ s->auxiliary = av_calloc(s->ar_order + 1, sizeof(*s->auxiliary));
+ s->acoefficients = av_calloc(s->ar_order + 1, sizeof(*s->acoefficients));
+ s->acorrelation = av_calloc(s->ar_order + 1, sizeof(*s->acorrelation));
+ s->tmp = av_calloc(s->ar_order, sizeof(*s->tmp));
+ s->click = av_calloc(s->window_size, sizeof(*s->click));
+ s->index = av_calloc(s->window_size, sizeof(*s->index));
+ s->interpolated = av_calloc(s->window_size, sizeof(*s->interpolated));
+ if (!s->auxiliary || !s->acoefficients || !s->detection || !s->click ||
+ !s->index || !s->interpolated || !s->acorrelation || !s->tmp)
+ return AVERROR(ENOMEM);
+
+ return 0;
+}
+
+static void autocorrelation(const double *input, int order, int size, double *output)
+{
+ double scale = 1. / size;
+ int i, j;
+
+ for (i = 0; i <= order; i++) {
+ double value = 0.;
+
+ for (j = i; j < size; j++)
+ value += input[j] * input[j - i];
+
+ output[i] = value * scale;
+ }
+}
+
+static double autoregression(const double *samples, int ar_order, int nb_samples, double *k, double *r, double *a)
+{
+ double alpha;
+ int i, j;
+
+ memset(a, 0, ar_order * sizeof(*a));
+
+ autocorrelation(samples, ar_order, nb_samples, r);
+
+ /* Levinson-Durbin algorithm */
+ k[0] = a[0] = -r[1] / r[0];
+ alpha = r[0] * (1. - k[0] * k[0]);
+ for (i = 1; i < ar_order; i++) {
+ double epsilon = 0.;
+
+ for (j = 0; j < i; j++)
+ epsilon += a[j] * r[i - j];
+ epsilon += r[i + 1];
+
+ k[i] = -epsilon / alpha;
+ alpha *= (1. - k[i] * k[i]);
+ for (j = i - 1; j >= 0; j--)
+ k[j] = a[j] + k[i] * a[i - j - 1];
+ for (j = 0; j <= i; j++)
+ a[j] = k[j];
+ }
+
+ k[0] = 1.;
+ for (i = 0; i < ar_order; i++)
+ k[i + 1] = a[i];
+
+ return sqrt(alpha);
+}
+
+static int isfinite_array(double *samples, int nb_samples)
+{
+ int i;
+
+ for (i = 0; i < nb_samples; i++)
+ if (!isfinite(samples[i]))
+ return 0;
+
+ return 1;
+}
+
+static int find_index(int *index, int value, int size)
+{
+ int i, start, end;
+
+ if ((value < index[0]) || (value > index[size - 1]))
+ return 1;
+
+ i = start = 0;
+ end = size - 1;
+
+ while (start <= end) {
+ i = (end + start) / 2;
+ if (index[i] == value)
+ return 0;
+ if (value < index[i])
+ end = i - 1;
+ if (value > index[i])
+ start = i + 1;
+ }
+
+ return 1;
+}
+
+static int cholesky_decomposition(DeclickContext *s, double *matrix, double *vector, int n, double *out)
+{
+ double *ltriangular, *diagonal, *y;
+ int i, j, k;
+
+ av_fast_malloc(&s->ltriangular, &s->ltriangular_size, n * n * sizeof(*s->ltriangular));
+ ltriangular = s->ltriangular;
+
+ av_fast_malloc(&s->diagonal, &s->d_size, n * sizeof(*s->diagonal));
+ diagonal = s->diagonal;
+
+ av_fast_malloc(&s->y, &s->y_size, n * sizeof(*s->y));
+ y = s->y;
+
+ if (!ltriangular || !diagonal || !y)
+ return AVERROR(ENOMEM);
+
+ memset(s->ltriangular, 0, n * n * sizeof(*s->ltriangular));
+ memset(s->diagonal, 0, n * sizeof(*s->diagonal));
+ memset(s->y, 0, n * sizeof(*s->y));
+
+ for (i = 0; i < n; i++) {
+ const int in = i * n;
+
+ diagonal[i] = matrix[in + i];
+ for (j = 0; j < i; j++)
+ diagonal[i] -= diagonal[j] * ltriangular[in + j] * ltriangular[in + j];
+
+ if (diagonal[i] == 0.) {
+ return -1;
+ }
+
+ for (j = i + 1; j < n; j++) {
+ const int jn = j * n;
+ double x;
+
+ x = matrix[jn + i];
+ for (k = 0; k < i; k++)
+ x -= diagonal[k] * ltriangular[in + k] * ltriangular[jn + k];
+ ltriangular[in + j] = ltriangular[jn + i] = x / diagonal[i];
+ }
+ }
+
+ for (i = 0; i < n; i++) {
+ const int in = i * n;
+
+ y[i] = vector[i];
+ for (j = 0; j <= i; j++)
+ y[i] -= ltriangular[in + j] * y[j];
+ }
+
+ for (i = n - 1; i >= 0; i--) {
+ const int in = i * n;
+
+ out[i] = y[i] / diagonal[i];
+ for (j = i; j < n; j++)
+ out[i] -= ltriangular[in + j] * out[j];
+ }
+
+ return 0;
+}
+
+static int interpolation(DeclickContext *s, const double *src, int ar_order,
+ double *acoefficients, int *index, int nb_clicks,
+ double *auxiliary, double *interpolated)
+{
+ double *vector, *matrix;
+ int i, j;
+
+ av_fast_malloc(&s->matrix, &s->matrix_size, nb_clicks * nb_clicks * sizeof(*s->matrix));
+ matrix = s->matrix;
+ if (!matrix)
+ return AVERROR(ENOMEM);
+
+ av_fast_malloc(&s->vector, &s->vector_size, nb_clicks * sizeof(*s->vector));
+ vector = s->vector;
+ if (!vector)
+ return AVERROR(ENOMEM);
+
+ for (i = 0; i <= ar_order; i++) {
+ auxiliary[i] = 0.;
+ for (j = i; j <= ar_order; j++)
+ auxiliary[i] += acoefficients[j] * acoefficients[j - i];
+ }
+
+ for (i = 0; i < nb_clicks; i++) {
+ const int im = i * nb_clicks;
+
+ for (j = i; j < nb_clicks; j++) {
+ if (abs(index[j] - index[i]) <= ar_order) {
+ matrix[j * nb_clicks + i] = matrix[im + j] = auxiliary[abs(index[j] - index[i])];
+ } else {
+ matrix[j * nb_clicks + i] = matrix[im + j] = 0;
+ }
+ }
+ }
+
+ for (i = 0; i < nb_clicks; i++) {
+ vector[i] = 0.;
+ for (j = -ar_order; j <= ar_order; j++)
+ if (find_index(index, index[i] - j, nb_clicks))
+ vector[i] -= src[index[i] - j] * auxiliary[abs(j)];
+ }
+
+ return cholesky_decomposition(s, matrix, vector, nb_clicks, interpolated);
+}
+
+static int detect_clicks(DeclickContext *s, double sigmae, double *detection, double *acoefficients,
+ uint8_t *click, int *index,
+ const double *src, double *dst)
+{
+ const double threshold = s->threshold;
+ int i, j, nb_clicks = 0, prev = -1;
+
+ memset(detection, 0, s->window_size * sizeof(*detection));
+
+ for (i = s->ar_order; i < s->window_size; i++) {
+ for (j = 0; j <= s->ar_order; j++) {
+ detection[i] += acoefficients[j] * src[i - j];
+ }
+ }
+
+ for (i = 0; i < s->window_size; i++) {
+ click[i] = fabs(detection[i]) > sigmae * threshold;
+ dst[i] = src[i];
+ }
+
+ for (i = 0; i < s->window_size; i++) {
+ if (!click[i])
+ continue;
+
+ if (prev >= 0 && (i > prev + 1) && (i <= s->nb_burst_samples + prev))
+ for (j = prev + 1; j < i; j++)
+ click[j] = 1;
+ prev = i;
+ }
+
+ memset(click, 0, s->ar_order * sizeof(*click));
+ memset(click + (s->window_size - s->ar_order), 0, s->ar_order * sizeof(*click));
+
+ for (i = s->ar_order; i < s->window_size - s->ar_order; i++)
+ if (click[i])
+ index[nb_clicks++] = i;
+
+ return nb_clicks;
+}
+
+static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+{
+ AVFilterContext *ctx = inlink->dst;
+ AVFilterLink *outlink = ctx->outputs[0];
+ DeclickContext *s = ctx->priv;
+ AVFrame *out = NULL;
+ int ret = 0;
+
+ if (s->pts == AV_NOPTS_VALUE)
+ s->pts = in->pts;
+
+ ret = av_audio_fifo_write(s->fifo, (void **)in->extended_data,
+ in->nb_samples);
+ av_frame_free(&in);
+ if (ret < 0) {
+ av_frame_free(&out);
+ return ret;
+ }
+
+ while (av_audio_fifo_size(s->fifo) >= s->window_size) {
+ int j, ch, detected_clicks = 0;
+
+ out = ff_get_audio_buffer(outlink, s->hop_size);
+ if (!out)
+ return AVERROR(ENOMEM);
+
+ ret = av_audio_fifo_peek(s->fifo, (void **)s->in->extended_data,
+ s->window_size);
+ if (ret < 0)
+ break;
+
+ for (ch = 0; ch < s->in->channels; ch++) {
+ const double *src = (const double *)s->in->extended_data[ch];
+ double *is = (double *)s->is->extended_data[ch];
+ double *dst = (double *)s->out->extended_data[ch];
+ double *ptr = (double *)out->extended_data[ch];
+ double *buf = (double *)s->buffer->extended_data[ch];
+ const double *w = s->window_func_lut;
+ double sigmae;
+
+ sigmae = autoregression(src, s->ar_order, s->window_size, s->acoefficients, s->acorrelation, s->tmp);
+
+ if (isfinite_array(s->acoefficients, s->ar_order + 1)) {
+ double *interpolated = s->interpolated;
+ int *index = s->index;
+ int nb_clicks;
+
+ nb_clicks = detect_clicks(s, sigmae, s->detection, s->acoefficients,
+ s->click, index, src, dst);
+ if (nb_clicks > 0) {
+ ret = interpolation(s, src, s->ar_order, s->acoefficients, index,
+ nb_clicks, s->auxiliary, interpolated);
+ if (ret < 0)
+ goto fail;
+
+ for (j = 0; j < nb_clicks; j++) {
+ dst[index[j]] = interpolated[j];
+ is[index[j]] = 1;
+ }
+ }
+ } else {
+ memcpy(dst, src, s->window_size * sizeof(*dst));
+ }
+
+ for (j = 0; j < s->window_size; j++)
+ buf[j] += dst[j] * w[j];
+ for (j = 0; j < s->hop_size; j++)
+ ptr[j] = buf[j];
+
+ memmove(buf, buf + s->hop_size, (s->window_size * 2 - s->hop_size) * sizeof(*buf));
+ memmove(is, is + s->hop_size, (s->window_size - s->hop_size) * sizeof(*is));
+ memset(buf + s->window_size * 2 - s->hop_size, 0, s->hop_size * sizeof(*buf));
+ memset(is + s->window_size - s->hop_size, 0, s->hop_size * sizeof(*is));
+
+ for (j = 0; j < s->hop_size; j++) {
+ if (is[j])
+ detected_clicks++;
+ }
+ }
+
+ av_audio_fifo_drain(s->fifo, s->hop_size);
+
+ out->pts = s->pts;
+ s->pts += s->hop_size;
+
+ s->detected_clicks += detected_clicks;
+ s->nb_samples += s->hop_size * inlink->channels;
+
+ ret = ff_filter_frame(outlink, out);
+ if (ret < 0)
+ break;
+ }
+
+fail:
+ if (ret < 0)
+ av_frame_free(&out);
+ return ret;
+}
+
+static int request_frame(AVFilterLink *outlink)
+{
+ AVFilterContext *ctx = outlink->src;
+ DeclickContext *s = ctx->priv;
+ int ret = 0;
+
+ ret = ff_request_frame(ctx->inputs[0]);
+
+ if (ret == AVERROR_EOF && av_audio_fifo_size(s->fifo) > 0) {
+ AVFrame *in;
+
+ if (!s->samples_left)
+ s->samples_left = av_audio_fifo_size(s->fifo);
+
+ in = ff_get_audio_buffer(outlink, s->window_size);
+ if (!in)
+ return AVERROR(ENOMEM);
+ ret = filter_frame(ctx->inputs[0], in);
+ if (s->samples_left) {
+ s->samples_left -= s->hop_size;
+ if (s->samples_left <= 0)
+ av_audio_fifo_drain(s->fifo, s->window_size);
+ }
+ }
+
+ return ret;
+}
+
+
+static av_cold void uninit(AVFilterContext *ctx)
+{
+ DeclickContext *s = ctx->priv;
+
+ av_log(ctx, AV_LOG_INFO, "Detected clicks in %"PRId64" of %"PRId64" samples (%g%%).\n",
+ s->detected_clicks, s->nb_samples, 100. * s->detected_clicks / s->nb_samples);
+
+ av_audio_fifo_free(s->fifo);
+ av_freep(&s->window_func_lut);
+ av_frame_free(&s->in);
+ av_frame_free(&s->out);
+ av_frame_free(&s->buffer);
+ av_frame_free(&s->is);
+ av_freep(&s->detection);
+ av_freep(&s->auxiliary);
+ av_freep(&s->acoefficients);
+ av_freep(&s->acorrelation);
+ av_freep(&s->tmp);
+ av_freep(&s->click);
+ av_freep(&s->index);
+ av_freep(&s->interpolated);
+ av_freep(&s->matrix);
+ s->matrix_size = 0;
+ av_freep(&s->vector);
+ s->vector_size = 0;
+ av_freep(&s->ltriangular);
+ s->ltriangular_size = 0;
+ av_freep(&s->diagonal);
+ s->d_size = 0;
+ av_freep(&s->y);
+ s->y_size = 0;
+}
+
+static const AVFilterPad inputs[] = {
+ {
+ .name = "default",
+ .type = AVMEDIA_TYPE_AUDIO,
+ .filter_frame = filter_frame,
+ .config_props = config_input,
+ },
+ { NULL }
+};
+
+static const AVFilterPad outputs[] = {
+ {
+ .name = "default",
+ .type = AVMEDIA_TYPE_AUDIO,
+ .request_frame = request_frame,
+ },
+ { NULL }
+};
+
+AVFilter ff_af_declick = {
+ .name = "declick",
+ .description = NULL_IF_CONFIG_SMALL("Remove impulsive noise from input audio."),
+ .query_formats = query_formats,
+ .priv_size = sizeof(DeclickContext),
+ .priv_class = &declick_class,
+ .uninit = uninit,
+ .inputs = inputs,
+ .outputs = outputs,
+};
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index 3f67e321bf..cf5016f2c1 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -98,6 +98,7 @@ static void register_all(void)
REGISTER_FILTER(CROSSFEED, crossfeed, af);
REGISTER_FILTER(CRYSTALIZER, crystalizer, af);
REGISTER_FILTER(DCSHIFT, dcshift, af);
+ REGISTER_FILTER(DECLICK, declick, af);
REGISTER_FILTER(DRMETER, drmeter, af);
REGISTER_FILTER(DYNAUDNORM, dynaudnorm, af);
REGISTER_FILTER(EARWAX, earwax, af);
--
2.11.0
More information about the ffmpeg-devel
mailing list