[FFmpeg-devel] [PATCH 4/5] lavfi: add audio convert filter

Mon Aug 8 19:57:10 CEST 2011

On date Monday 2011-08-08 11:11:48 +0300, Mina Nagy Zaki encoded:
> Add aconvert filter to perform sample format and channel layout conversion.
> 
> Based on code by Stefano Sabatini and "S.N. Hemanth Meenakshisundaram"
> smeenaks at ucsd.edu.
> ---
>  libavfilter/Makefile               |    2 +
>  libavfilter/af_aconvert.c          |  418 ++++++++++++++++++++++++++++++++++++
>  libavfilter/af_aconvert_rematrix.c |  184 ++++++++++++++++
>  libavfilter/allfilters.c           |    1 +
>  4 files changed, 605 insertions(+), 0 deletions(-)
>  create mode 100644 libavfilter/af_aconvert.c
>  create mode 100644 libavfilter/af_aconvert_rematrix.c
> 
> diff --git a/libavfilter/Makefile b/libavfilter/Makefile
> index 686fd30..5576768 100644
> --- a/libavfilter/Makefile
> +++ b/libavfilter/Makefile
> @@ -2,6 +2,7 @@ include $(SUBDIR)../config.mak
>  
>  NAME = avfilter
>  FFLIBS = avutil
> +FFLIBS-$(CONFIG_ACONVERT_FILTER) += avcodec
>  FFLIBS-$(CONFIG_MOVIE_FILTER) += avformat avcodec
>  FFLIBS-$(CONFIG_SCALE_FILTER) += swscale
>  FFLIBS-$(CONFIG_MP_FILTER) += avcodec
> @@ -18,6 +19,7 @@ OBJS = allfilters.o                                                     \
>  
>  OBJS-$(CONFIG_AVCODEC)                       += avcodec.o
>  
> +OBJS-$(CONFIG_ACONVERT_FILTER)               += af_aconvert.o
>  OBJS-$(CONFIG_AFORMAT_FILTER)                += af_aformat.o
>  OBJS-$(CONFIG_ANULL_FILTER)                  += af_anull.o
>  
> diff --git a/libavfilter/af_aconvert.c b/libavfilter/af_aconvert.c
> new file mode 100644
> index 0000000..a51dfc6
> --- /dev/null
> +++ b/libavfilter/af_aconvert.c
> @@ -0,0 +1,418 @@
> +/*
> + * Copyright (c) 2010 S.N. Hemanth Meenakshisundaram <smeenaks at ucsd.edu>
> + * Copyright (c) 2011 Stefano Sabatini
> + * Copyright (c) 2011 Mina Nagy Zaki
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +/**
> + * @file
> + * sample format and channel layout conversion audio filter
> + * based on code in libavcodec/resample.c by Fabrice Bellard and
> + * libavcodec/audioconvert.c by Michael Niedermayer
> + */
> +
> +#include "libavcodec/audioconvert.h"
> +#include "libavutil/audioconvert.h"
> +#include "avfilter.h"
> +#include "internal.h"
> +
> +#define SFMT_TYPE uint8_t
> +#define REMATRIX_FUNC_NAME(NAME) NAME ## _u8
> +#include "af_aconvert_rematrix.c"
> +
> +#define SFMT_TYPE int16_t
> +#define REMATRIX_FUNC_NAME(NAME) NAME ## _s16
> +#include "af_aconvert_rematrix.c"
> +
> +#define SFMT_TYPE int32_t
> +#define REMATRIX_FUNC_NAME(NAME) NAME ## _s32
> +#include "af_aconvert_rematrix.c"
> +
> +#define FLOATING
> +
> +#define SFMT_TYPE float
> +#define REMATRIX_FUNC_NAME(NAME) NAME ## _flt
> +#include "af_aconvert_rematrix.c"
> +
> +#define SFMT_TYPE double
> +#define REMATRIX_FUNC_NAME(NAME) NAME ## _dbl
> +#include "af_aconvert_rematrix.c"
> +
> +typedef struct {
> +    int max_nb_samples;                     ///< maximum number of buffered samples
> +    enum AVSampleFormat out_sample_fmt;     ///< output sample format
> +    int64_t out_chlayout;                   ///< output channel layout
> +
> +    int out_strides[8], in_strides [8];     ///< input/output strides for av_audio_convert
> +
> +    AVFilterBufferRef *mix_samplesref;      ///< rematrixed buffer
> +    AVFilterBufferRef *out_samplesref;      ///< output buffer after required conversions
> +    uint8_t *packed_data[8];                ///< pointers for packing conversion
> +    uint8_t **in_data, **out_data;          ///< input/output for av_audio_convert
> +
> +    AVAudioConvert *audioconvert_ctx;       ///< context for conversion to output sample format
> +
> +    void (*convert_chlayout) ();
> +} AConvertContext;
> +
> +static av_cold int init(AVFilterContext *ctx, const char *args, void *opaque)
> +{
> +    AConvertContext *aconvert = ctx->priv;
> +    char *arg;
> +    aconvert->out_sample_fmt = aconvert->out_chlayout = -1;
> +
> +    /* the special argument 'copy' means no conversion */
> +

> +    if ((arg = strsep(&args, ",")) && strcmp(arg, "copy")) {
> +        aconvert->out_sample_fmt = ff_parse_sample_format(arg, ctx);
> +        if (aconvert->out_sample_fmt == -1) return AVERROR(EINVAL);
> +    }
> +
> +    if ((arg = strsep(&args, ",")) && strcmp(arg, "copy")) {
> +        aconvert->out_chlayout = ff_parse_channel_layout(arg, ctx);
> +        if (aconvert->out_chlayout == -1) return AVERROR(EINVAL);
> +    }

Ditto.

> +
> +    return 0;
> +}
> +
> +static av_cold void uninit(AVFilterContext *ctx)
> +{
> +    AConvertContext *aconvert = ctx->priv;
> +    avfilter_unref_buffer(aconvert->mix_samplesref);
> +    avfilter_unref_buffer(aconvert->out_samplesref);
> +    if (aconvert->audioconvert_ctx)
> +        av_audio_convert_free(aconvert->audioconvert_ctx);
> +}
> +
> +static int query_formats(AVFilterContext *ctx)
> +{
> +    AVFilterFormats *formats = NULL;
> +    AConvertContext *aconvert = ctx->priv;
> +
> +    avfilter_formats_ref(avfilter_all_packing_formats(),
> +                        &ctx->outputs[0]->in_packing);
> +    avfilter_formats_ref(avfilter_all_packing_formats(),
> +                        &ctx->inputs[0]->out_packing);
> +
> +    avfilter_formats_ref(avfilter_all_formats(AVMEDIA_TYPE_AUDIO),
> +                        &ctx->inputs[0]->out_formats);
> +    if (aconvert->out_sample_fmt != AV_SAMPLE_FMT_NONE) {
> +        avfilter_add_format(&formats, aconvert->out_sample_fmt);
> +        avfilter_formats_ref(formats, &ctx->outputs[0]->in_formats);
> +    } else
> +        avfilter_formats_ref(avfilter_all_formats(AVMEDIA_TYPE_AUDIO),
> +                             &ctx->outputs[0]->in_formats);
> +
> +    avfilter_formats_ref(avfilter_all_channel_layouts(),
> +                         &ctx->inputs[0]->out_chlayouts);
> +    if (aconvert->out_chlayout != -1) {
> +        formats = NULL;
> +        avfilter_add_format(&formats, aconvert->out_chlayout);
> +        avfilter_formats_ref(formats, &ctx->outputs[0]->in_chlayouts);
> +    } else
> +        avfilter_formats_ref(avfilter_all_channel_layouts(),
> +                             &ctx->outputs[0]->in_chlayouts);
> +
> +    return 0;
> +}
> +
> +#define SET_CONVERT_CHLAYOUT_SFMT(FUNC)                     \
> +    switch (inlink->format) {                               \
> +    case AV_SAMPLE_FMT_U8:                                  \
> +        aconvert->convert_chlayout = FUNC ## _u8;  break;   \
> +    case AV_SAMPLE_FMT_S16:                                 \
> +        aconvert->convert_chlayout = FUNC ## _s16; break;   \
> +    case AV_SAMPLE_FMT_S32:                                 \
> +        aconvert->convert_chlayout = FUNC ## _s32; break;   \
> +    case AV_SAMPLE_FMT_FLT:                                 \
> +        aconvert->convert_chlayout = FUNC ## _flt; break;   \
> +    case AV_SAMPLE_FMT_DBL:                                 \
> +        aconvert->convert_chlayout = FUNC ## _dbl; break;   \
> +    }
> +
> +#define SET_CONVERT_CHLAYOUT(OUT, FUNC)                     \
> +    if (aconvert->out_chlayout == OUT) {                    \
> +        if (inlink->planar)                                 \
> +            SET_CONVERT_CHLAYOUT_SFMT(FUNC ## _planar)      \
> +        else                                                \
> +            SET_CONVERT_CHLAYOUT_SFMT(FUNC ## _packed)      \
> +    }
> +
> +#define SET_CONVERT_CHLAYOUT2(IN, OUT, FUNC)                \
> +    if (inlink->channel_layout == IN &&                     \
> +        aconvert->out_chlayout == OUT) {                    \
> +        if (inlink->planar)                                 \
> +            SET_CONVERT_CHLAYOUT_SFMT(FUNC ## _planar)      \
> +        else                                                \
> +            SET_CONVERT_CHLAYOUT_SFMT(FUNC ## _packed)      \
> +    }
> +
> +static int config_output(AVFilterLink *outlink)
> +{
> +    AVFilterLink *inlink = outlink->src->inputs[0];
> +    AConvertContext *aconvert = outlink->src->priv;
> +    char buf1[32], buf2[32];
> +
> +    /* if not specified in args, use the format and layout of the output */
> +    if (aconvert->out_sample_fmt == AV_SAMPLE_FMT_NONE)
> +        aconvert->out_sample_fmt = outlink->format;
> +    if (aconvert->out_chlayout == -1)
> +        aconvert->out_chlayout = outlink->channel_layout;
> +
> +    av_get_channel_layout_string(buf1, sizeof(buf1),
> +                                 -1, inlink ->channel_layout);
> +    av_get_channel_layout_string(buf2, sizeof(buf2),
> +                                 -1, outlink->channel_layout);
> +    av_log(outlink->src, AV_LOG_INFO, "fmt:%s cl:%s planar:%i -> fmt:%s cl:%s planar:%i\n",
> +           av_get_sample_fmt_name(inlink ->format), buf1, inlink->planar,
> +           av_get_sample_fmt_name(outlink->format), buf2, outlink->planar);
> +
> +    /* handle stereo_to_mono and mono_to_stereo separately because there are
> +     * no planar versions */
> +    if (!inlink->planar                                &&
> +         inlink->channel_layout == AV_CH_LAYOUT_STEREO &&
> +         aconvert->out_chlayout == AV_CH_LAYOUT_MONO) {
> +       SET_CONVERT_CHLAYOUT_SFMT(stereo_to_mono_packed);
> +    }
> +    else
> +    if (!outlink->planar                               &&
> +         inlink->channel_layout == AV_CH_LAYOUT_MONO   &&
> +         aconvert->out_chlayout == AV_CH_LAYOUT_STEREO) {
> +       SET_CONVERT_CHLAYOUT_SFMT(mono_to_stereo_packed);
> +    }
> +
> +    if (!aconvert->convert_chlayout &&
> +        inlink->channel_layout != outlink->channel_layout) {
> +             SET_CONVERT_CHLAYOUT2(AV_CH_LAYOUT_STEREO,  AV_CH_LAYOUT_5POINT1, stereo_to_surround_5p1)
> +        else SET_CONVERT_CHLAYOUT2(AV_CH_LAYOUT_5POINT1, AV_CH_LAYOUT_STEREO,  surround_5p1_to_stereo)
> +        else SET_CONVERT_CHLAYOUT(                       AV_CH_LAYOUT_MONO,    mono_downmix)
> +    }
> +
> +    /* If there's no channel conversion function and output is stereo,
> +     * we can do generic stereo downmixing:
> +     * if there's a format conversion then stereo downmixing is implicitly
> +     * done by av_audio_convert.
> +     * if there's no format conversion then packed stereo downmixing is
> +     * explicitly done by av_audio_convert, while planar is done in
> +     * filter_samples
> +     */
> +    if (!aconvert->convert_chlayout                       &&
> +        outlink->channel_layout != inlink->channel_layout &&
> +        outlink->channel_layout != AV_CH_LAYOUT_STEREO) {
> +        av_log(outlink->src, AV_LOG_ERROR,
> +                "Unsupported channel layout conversion requested!\n");
> +        return AVERROR(EINVAL);
> +    }
> +
> +    return 0;
> +}
> +
> +static int init_buffers(AVFilterLink *inlink, int nb_samples)
> +{
> +    AConvertContext *aconvert = inlink->dst->priv;
> +    AVFilterLink * const outlink = inlink->dst->outputs[0];
> +    int i, packed_stride = 0;
> +    int in_channels  =
> +            av_get_channel_layout_nb_channels(inlink->channel_layout),
> +        out_channels =
> +            av_get_channel_layout_nb_channels(outlink->channel_layout);
> +    const short
> +        stereo_downmix = out_channels   == 2               &&
> +                         !aconvert->convert_chlayout,
> +        format_conv    = inlink->format != outlink->format,
> +        packing_conv   = inlink->planar != outlink->planar &&
> +                         in_channels    != 1               &&
> +                         out_channels   != 1;
> +
> +    aconvert->max_nb_samples = nb_samples;
> +    uninit(inlink->dst);
> +
> +    // rematrixing
> +    if (aconvert->convert_chlayout) {
> +        aconvert->mix_samplesref =
> +                avfilter_get_audio_buffer(outlink,
> +                                          AV_PERM_WRITE | AV_PERM_REUSE2,
> +                                          inlink->format,
> +                                          nb_samples,
> +                                          outlink->channel_layout,
> +                                          inlink->planar);
> +        if (!aconvert->mix_samplesref)
> +            return AVERROR(ENOMEM);
> +        in_channels = out_channels;
> +    }
> +
> +    /* If there's any conversion left to do, we need a buffer */
> +    if (format_conv || packing_conv || stereo_downmix) {
> +        aconvert->out_samplesref = avfilter_get_audio_buffer(outlink,
> +                                          AV_PERM_WRITE | AV_PERM_REUSE2,
> +                                          outlink->format,
> +                                          nb_samples,
> +                                          outlink->channel_layout,
> +                                          outlink->planar);
> +        if (!aconvert->out_samplesref)
> +            return AVERROR(ENOMEM);
> +    }
> +
> +    /* if there's a format/mode conversion or packed stereo downmixing,
> +     * we need an audio_convert context
> +     */
> +    if (format_conv || packing_conv || (stereo_downmix && !outlink->planar)) {
> +        aconvert->in_strides[0]  = av_get_bytes_per_sample(inlink->format);
> +        aconvert->out_strides[0] = av_get_bytes_per_sample(outlink->format);
> +
> +        aconvert->out_data = aconvert->out_samplesref->data;
> +        if (aconvert->mix_samplesref)
> +            aconvert->in_data  = aconvert->mix_samplesref->data;
> +
> +        if (packing_conv) {
> +            if (outlink->planar) {

> +                if (aconvert->mix_samplesref)
> +                    aconvert->packed_data[0] =
> +                        aconvert->mix_samplesref->data[0];

what in the other case?

> +                aconvert->in_data         = aconvert->packed_data;
> +                packed_stride             = aconvert->in_strides[0];
> +                aconvert->in_strides[0]  *= in_channels;
> +            } else {
> +                aconvert->packed_data[0]  = aconvert->out_samplesref->data[0];
> +                aconvert->out_data        = aconvert->packed_data;
> +                packed_stride             = aconvert->out_strides[0];
> +                aconvert->out_strides[0] *= out_channels;
> +            }
> +        } else if (!outlink->planar || (stereo_downmix && in_channels == 1)) {
> +            out_channels = 1;
> +        }
> +
> +        for (i = 1; i < out_channels; i++) {
> +            aconvert->packed_data[i] = aconvert->packed_data[i-1] +
> +                                           packed_stride;
> +            aconvert->in_strides[i]  = aconvert->in_strides[0];
> +            aconvert->out_strides[i] = aconvert->out_strides[0];
> +        }
> +
> +        aconvert->audioconvert_ctx =
> +                av_audio_convert_alloc(outlink->format, out_channels,
> +                                       inlink->format,  out_channels, NULL, 0);
> +        if (!aconvert->audioconvert_ctx)
> +            return AVERROR(ENOMEM);
> +    }
> +
> +    return 0;
> +}
> +
> +static void filter_samples(AVFilterLink *inlink, AVFilterBufferRef *insamplesref)
> +{
> +    AConvertContext *aconvert = inlink->dst->priv;
> +    AVFilterBufferRef *curbuf = insamplesref;
> +    AVFilterLink * const outlink = inlink->dst->outputs[0];
> +    int nb_channels = av_get_channel_layout_nb_channels(
> +                          curbuf->audio->channel_layout);
> +
> +    if (!aconvert->max_nb_samples ||
> +        (curbuf->audio->nb_samples > aconvert->max_nb_samples))

> +        if(init_buffers(inlink, curbuf->audio->nb_samples))
> +            return;

nit: if (init_buffers(...) < 0)
         return;

warn in case, since the filter will stop to work at this point.

> +
> +    if (aconvert->mix_samplesref) {
> +        if (inlink->planar && nb_channels != 1)
> +            aconvert->convert_chlayout(aconvert->mix_samplesref->data,
> +                                       curbuf->data,
> +                                       curbuf->audio->nb_samples,
> +                                       nb_channels);
> +        else
> +            aconvert->convert_chlayout(aconvert->mix_samplesref->data[0],
> +                                       curbuf->data[0],
> +                                       curbuf->audio->nb_samples,
> +                                       nb_channels);
> +
> +        curbuf = aconvert->mix_samplesref;
> +    }
> +
> +    if (aconvert->audioconvert_ctx) {
> +        if (!aconvert->mix_samplesref) {
> +            if (aconvert->in_data == aconvert->packed_data) {
> +                int i, packed_stride = av_get_bytes_per_sample(inlink->format);
> +                aconvert->packed_data[0] = curbuf->data[0];
> +                for (i = 1; i < nb_channels; i++)
> +                    aconvert->packed_data[i] =
> +                                aconvert->packed_data[i-1] + packed_stride;
> +            } else {
> +                aconvert->in_data = curbuf->data;
> +            }
> +        }
> +
> +        if (inlink->planar == outlink->planar && !outlink->planar)
> +            nb_channels = av_get_channel_layout_nb_channels(
> +                              curbuf->audio->channel_layout);
> +        else
> +            nb_channels = 1;
> +
> +        av_audio_convert(aconvert->audioconvert_ctx,
> +                         (void * const *) aconvert->out_data,
> +                         aconvert->out_strides,
> +                         (const void * const *) aconvert->in_data,
> +                         aconvert->in_strides,
> +                         curbuf->audio->nb_samples * nb_channels);
> +
> +        curbuf = aconvert->out_samplesref;
> +    }
> +
> +    /* Handle generic planar stereo downmixing by simply copying streams */
> +    if (outlink->channel_layout == AV_CH_LAYOUT_STEREO &&
> +        !aconvert->convert_chlayout) {
> +        int size =
> +            av_get_bytes_per_sample(curbuf->format) * curbuf->audio->nb_samples;
> +
> +        if (!aconvert->audioconvert_ctx)
> +            memcpy(aconvert->out_samplesref->data[0], curbuf->data[0], size);
> +

> +        memcpy(aconvert->out_samplesref->data[1],
> +               nb_channels == 1 ? curbuf->data[0] : curbuf->data[1],
> +               size);

this is meant for *up*mixing (1 -> 2), right?

> +
> +        curbuf = aconvert->out_samplesref;
> +    }
> +
> +    avfilter_copy_buffer_ref_props(curbuf, insamplesref);
> +    curbuf->audio->channel_layout = outlink->channel_layout;
> +    curbuf->audio->planar         = outlink->planar;
> +
> +    avfilter_filter_samples(inlink->dst->outputs[0],
> +                            avfilter_ref_buffer(curbuf, ~0));
> +    avfilter_unref_buffer(insamplesref);
> +}
[...]

In general, the code is somehow convoluted and I had an hard time at
getting how it works. I see the complexity is required for
optimization purposes, but from the maintainability point of view this
code will be painful.

The general layout:

* channel mixing/rematrixing (we have packed and planar routines, so no
  need to convert from planar<->packed)

* conversion/requantization
  av_audio_convert() is general enough so it can deal with both planar
  and packed formats, you just need to fill an intermediary struct
  (data+strides) for it, no memcpies should be needed. This should also
  be able to perform planar<->packed if needed, so the next step won't
  be necessary

* planar<->packed conversion if needed, if conversion was done it
  shouldn't be necessary 

Possibly each stage shouldn't be intermixed with the previous one, so
you have a simpler code path.

In general I'm not sure I like the idea of doing downmixing/upmixing
in the conversion phase and adding too many special cases, since
that's making the code flow really hard to follow.
-- 
FFmpeg = Freak and Funny Murdering Picky Entertaining Guru