[FFmpeg-devel] [PATCH 2/2] lavfi: add amerge audio filter.

Wed Dec 28 17:09:49 CET 2011

On date Saturday 2011-12-24 17:57:53 +0100, Nicolas George encoded:
> 
> Signed-off-by: Nicolas George <nicolas.george at normalesup.org>
> ---
>  Changelog                |    1 +
>  doc/filters.texi         |   30 +++++
>  libavfilter/Makefile     |    1 +
>  libavfilter/af_amerge.c  |  273 ++++++++++++++++++++++++++++++++++++++++++++++
>  libavfilter/allfilters.c |    1 +
>  5 files changed, 306 insertions(+), 0 deletions(-)
>  create mode 100644 libavfilter/af_amerge.c
> 
> diff --git a/Changelog b/Changelog
> index 7044b4c..430048e 100644
> --- a/Changelog
> +++ b/Changelog
> @@ -8,6 +8,7 @@ version next:
>  - OpenMG Audio muxer
>  - SMJPEG demuxer
>  - astreamsync audio filter
> +- amerge audio filter
>  
>  
>  version 0.9:
> diff --git a/doc/filters.texi b/doc/filters.texi
> index ce4a74f..77e9c50 100644
> --- a/doc/filters.texi
> +++ b/doc/filters.texi
> @@ -156,6 +156,36 @@ aformat=u8\\,s16:mono:packed
>  aformat=s16:mono\\,stereo:all
>  @end example
>  
> + at section amerge
> +
> +Merge two audio streams into a single multi-channel stream.
> +
> +This filter does not need any argument.
> +
> +If the channel layouts of the inputs are disjoint, and therefore compatible,
> +the channel layout of the output will be set accordingly and the channels
> +will be reordered as necessary. If the channel layouts of the inputs are not
> +disjoint, the output will have all the channels of the first input then all
> +the channels of the second input, in that order, and the channel layout of
> +the output will be the default value corresponding to the total number of
> +channels.
> +
> +For example, if the first input is in 2.1 (FL+FR+LF) and the second input
> +is FC+BL+BR, then the output will be in 5.1, with the channels in the
> +following order: a1, a2, b1, a3, b2, b3 (a1 is the first channel of the
> +first input, b1 is the first channel of the second input).
> +
> +On the other hand, if both input are in stereo, the output channels will be
> +in the default order: a1, a2, b1, b2, and the channel layout will be
> +arbitrarily set to 4.0, which may or may not be the expected value.
> +
> +Both inputs must have the same sample rate, format and packing.
> +
> +Example: merge two mono files into a stereo stream:
> + at example
> +amovie=left.wav [l] ; amovie=right.mp3 [r] ; [l] [r] amerge
> + at end example
> +
>  @section anull
>  
>  Pass the audio source unchanged to the output.
> diff --git a/libavfilter/Makefile b/libavfilter/Makefile
> index 555767b..445daa1 100644
> --- a/libavfilter/Makefile
> +++ b/libavfilter/Makefile
> @@ -26,6 +26,7 @@ OBJS-$(CONFIG_AVCODEC)                       += avcodec.o
>  
>  OBJS-$(CONFIG_ACONVERT_FILTER)               += af_aconvert.o
>  OBJS-$(CONFIG_AFORMAT_FILTER)                += af_aformat.o
> +OBJS-$(CONFIG_AMERGE_FILTER)                 += af_amerge.o
>  OBJS-$(CONFIG_ANULL_FILTER)                  += af_anull.o
>  OBJS-$(CONFIG_ARESAMPLE_FILTER)              += af_aresample.o
>  OBJS-$(CONFIG_ASHOWINFO_FILTER)              += af_ashowinfo.o
> diff --git a/libavfilter/af_amerge.c b/libavfilter/af_amerge.c
> new file mode 100644
> index 0000000..fa86d26
> --- /dev/null
> +++ b/libavfilter/af_amerge.c
> @@ -0,0 +1,273 @@
> +/*
> + * Copyright (c) 2011 Nicolas George <nicolas.george at normalesup.org>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +/**
> + * @file
> + * Audio merging filter
> + */
> +

> +#include <stdlib.h>
> +#include "libavcodec/avcodec.h"

unnecessary?

> +#include "libavutil/avstring.h"
> +#include "libswresample/swresample.h" // only for SWR_CH_MAX
> +#include "avfilter.h"
> +#include "internal.h"
> +
> +#define QUEUE_SIZE 16
> +
> +struct amerge_context {

> +    int route[SWR_CH_MAX];
> +    int nb_in_ch[2];

these may benefit from a doxy

> +    int bps;
> +    struct amerge_queue {
> +        AVFilterBufferRef *buf[QUEUE_SIZE];
> +        int nb_buf, nb_samples, pos;
> +    } queue[2];
> +};

consistency nit: struct amerge_context -> AMergeContext

> +
> +static av_cold int init(AVFilterContext *ctx, const char *args0, void *opaque)
> +{
> +    return 0;
> +}

unnecessary?

> +
> +static av_cold void uninit(AVFilterContext *ctx)
> +{
> +    struct amerge_context *am = ctx->priv;
> +    int i, j;
> +
> +    for (i = 0; i < 2; i++)
> +        for (j = 0; j < am->queue[i].nb_buf; j++)
> +            avfilter_unref_buffer(am->queue[i].buf[j]);
> +}

This reminds me that we still don't properly support flush.

> +
> +static int query_formats(AVFilterContext *ctx)
> +{
> +    struct amerge_context *am = ctx->priv;
> +    int64_t inlayout[2], outlayout;
> +    const int packing_fmts[] = { AVFILTER_PACKED, -1 };
> +    AVFilterFormats *formats;
> +    int i;
> +
> +    for (i = 0; i < 2; i++) {
> +        if (!ctx->inputs[i]->in_chlayouts ||
> +            !ctx->inputs[i]->in_chlayouts->format_count) {
> +            av_log(ctx, AV_LOG_ERROR,
> +                   "No channel layout for input %d\n", i + 1);
> +            return AVERROR(EINVAL);
> +        }
> +        inlayout[i] = ctx->inputs[i]->in_chlayouts->formats[0];
> +        if (ctx->inputs[i]->in_chlayouts->format_count > 1) {
> +            char buf[256];
> +            av_get_channel_layout_string(buf, sizeof(buf), 0, inlayout[i]);
> +            av_log(ctx, AV_LOG_INFO, "Using \"%s\" for input %d\n", buf, i + 1);
> +        }
> +        am->nb_in_ch[i] = av_get_channel_layout_nb_channels(inlayout[i]);
> +    }

> +    if (am->nb_in_ch[0] + am->nb_in_ch[1] > SWR_CH_MAX) {
> +        av_log(ctx, AV_LOG_ERROR, "Too many channels\n");
> +        return AVERROR(EINVAL);
> +    }

maybe mention the value of SWR_CH_MAX

> +    if (inlayout[0] & inlayout[1]) {
> +        av_log(ctx, AV_LOG_WARNING,
> +               "Inputs overlap: output layout will be meaningless\n");
> +        for (i = 0; i < am->nb_in_ch[0] + am->nb_in_ch[1]; i++)
> +            am->route[i] = i;
> +        outlayout = av_get_default_channel_layout(am->nb_in_ch[0] +
> +                                                  am->nb_in_ch[1]);
> +        if (!outlayout)
> +            outlayout = ((int64_t)1 << (am->nb_in_ch[0] + am->nb_in_ch[1])) - 1;
> +    } else {
> +        int *route[2] = { am->route, am->route + am->nb_in_ch[0] };

> +        int c, out_no = 0;

nit: out_no is confusing, indeed I can't even fid a better name, maybe you can

> +
> +        outlayout = inlayout[0] | inlayout[1];
> +        for (c = 0; c < 64; c++)
> +            for (i = 0; i < 2; i++)
> +                if ((inlayout[i] >> c) & 1)
> +                    *(route[i]++) = out_no++;
> +    }
> +    formats = avfilter_make_all_formats(AVMEDIA_TYPE_AUDIO);
> +    avfilter_set_common_sample_formats(ctx, formats);
> +    formats = avfilter_make_format_list(packing_fmts);
> +    avfilter_set_common_packing_formats(ctx, formats);
> +    for (i = 0; i < 2; i++) {
> +        formats = NULL;
> +        avfilter_add_format(&formats, inlayout[i]);
> +        avfilter_formats_ref(formats, &ctx->inputs[i]->out_chlayouts);
> +    }
> +    formats = NULL;
> +    avfilter_add_format(&formats, outlayout);
> +    avfilter_formats_ref(formats, &ctx->outputs[0]->in_chlayouts);
> +    return 0;
> +}
> +
> +static int config_output(AVFilterLink *outlink)
> +{
> +    AVFilterContext *ctx = outlink->src;
> +    struct amerge_context *am = ctx->priv;
> +
> +    if (ctx->inputs[0]->sample_rate != ctx->inputs[1]->sample_rate) {
> +        av_log(ctx, AV_LOG_ERROR,
> +               "Inputs must have the same sample rate "
> +               "(%"PRIi64" vs %"PRIi64")\n",
> +               ctx->inputs[0]->sample_rate, ctx->inputs[1]->sample_rate);
> +        return AVERROR(EINVAL);
> +    }
> +    am->bps = av_get_bytes_per_sample(ctx->outputs[0]->format);
> +    outlink->sample_rate = ctx->inputs[0]->sample_rate;
> +    outlink->time_base   = ctx->inputs[0]->time_base;

here it will be extremely helpful an INFO log of the kind:
in_chl1:%s in_chl2:%s -> out_chl:%s

> +    return 0;
> +}
> +
> +static int request_frame(AVFilterLink *outlink)
> +{
> +    AVFilterContext *ctx = outlink->src;
> +    struct amerge_context *am = ctx->priv;
> +    int i;
> +
> +    for (i = 0; i < 2; i++)
> +        if (!am->queue[i].nb_samples)
> +            avfilter_request_frame(ctx->inputs[i]);

What when one stream ends? I see two possible policies, the filter may
continue to stream and fill with zeros the finished channels, or
return EOF when the first one finishes (configurable?).

> +    return 0;
> +}
> +

> +static inline void copy_samples(int nb_in_ch[2], int *route0, uint8_t *ins[2],
> +                                uint8_t **outs, int ns, int bps)

please add some doxy

>From my understanding:

/**
 * Copy samples from the two input channels in ins[2] to outs.
 * Samples data is assumed to be in packed format.
 *
 * @param nb_in_ch the number of channels in the two input buffers.
 * @param route0 int arrays which provides a map from the input to the
 *               output channel, has the form:
 *               [outch1_1 outch1_2 ... outch1_N1 outch2_1 ... outch2_N2]
 * @param ns number of samples per channel to copy to output
 */
static inline void copy_samples(int nb_in_ch[2], int *route0, uint8_t *ins[2],
                                uint8_t **outs, int ns, int bps)

> +{
> +    int *route;
> +    int i, c;
> +
> +    while (ns--) {
> +        route = route0;
> +        for (i = 0; i < 2; i++) {
> +            for (c = 0; c < nb_in_ch[i]; c++) {
> +                memcpy((*outs) + bps * *(route++), ins[i], bps);
> +                ins[i] += bps;
> +            }
> +        }
> +        *outs += (nb_in_ch[0] + nb_in_ch[1]) * bps;
> +    }
> +}
> +
> +static void filter_samples(AVFilterLink *inlink, AVFilterBufferRef *insamples)
> +{
> +    AVFilterContext *ctx = inlink->dst;
> +    struct amerge_context *am = ctx->priv;

> +    int in_no = inlink == ctx->inputs[1];

suggestion: inlink_id (more readable to me)

> +    struct amerge_queue *iq = &am->queue[in_no];
> +    int nb_samples, ns, i;
> +    AVFilterBufferRef *outbuf, **ibuf[2];

ibuf -> inbuf?

> +    uint8_t *ins[2], *outs;
> +
> +    if (iq->nb_buf == QUEUE_SIZE) {
> +        av_log(ctx, AV_LOG_ERROR, "Packet queue overflow; dropped\n");
> +        avfilter_unref_buffer(insamples);
> +        return;
> +    }

> +    iq->buf[iq->nb_buf++] = avfilter_ref_buffer(insamples, AV_PERM_READ |
> +                                                           AV_PERM_PRESERVE);

Is AV_PERM_PRESERVE really necessary?

> +    iq->nb_samples += insamples->audio->nb_samples;
> +    avfilter_unref_buffer(insamples);
> +    if (!am->queue[!in_no].nb_samples)
> +        return;
> +
> +    nb_samples = FFMIN(am->queue[0].nb_samples,
> +                       am->queue[1].nb_samples);
> +    outbuf = avfilter_get_audio_buffer(ctx->outputs[0], AV_PERM_WRITE,
> +                                       nb_samples);
> +    outs = outbuf->data[0];
> +    for (i = 0; i < 2; i++) {
> +        ibuf[i] = am->queue[i].buf;
> +        ins[i] = (*ibuf[i])->data[0] +
> +                 am->queue[i].pos * am->nb_in_ch[i] * am->bps;
> +    }
> +    while (nb_samples) {
> +        ns = nb_samples;
> +        for (i = 0; i < 2; i++)
> +            ns = FFMIN(ns, (*ibuf[i])->audio->nb_samples - am->queue[i].pos);

> +        /* Unroll the most common sample formats: speed +~350% for the loop,
> +           +~13% overall (including two common decoders) */

That seems weird to me, but I'm possibly naive about what the compiler
is expected to do, I'd expect that a const int bps in copy_samples()
would have the same effect...

> +        switch (am->bps) {
> +            case 1:
> +                copy_samples(am->nb_in_ch, am->route, ins, &outs, ns, 1);
> +                break;
> +            case 2:
> +                copy_samples(am->nb_in_ch, am->route, ins, &outs, ns, 2);
> +                break;
> +            case 4:
> +                copy_samples(am->nb_in_ch, am->route, ins, &outs, ns, 4);
> +                break;
> +            default:
> +                copy_samples(am->nb_in_ch, am->route, ins, &outs, ns, am->bps);
> +                break;
> +        }
> +
> +        nb_samples -= ns;
> +        for (i = 0; i < 2; i++) {
> +            am->queue[i].nb_samples -= ns;
> +            am->queue[i].pos += ns;
> +            if (am->queue[i].pos == (*ibuf[i])->audio->nb_samples) {
> +                am->queue[i].pos = 0;
> +                avfilter_unref_buffer(*ibuf[i]);
> +                *ibuf[i] = NULL;
> +                ibuf[i]++;
> +                ins[i] = *ibuf[i] ? (*ibuf[i])->data[0] : NULL;
> +            }
> +        }
> +    }
> +    for (i = 0; i < 2; i++) {
> +        int nbufused = ibuf[i] - am->queue[i].buf;
> +        if (nbufused) {
> +            am->queue[i].nb_buf -= nbufused;
> +            memmove(am->queue[i].buf, ibuf[i],
> +                    am->queue[i].nb_buf * sizeof(**ibuf));
> +        }
> +    }
> +    avfilter_filter_samples(ctx->outputs[0], outbuf);
> +}
> +
> +AVFilter avfilter_af_amerge = {
> +    .name          = "amerge",

> +    .description   = NULL_IF_CONFIG_SMALL("Merge two audio streams into "
> +                                          "a single multi-channel stream"),

nit+: missing final dot

And thanks for taking care to write this very important filter.
-- 
FFmpeg = Fostering and Fanciful Magic Problematic Ecumenical God