[FFmpeg-soc] [PATCH] Add af_sox initial draft

Sun Aug 15 20:08:57 CEST 2010

On date Sunday 2010-08-15 00:05:07 -0700, S.N. Hemanth Meenakshisundaram encoded:
> 
> Sox wrapper for lavfi:
> 
> 1. sox internally uses only integer 32-bit signed format for all its
> effects. It can convert to S32 if other format inputs are supplied but I
> guess we would want to do any such conversions internally (using
> inserted resample filter) and only send s32 data to sox?

Yes.

> 2. sox reads data from an 'input handler' effect in af_sox that forms
> the first effect of the chain and writes data out to last effect
> 'output_handler' again defined in af_sox. Unfortunately, it does not
> guarantee when an input will be processed and output. Also inserting x
> samples need not mean all of them will be processed together. Hence the
> current design is as follows:
> 
> i. i/p is assumed to be s32 (achieved with a resample filter inserted
> ahead) and o/p is also changed to other formats if necessary by another
> af_resample. The resample filter will also serve to keep channels constant.
> 
> ii. The effects chain (including input and output handlers) is built and
> configured during init.
> 
> iii. Incoming s32 samples (coming in via filter samples) are added to an
> input fifo.
> 
> iv. When sox requests data through its 'input handler' effect, we pull
> data from the fifo and copy it into the buffer provided by sox for input.
> 
> v. When sox outputs any data, we queue it to an output fifo.
> 
> vi. When the next lavfi filter calls request_frame, we create a lavfi
> audio buffer of predetermined size and copy data from the output fifo
> into it and pass it on to the next filter.

Looks fine.

If I understood it correctly so we have this design:
[lavfi input fifo] [sox input handler] [sox effects] [sox output handler] [lavfi output fifo]

> Please review code and comment on the design as well. I will post
> updates as I make more changes.
> 
> Regards,
> 
> ---
>  libavfilter/Makefile     |    1 +
>  libavfilter/af_sox.c     |  323 ++++++++++++++++++++++++++++++++++++++++++++++
>  libavfilter/allfilters.c |    1 +
>  3 files changed, 325 insertions(+), 0 deletions(-)
>  create mode 100644 libavfilter/af_sox.c

Please add the required configure magic, for examples check the
drawtext filter or the opencv filter I recently posted on devel.

> diff --git a/libavfilter/Makefile b/libavfilter/Makefile
> index 920f428..4c4ec74 100644
> --- a/libavfilter/Makefile
> +++ b/libavfilter/Makefile
> @@ -17,6 +17,7 @@ OBJS = allfilters.o                                                     \
>  OBJS-$(CONFIG_AFIFO_FILTER)                  += af_afifo.o
>  OBJS-$(CONFIG_NULLAUD_FILTER)                += af_nullaud.o
>  OBJS-$(CONFIG_ASPLIT_FILTER)                 += af_asplit.o
> +OBJS-$(CONFIG_SOX_FILTER)                    += af_sox.o
>  
>  OBJS-$(CONFIG_ASPECT_FILTER)                 += vf_aspect.o
>  OBJS-$(CONFIG_CROP_FILTER)                   += vf_crop.o
> diff --git a/libavfilter/af_sox.c b/libavfilter/af_sox.c
> new file mode 100644
> index 0000000..eb1b8aa
> --- /dev/null
> +++ b/libavfilter/af_sox.c
> @@ -0,0 +1,323 @@
> +/*
> + * copyright (c) 2010 S.N. Hemanth Meenakshisundaram
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +/**
> + * @file
> + * Sox Filter
> + */
> +
> +#include "avfilter.h"
> +#include "libavcodec/audioconvert.h"
> +#include "libavutil/fifo.h"
> +#include <sox/sox.h>
> +#include <assert.h>
> +
> +typedef struct {
> +    sox_effects_chain_t * chain; ///< handle to sox effects chain.
> +    AVFifoBuffer *in_fifo;       ///< fifo buffer of input audio frame pointers
> +    AVFifoBuffer *out_fifo;      ///< fifo buffer of output audio data from sox
> +    int64_t ch_layout;           ///< channel layout of data handled
> +    int nb_channels;             ///< number of channels in our channel layout
> +    int out_size;                ///< desired size of each output audio buffer
> +} SoxContext;
> +
> +static int query_formats(AVFilterContext *ctx)
> +{
> +    // Sox effects only operate on signed 32-bit integer audio data.
> +    enum SampleFormat sample_fmts[] = {
> +        SAMPLE_FMT_S32, PIX_FMT_NONE
> +    };
> +
> +    avfilter_set_common_formats(ctx, avfilter_make_format_list(sample_fmts));
> +    return 0;
> +}
> +
> +typedef struct {
> +    SoxContext *lavfi_ctx;
> +} sox_inout_ctx;
> +

> +// configures the sox effect for sample input.
> +static int inout_config_opts(sox_effect_t * effp, int argc, char **argv)

please call effp => effect, here and below

> +{
> +    SoxContext *sox;
> +    if (argc < 2) {
> +        lsx_fail("lavfi context not supplied");
> +        return (SOX_EOF);
> +    }
> +    sscanf(argv[1], "%ld", (long int *)&sox);
> +    ((sox_inout_ctx *)effp->priv)->lavfi_ctx = sox;
> +}
> +
> +/**
> + * a sox effect handler to handle input of samples to the effects chain.
> + * The function that will be called to input samples into the effects chain.
> + */
> +static int input_drain(sox_effect_t *effp,

> +                       sox_sample_t *obuf, size_t * osamp) {

Maybe:
effp    => effect
obuf    => [o_ or out_ or output_]samples
osample => [o_ or out_ or output_]samples_size

are more clear names.

Nit: '{' on its own line.

> +
> +    SoxContext *sox = ((sox_inout_ctx *)effp->priv)->lavfi_ctx;
> +    AVFilterBufferRef *samplesref;
> +    int input_nb_samples = 0;
> +
> +    if (!av_fifo_size(sox->in_fifo)) {
> +        av_log(sox, AV_LOG_DEBUG,
> +               "sox chain requested audio data when none available, sending silence!\n");
> +        memset(obuf, 0, *osamp);
> +        return SOX_SUCCESS;
> +    }
> +
> +    // read first audio frame from queued input buffers and give it to sox.
> +    av_fifo_generic_read(sox->in_fifo, &samplesref, sizeof(samplesref), NULL);
> +
> +    /**
> +     * inside lavfi, nb_samples is number of samples in each channel, while in sox
> +     * number of samples refers to the total number over all channels
> +     */
> +    input_nb_samples = samplesref->audio->samples_nb * sox->nb_channels;
> +
> +    // ensure that *osamp is a multiple of the number of channels.
> +    *osamp -= *osamp % sox->nb_channels;
> +
> +    /**
> +     * FIXME: Right now, if sox chain accepts fewer samples than in one buffer, we drop
> +     * remaining data. We should be taking the required data and preserving the rest.
> +     * Luckily, this is highly unlikely.
> +     */

> +    if (*osamp < input_nb_samples)
> +        input_nb_samples = *osamp;

I have a doubt about this, is osamp a numer of samples or the used size
of the samples buffer?
In the latter case this check looks wrong.

> +
> +    memcpy(obuf, samplesref->data[0], input_nb_samples*sizeof(int));
> +    *osamp = input_nb_samples;
> +
> +    avfilter_unref_buffer(samplesref);
> +    return SOX_SUCCESS;
> +}
> +
> +/**
> + * a sox effect handler to handle output of samples to the effects chain.
> + * The function that will be called to output samples from the effects chain.
> + */
> +static int output_flow(sox_effect_t *effp UNUSED, sox_sample_t const * ibuf,
> +                       sox_sample_t * obuf UNUSED, size_t * isamp, size_t * osamp)

Nits: * foo => *foo

> +{
> +    
> +    SoxContext *sox = ((sox_inout_ctx *)effp->priv)->lavfi_ctx;
> +
> +    // If our fifo runs out of space, we just drop this frame and keep going.
> +    if (av_fifo_space(sox->out_fifo) < *isamp * sizeof(int)) {
> +        av_log(NULL, AV_LOG_ERROR,
> +               "Buffering limit reached. Sox output data being dropped.\n");
> +        return SOX_SUCCESS;
> +    }
> +
> +    av_fifo_generic_write(sox->out_fifo, (void *)ibuf, *isamp, NULL);
> +
> +    // Set *osamp to 0 since this is the last effect of the sox chain.
> +    *osamp = 0;
> +
> +    return SOX_SUCCESS; // All samples output successfully.
> +}
> +
> +static sox_effect_handler_t const * input_handler(void)
> +{
> +    static sox_effect_handler_t handler = {
> +        "input", NULL, SOX_EFF_MCHAN, inout_config_opts, NULL, NULL,
> +        input_drain, NULL, NULL, sizeof(SoxContext *)
> +    };
> +    return &handler;
> +}
> +
> +// a sox effect handler to handle output of samples from the effects chain.
> +static sox_effect_handler_t const * output_handler(void)
> +{
> +    static sox_effect_handler_t handler = {
> +        "output", NULL, SOX_EFF_MCHAN, inout_config_opts, NULL,
> +        output_flow, NULL, NULL, NULL, 0
> +    };
> +    return &handler;
> +}
> +
> +#define MAX_EFFECT_ARGS 10
> +#define INFIFO_SIZE 8
> +#define OUTFIFO_SIZE 8192
> +#define OUT_FRAME_SIZE 1024
> +
> +static inline int add_effect_and_setopts(AVFilterContext *ctx, char *effect_str, sox_signalinfo_t *signal)
> +{
> +    SoxContext *sox = ctx->priv;
> +    int nargs = -1, err = 0;
> +    char *effect = NULL, *args[MAX_EFFECT_ARGS];
> +    sox_effect_t *e = NULL;

Some naming nits:
nargs => nb_args
e     => effect

> +
> +    effect = strtok(effect_str, " ");

effect_str = strtok(effect_str, "");

> +    e = sox_create_effect(sox_find_effect(effect));
> +    if (!e) {
> +        av_log(ctx, AV_LOG_ERROR, "No such sox effect: '%s'.\n", effect);
> +        return AVERROR(EINVAL);
> +    }
> +
> +    while (args[nargs++] = strtok(NULL, " "))

strtok is not re-entrant and should be avoided when used this
way. strtok_r is re-entrant but is not ISO C99, so we may need to add
a check for that (and eventually disable this filter in this case).

> +    ;
> +    if ((err = sox_add_effect(sox->chain, e, signal, signal)) != SOX_SUCCESS) {
> +        av_log(ctx, AV_LOG_ERROR, "Sox error: '%s'.\n", sox_strerror(err));
> +        return AVERROR(EINVAL);
> +    }
> +    if ((err = sox_effect_options(e, nargs, args)) != SOX_SUCCESS) {
> +        av_log(ctx, AV_LOG_ERROR, "Sox error: '%s'.\n", sox_strerror(err));
> +        return AVERROR(EINVAL);
> +    }
> +
> +    return 0;
> +}
> +
> +static av_cold int init(AVFilterContext *ctx, const char *args, void *opaque)
> +{
> +    SoxContext *sox = ctx->priv;
> +    sox_effects_chain_t * chain;
> +    sox_effect_t *e = NULL;
> +    unsigned int bits_per_sample = 32;
> +    sox_encodinginfo_t enc = {SOX_DEFAULT_ENCODING, bits_per_sample,
> +                              0.0, 0, 0, 0, 0};
> +    sox_signalinfo_t in_signal_info = {SOX_UNSPEC};
> +    char *token = NULL, param[10];
> +    int err = 0;
> +
> +    if ((err = sox_init()) != SOX_SUCCESS) {
> +        av_log(ctx, AV_LOG_ERROR, "Sox error: '%s'.\n", sox_strerror(err));
> +        return AVERROR(EINVAL);
> +    }
> +
> +    chain = sox_create_effects_chain(&enc, &enc);
> +    sox->chain = chain;
> +
> +    snprintf(param, sizeof(param), "%ld", (long int)sox);
> +    // Set up the audio buffer source as first effect of the chain.
> +    e = sox_create_effect(input_handler());
> +    if ((err = sox_add_effect(chain, e, &in_signal_info, &in_signal_info)) != SOX_SUCCESS) {
> +        av_log(ctx, AV_LOG_ERROR, "Sox error: '%s'.\n", sox_strerror(err));
> +        return AVERROR(EINVAL);
> +    }
> +    if ((err = sox_effect_options(e, 1, param)) != SOX_SUCCESS) {
> +        av_log(ctx, AV_LOG_ERROR, "Sox error: '%s'.\n", sox_strerror(err));
> +        return AVERROR(EINVAL);
> +    }
> +
> +    token = strtok (args, ":");
> +    while (token) {
> +        if ((err = add_effect_and_setopts(ctx, token, &in_signal_info))) {
> +            av_log(ctx, AV_LOG_ERROR, "Invalid sox argument: '%s'.\n", token);
> +            return err;
> +        }
> +        token = strtok (NULL, ":");
> +    }
> +
> +    e = sox_create_effect(output_handler());
> +    if ((err = sox_add_effect(chain, e, &in_signal_info, &in_signal_info)) != SOX_SUCCESS) {
> +        av_log(ctx, AV_LOG_ERROR, "Sox error: '%s'.\n", sox_strerror(err));
> +        return AVERROR(EINVAL);
> +    }
> +    if ((err = sox_effect_options(e, 1, param)) != SOX_SUCCESS) {
> +        av_log(ctx, AV_LOG_ERROR, "Sox error: '%s'.\n", sox_strerror(err));
> +        return AVERROR(EINVAL);
> +    }
> +
> +    sox->in_fifo = av_fifo_alloc(INFIFO_SIZE*sizeof(AVFilterBufferRef*));
> +    // the output data fifo stores samples in sox's native s32 integer format.
> +    sox->out_size = OUT_FRAME_SIZE; // FIXME: Make this configurable;
> +    sox->out_fifo = av_fifo_alloc(OUTFIFO_SIZE*sizeof(int));
> +    return 0;
> +}
> +
> +static av_cold void uninit(AVFilterContext *ctx)
> +{
> +    SoxContext *sox = ctx->priv;
> +    sox_quit();
> +    av_fifo_free(sox->in_fifo);
> +    av_fifo_free(sox->out_fifo);
> +}
> +

> +static int config_input(AVFilterLink *link)
> +{
> +    SoxContext *sox = link->dst->priv;
> +
> +    if (link->format != SAMPLE_FMT_S32) {
> +        av_log(link->dst, AV_LOG_ERROR, "Sox needs signed 32-bit input samples, insert resample filter.");
> +        return AVERROR(EINVAL);
> +    }

Possibly this should be avoided. We need a format negotiation system
like it is implemented for video.

> +    // store channel layout and number of channels, insert resample filter to keep this constant.
> +    sox->ch_layout = link->channel_layout;
> +    sox->nb_channels = avcodec_channel_layout_num_channels(link->channel_layout);
> +
> +    return 0;
> +}
> +
> +static void filter_samples(AVFilterLink *link, AVFilterBufferRef *samplesref)
> +{
> +    SoxContext *sox = link->dst->priv;
> +
> +    if (av_fifo_space(sox->in_fifo) < sizeof(samplesref)) {
> +        av_log(NULL, AV_LOG_ERROR,
> +               "Buffering limit reached. Please allow sox to consume some available frames before adding new ones.\n");
> +        return;
> +    }
> +
> +    av_fifo_generic_write(sox->in_fifo, &samplesref, sizeof(samplesref), NULL);
> +
> +    return;
> +}
> +
> +static int request_frame(AVFilterLink *link)
> +{
> +    SoxContext *sox = link->src->priv;
> +    AVFilterBufferRef *samplesref;
> +
> +    if (!av_fifo_size(sox->out_fifo)) {
> +        av_log(link->src, AV_LOG_ERROR,
> +               "request_frame() called with no available data from sox!\n");
> +    }
> +
> +    // libsox uses packed audio data internally.
> +    samplesref = avfilter_get_audio_buffer(link, AV_PERM_WRITE, SAMPLE_FMT_S32,
> +                                           sox->out_size, sox->ch_layout, 0);
> +
> +    av_fifo_generic_read(sox->out_fifo, samplesref->data[0], sox->out_size, NULL);
> +    filter_samples(link, samplesref);
> +    return 0;
> +}
> +
> +AVFilter avfilter_af_sox = {
> +    .name        = "sox",

> +    .description = NULL_IF_CONFIG_SMALL("Draw text on top of audio frames using libfreetype library."),

Ehm...

[...]

Also please add a description to filter, that should help to figure
out how to use it.

Regards.