[FFmpeg-devel] Fun with aevalsrc + audio testing

Nicolas George nicolas.george at normalesup.org
Sun May 20 16:24:33 CEST 2012


Le duodi 2 prairial, an CCXX, Stefano Sabatini a écrit :
> Yes, but not natively, check attachment, test with:
> ffplay -f lavfi "flite=text='Finned Francis molds plenty eager pomegranates'"

Very nice idea.

Unfortunately, if it depends on an external library, it cannot be used for tests.

(When I was young, I had games on the Atari ST that could speak. The voice was
ugly and expressionless, but it could be understood, and it was done with only
three square-wave channels. Is there anyone who still knows how to do
that?)

Also, it would be nice to be able to produce voice on different channels,
such as "[FL]front left [FR]front right [BL]back left [BR]back right": to
test channel layout problems, this is very useful. And it is probably quite
easy to implement.

> >From 16bf1143e860835b4b26c552bd2d86421c999a99 Mon Sep 17 00:00:00 2001
> From: Stefano Sabatini <stefano.sabatini-lala at poste.it>
> Date: Sun, 21 Aug 2011 02:29:33 +0200
> Subject: [PATCH] lavfi: add flite audio source
> 
> ---
>  configure                |    4 +
>  libavfilter/Makefile     |    1 +
>  libavfilter/allfilters.c |    1 +
>  libavfilter/asrc_flite.c |  179 ++++++++++++++++++++++++++++++++++++++++++++++
>  4 files changed, 185 insertions(+), 0 deletions(-)
>  create mode 100644 libavfilter/asrc_flite.c
> 
> diff --git a/configure b/configure
> index e070c0b..4a3a7ea 100755
> --- a/configure
> +++ b/configure
> @@ -181,6 +181,7 @@ External library support:
>    --enable-libdc1394       enable IIDC-1394 grabbing using libdc1394
>                             and libraw1394 [no]
>    --enable-libfaac         enable FAAC support via libfaac [no]
> +  --enable-libflite        enable flite (voice synthesis) support via libflite [no]
>    --enable-libfreetype     enable libfreetype [no]
>    --enable-libgsm          enable GSM support via libgsm [no]
>    --enable-libmodplug      enable ModPlug via libmodplug [no]
> @@ -1037,6 +1038,7 @@ CONFIG_LIST="
>      libcelt
>      libdc1394
>      libfaac
> +    libflite
>      libfreetype
>      libgsm
>      libmodplug
> @@ -1682,6 +1684,7 @@ colormatrix_filter_deps="gpl"
>  cropdetect_filter_deps="gpl"
>  delogo_filter_deps="gpl"
>  drawtext_filter_deps="libfreetype"
> +flite_filter_deps="libflite"
>  frei0r_filter_deps="frei0r dlopen"
>  frei0r_filter_extralibs='$ldl'
>  frei0r_src_filter_deps="frei0r dlopen"
> @@ -3210,6 +3213,7 @@ enabled libcelt    && require libcelt celt/celt.h celt_decode -lcelt0 &&
>                        { check_lib celt/celt.h celt_decoder_create_custom -lcelt0 ||
>                          die "ERROR: libcelt version must be >= 0.11.0."; }
>  enabled libfaac    && require2 libfaac "stdint.h faac.h" faacEncGetVersion -lfaac
> +enabled libflite   && require2 libflite "flite/flite.h" flite_init -lflite_cmu_us_kal -lflite_usenglish -lflite_cmulex -lflite
>  enabled libfreetype && require_pkg_config freetype2 "ft2build.h freetype/freetype.h" FT_Init_FreeType
>  enabled libgsm     && require  libgsm gsm/gsm.h gsm_create -lgsm
>  enabled libmodplug && require  libmodplug libmodplug/modplug.h ModPlug_Load -lmodplug
> diff --git a/libavfilter/Makefile b/libavfilter/Makefile
> index 70a4f60..1789cb5 100644
> --- a/libavfilter/Makefile
> +++ b/libavfilter/Makefile
> @@ -62,6 +62,7 @@ OBJS-$(CONFIG_VOLUME_FILTER)                 += af_volume.o
>  OBJS-$(CONFIG_AEVALSRC_FILTER)               += asrc_aevalsrc.o
>  OBJS-$(CONFIG_AMOVIE_FILTER)                 += src_movie.o
>  OBJS-$(CONFIG_ANULLSRC_FILTER)               += asrc_anullsrc.o
> +OBJS-$(CONFIG_FLITE_FILTER)                  += asrc_flite.o
>  
>  OBJS-$(CONFIG_ABUFFERSINK_FILTER)            += sink_buffer.o
>  OBJS-$(CONFIG_ANULLSINK_FILTER)              += asink_anullsink.o
> diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
> index aaaef19..cbb30bc 100644
> --- a/libavfilter/allfilters.c
> +++ b/libavfilter/allfilters.c
> @@ -52,6 +52,7 @@ void avfilter_register_all(void)
>      REGISTER_FILTER (AEVALSRC,    aevalsrc,    asrc);
>      REGISTER_FILTER (AMOVIE,      amovie,      asrc);
>      REGISTER_FILTER (ANULLSRC,    anullsrc,    asrc);
> +    REGISTER_FILTER (FLITE,       flite,       asrc);
>  
>      REGISTER_FILTER (ABUFFERSINK, abuffersink, asink);
>      REGISTER_FILTER (ANULLSINK,   anullsink,   asink);
> diff --git a/libavfilter/asrc_flite.c b/libavfilter/asrc_flite.c
> new file mode 100644
> index 0000000..5ddf9f9
> --- /dev/null
> +++ b/libavfilter/asrc_flite.c
> @@ -0,0 +1,179 @@
> +/*
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +/**
> + * @file
> + * flite voice synth source
> + */
> +
> +#include <flite/flite.h>
> +#include "libavutil/audioconvert.h"
> +#include "libavutil/file.h"
> +#include "libavutil/opt.h"
> +#include "avfilter.h"
> +#include "audio.h"
> +#include "internal.h"
> +
> +typedef struct {
> +    const AVClass *class;
> +    char *voice_str;
> +    char *textfile;
> +    char *text;
> +    cst_wave *wave;
> +    int16_t *wave_samples;
> +    int      wave_nb_samples;
> +    int64_t pts;
> +} FliteContext;
> +
> +#define OFFSET(x) offsetof(FliteContext, x)
> +
> +static const AVOption flite_options[]= {
> +    {"textfile", "set text filename to speech", OFFSET(textfile),  FF_OPT_TYPE_STRING, {.str=NULL},  CHAR_MIN, CHAR_MAX },
> +    {"text",     "set text to speech",          OFFSET(text),      FF_OPT_TYPE_STRING, {.str=NULL},  CHAR_MIN, CHAR_MAX },
> +    {"voice",    "set voice",                   OFFSET(voice_str), FF_OPT_TYPE_STRING, {.str=NULL},  CHAR_MIN, CHAR_MAX },
> +    {NULL}
> +};
> +
> +static const char *flite_get_name(void *ctx)
> +{
> +    return "flite";
> +}
> +
> +static const AVClass flite_class = {
> +    "FlitetContext",

Extra "t".

> +    flite_get_name,
> +    flite_options
> +};

Since it is starting to be widely used, we could have a macro to do that:

FF_MAKE_CLASS(flite, FliteContext)

> +
> +cst_voice *register_cmu_us_kal(void *ptr);

What is that?

> +
> +static int init(AVFilterContext *ctx, const char *args, void *opaque)
> +{
> +    FliteContext *flite = ctx->priv;
> +    int err = 0;
> +    cst_voice *voice;
> +
> +    flite->class = &flite_class;
> +    av_opt_set_defaults(flite);
> +
> +    if ((err = (av_set_options_string(flite, args, "=", ":"))) < 0) {
> +        av_log(ctx, AV_LOG_ERROR, "Error parsing options string: '%s'\n", args);
> +        return err;
> +    }

... and we could have a class field in AVFilter to let lavfi do that.

> +
> +    if ((err = flite_init())) {

Does this support being called several times?

> +        av_log(ctx, AV_LOG_ERROR, "Could not init flite");
> +        return AVERROR(EINVAL);

Nit: AVERROR_UNKNOWN?

> +    }
> +
> +    voice = register_cmu_us_kal(NULL);
> +
> +    if (flite->textfile) {
> +        uint8_t *textbuf;
> +        size_t textbuf_size;
> +
> +        if (flite->text) {
> +            av_log(ctx, AV_LOG_ERROR,
> +                   "Both text and text file provided. Please provide only one\n");
> +            return AVERROR(EINVAL);
> +        }
> +        if ((err = av_file_map(flite->textfile, &textbuf, &textbuf_size, 0, ctx)) < 0) {

We can not use it with a pipe. Don't we have some kind of av_read_file?
Apparently not. I can submit a patch for that.

> +            av_log(ctx, AV_LOG_ERROR,
> +                   "The text file '%s' could not be read or is empty\n",
> +                   flite->textfile);
> +            return err;
> +        }
> +
> +        if (!(flite->text = av_malloc(textbuf_size+1)))
> +            return AVERROR(ENOMEM);
> +        memcpy(flite->text, textbuf, textbuf_size);
> +        flite->text[textbuf_size] = 0;
> +        av_file_unmap(textbuf, textbuf_size);
> +    }
> +
> +    /* synth all the file data in block */
> +    flite->wave = flite_text_to_wave(flite->text, voice);
> +    flite->wave_samples    = flite->wave->samples;
> +    flite->wave_nb_samples = flite->wave->num_samples;
> +    return 0;
> +}
> +
> +static int config_props(AVFilterLink *outlink)
> +{
> +    FliteContext *flite = outlink->src->priv;
> +
> +    outlink->sample_rate = flite->wave->sample_rate;
> +    outlink->time_base = (AVRational){1, flite->wave->sample_rate};
> +    return 0;
> +}
> +
> +static int query_formats(AVFilterContext *ctx)
> +{
> +    FliteContext *flite = ctx->priv;
> +    static enum AVSampleFormat sample_fmts[] = { AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE };
> +    int64_t chlayouts[] = { av_get_default_channel_layout(flite->wave->num_channels), -1 };
> +    int sample_rates[] = { flite->wave->sample_rate, -1 };
> +
> +    avfilter_set_common_sample_formats (ctx, avfilter_make_format_list(sample_fmts));
> +    ff_set_common_channel_layouts(ctx, avfilter_make_format64_list(chlayouts));
> +    ff_set_common_samplerates(ctx, avfilter_make_format_list(sample_rates));

Maybe we could have ff_make_format(64)_list_single for this kind of
situation.

> +
> +    return 0;
> +}
> +
> +static int request_frame(AVFilterLink *outlink)
> +{
> +    AVFilterBufferRef *samplesref;
> +    FliteContext *flite = outlink->src->priv;
> +    int nb_samples = FFMIN(flite->wave_nb_samples, 512);

Is there a particular reason for 512?

> +
> +    if (!nb_samples)
> +        return AVERROR_EOF;
> +
> +    samplesref = ff_get_audio_buffer(outlink, AV_PERM_WRITE, nb_samples);
> +
> +    memcpy(samplesref->data[0], flite->wave_samples,
> +           nb_samples * flite->wave->num_channels * 2);
> +    samplesref->pts = flite->pts;
> +    samplesref->pos = -1;
> +    samplesref->audio->sample_rate = flite->wave->sample_rate;
> +    flite->pts += nb_samples;
> +    flite->wave_samples += nb_samples * flite->wave->num_channels;
> +    flite->wave_nb_samples -= nb_samples;
> +
> +    ff_filter_samples(outlink, samplesref);
> +
> +    return 0;
> +}
> +
> +AVFilter avfilter_asrc_flite = {
> +    .name        = "flite",
> +    .description = NULL_IF_CONFIG_SMALL("Flite voice synth source."),
> +
> +    .query_formats = query_formats,
> +    .init        = init,
> +    .priv_size   = sizeof(FliteContext),
> +
> +    .inputs      = (AVFilterPad[]) {{ .name = NULL}},
> +
> +    .outputs     = (AVFilterPad[]) {{ .name = "default",
> +                                      .type = AVMEDIA_TYPE_AUDIO,
> +                                      .config_props = config_props,
> +                                      .request_frame = request_frame, },
> +                                    { .name = NULL}},
> +};

Regards,

-- 
  Nicolas George
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 198 bytes
Desc: Digital signature
URL: <http://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20120520/abf65935/attachment.asc>


More information about the ffmpeg-devel mailing list