[FFmpeg-devel] [PATCH] avfilter: add (a)separate filters

Nicolas George george at nsup.org
Mon Aug 2 13:11:30 EEST 2021


Paul B Mahol (12021-08-01):
> Signed-off-by: Paul B Mahol <onemda at gmail.com>
> ---
>  doc/filters.texi         |  31 ++++
>  libavfilter/Makefile     |   2 +
>  libavfilter/allfilters.c |   2 +
>  libavfilter/f_separate.c | 346 +++++++++++++++++++++++++++++++++++++++
>  4 files changed, 381 insertions(+)
>  create mode 100644 libavfilter/f_separate.c
> 
> diff --git a/doc/filters.texi b/doc/filters.texi
> index 635179edb9..e40322417b 100644
> --- a/doc/filters.texi
> +++ b/doc/filters.texi
> @@ -26067,6 +26067,37 @@ sendcmd=f=test.cmd,drawtext=fontfile=FreeSerif.ttf:text='',hue
>  @end example
>  @end itemize
>  
> + at section separate, aseparate
> +
> +Separate single input stream into multiple streams.
> +
> + at code{separate} works on video frames, @code{aseparate} on audio samples.
> +
> +This filter accepts the following options:
> +
> + at table @option
> + at item durations

> +Durations of input at which to separate input. Each duration point is separated by '|'.

"... to split input, separated by '|'."

"Each" is not correct here, because there is one less | than point. Same
below.

I also think it should be more explicit which one of these cases is
true:

- "1|7|11" will make segments with durations 1, 7, 11, and therefore
  split at timestamps 1, 1+7=8 and 1+7+11=19

- "1|7|11" will split at timestamps 1, 7, 11, and therefore make
  segments with durations 1, 7-1=6, 11-7=4.

In fact, I think it would be even better to offer both options. It is
very easy to mix them: just use a '+' sign to indicate a relative
timestamp:

"60|+3|180|+5" to split at 60, 63, 180, 185.

> +
> + at item frames, samples

> +Exact frame/sample at which to do separations of input video/audio stream. Each point
> +is separated by '|'.

This would benefit from allowing relative frame / sample numbers too.

> + at end table
> +
> + at subsection Examples
> +
> + at itemize
> +
> + at item
> +Separate input audio stream into three output audio streams: the part from the start of the
> +input up to the 60th second goes to the 1st output stream, the part from the 60th to the 120th
> +second goes to the 2nd output stream, and the rest after the 120th second goes to the 3rd
> +output stream:
> + at example
> +aseparate=durations="60 | 120"
> + at end example
> +
> + at end itemize
> +
>  @anchor{setpts}
>  @section setpts, asetpts
>  
> diff --git a/libavfilter/Makefile b/libavfilter/Makefile
> index 772971521c..73c26f4870 100644
> --- a/libavfilter/Makefile
> +++ b/libavfilter/Makefile
> @@ -80,6 +80,7 @@ OBJS-$(CONFIG_AREVERSE_FILTER)               += f_reverse.o
>  OBJS-$(CONFIG_ARNNDN_FILTER)                 += af_arnndn.o
>  OBJS-$(CONFIG_ASELECT_FILTER)                += f_select.o
>  OBJS-$(CONFIG_ASENDCMD_FILTER)               += f_sendcmd.o
> +OBJS-$(CONFIG_ASEPARATE_FILTER)              += f_separate.o
>  OBJS-$(CONFIG_ASETNSAMPLES_FILTER)           += af_asetnsamples.o
>  OBJS-$(CONFIG_ASETPTS_FILTER)                += setpts.o
>  OBJS-$(CONFIG_ASETRATE_FILTER)               += af_asetrate.o
> @@ -408,6 +409,7 @@ OBJS-$(CONFIG_SCROLL_FILTER)                 += vf_scroll.o
>  OBJS-$(CONFIG_SELECT_FILTER)                 += f_select.o
>  OBJS-$(CONFIG_SELECTIVECOLOR_FILTER)         += vf_selectivecolor.o
>  OBJS-$(CONFIG_SENDCMD_FILTER)                += f_sendcmd.o
> +OBJS-$(CONFIG_SEPARATE_FILTER)               += f_separate.o
>  OBJS-$(CONFIG_SEPARATEFIELDS_FILTER)         += vf_separatefields.o
>  OBJS-$(CONFIG_SETDAR_FILTER)                 += vf_aspect.o
>  OBJS-$(CONFIG_SETFIELD_FILTER)               += vf_setparams.o
> diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
> index e0276eb98c..c11c680564 100644
> --- a/libavfilter/allfilters.c
> +++ b/libavfilter/allfilters.c
> @@ -73,6 +73,7 @@ extern const AVFilter ff_af_areverse;
>  extern const AVFilter ff_af_arnndn;
>  extern const AVFilter ff_af_aselect;
>  extern const AVFilter ff_af_asendcmd;
> +extern const AVFilter ff_af_aseparate;
>  extern const AVFilter ff_af_asetnsamples;
>  extern const AVFilter ff_af_asetpts;
>  extern const AVFilter ff_af_asetrate;
> @@ -389,6 +390,7 @@ extern const AVFilter ff_vf_scroll;
>  extern const AVFilter ff_vf_select;
>  extern const AVFilter ff_vf_selectivecolor;
>  extern const AVFilter ff_vf_sendcmd;
> +extern const AVFilter ff_vf_separate;
>  extern const AVFilter ff_vf_separatefields;
>  extern const AVFilter ff_vf_setdar;
>  extern const AVFilter ff_vf_setfield;
> diff --git a/libavfilter/f_separate.c b/libavfilter/f_separate.c
> new file mode 100644
> index 0000000000..46f8871c8a
> --- /dev/null
> +++ b/libavfilter/f_separate.c
> @@ -0,0 +1,346 @@
> +/*
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include <stdint.h>
> +
> +#include "libavutil/avstring.h"
> +#include "libavutil/channel_layout.h"
> +#include "libavutil/common.h"
> +#include "libavutil/log.h"
> +#include "libavutil/mathematics.h"
> +#include "libavutil/opt.h"
> +#include "libavutil/parseutils.h"
> +#include "libavutil/samplefmt.h"
> +
> +#include "audio.h"
> +#include "avfilter.h"
> +#include "filters.h"
> +#include "internal.h"
> +
> +typedef struct SeparateContext {
> +    const AVClass *class;
> +
> +    char *durations_str;
> +    char *points_str;
> +
> +    int current_point;
> +    int nb_points;
> +
> +    int64_t *points;
> +

> +    int64_t current_frame;

Use link->frame_count_out.
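
Then s->current_frame can go away entirely; the test would read roughly
(sketch; mind the off-by-one, since frame_count_out is incremented as
soon as the frame is consumed):

    if (ctx->inputs[0]->frame_count_out > s->points[s->current_point])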

> +    int64_t current_sample;
> +} SeparateContext;
> +
> +static void count_points(char *item_str, int *nb_items)
> +{
> +    char *p;
> +
> +    if (!item_str)
> +        return;
> +
> +    *nb_items = 1;
> +    for (p = item_str; *p; p++) {
> +        if (*p == '|')
> +            (*nb_items)++;
> +    }
> +}
> +

> +static int parse_durations(AVFilterContext *ctx, char *item_str, int nb_points, int64_t *points)
> +{
> +    char *arg, *p = item_str;
> +    char *saveptr = NULL;
> +    int ret;
> +
> +    for (int i = 0; i < nb_points; i++) {
> +        if (!(arg = av_strtok(p, "|", &saveptr)))
> +            return AVERROR(EINVAL);
> +
> +        p = NULL;
> +
> +        ret = av_parse_time(&points[i], arg, 1);
> +        if (ret < 0) {
> +            av_log(ctx, AV_LOG_ERROR, "Invalid durations supplied: %s\n", arg);
> +            return AVERROR(EINVAL);
> +        }
> +    }
> +
> +    return 0;
> +}
> +
> +static int parse_points(AVFilterContext *ctx, char *item_str, int nb_points, int64_t *points)
> +{
> +    char *arg, *p = item_str;
> +    char *saveptr = NULL;
> +
> +    for (int i = 0; i < nb_points; i++) {
> +        if (!(arg = av_strtok(p, "|", &saveptr)))
> +            return AVERROR(EINVAL);
> +
> +        p = NULL;
> +
> +        if (sscanf(arg, "%"PRId64, &points[i]) != 1) {
> +            av_log(ctx, AV_LOG_ERROR, "Invalid points supplied: %s\n", arg);
> +            return AVERROR(EINVAL);
> +        }
> +    }
> +
> +    return 0;
> +}

These two functions are almost identical and not speed-critical: it
would ease maintenance to merge them, with just a boolean parameter to
select between av_parse_time and sscanf.
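
Something like (untested sketch):

    static int parse_items(AVFilterContext *ctx, char *item_str, int nb_points,
                           int64_t *points, int use_time)
    {
        char *arg, *p = item_str;
        char *saveptr = NULL;

        for (int i = 0; i < nb_points; i++) {
            if (!(arg = av_strtok(p, "|", &saveptr)))
                return AVERROR(EINVAL);

            p = NULL;

            if (use_time ? av_parse_time(&points[i], arg, 1) < 0
                         : sscanf(arg, "%"PRId64, &points[i]) != 1) {
                av_log(ctx, AV_LOG_ERROR, "Invalid %s supplied: %s\n",
                       use_time ? "duration" : "point", arg);
                return AVERROR(EINVAL);
            }
        }

        return 0;
    }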

> +
> +static av_cold int init(AVFilterContext *ctx, int type)
> +{
> +    SeparateContext *s = ctx->priv;
> +    int ret;
> +

> +    if (s->durations_str) {
> +        count_points(s->durations_str, &s->nb_points);
> +        s->nb_points++;
> +    } else {
> +        count_points(s->points_str, &s->nb_points);
> +        s->nb_points++;
> +    }

Report an error, or at least a warning, if both are set.
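
E.g. (sketch; note the frames/samples options currently have non-NULL
defaults, which would have to become NULL for this test to be
meaningful):

    if (s->durations_str && s->points_str) {
        av_log(ctx, AV_LOG_ERROR,
               "Setting both durations and frames/samples is ambiguous.\n");
        return AVERROR(EINVAL);
    }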

> +
> +    s->points = av_calloc(s->nb_points, sizeof(*s->points));
> +    if (!s->points)
> +        return AVERROR(ENOMEM);
> +
> +    if (s->durations_str) {
> +        ret = parse_durations(ctx, s->durations_str, s->nb_points - 1, s->points);
> +        if (ret < 0)
> +            return ret;
> +    } else {
> +        ret = parse_points(ctx, s->points_str, s->nb_points - 1, s->points);
> +        if (ret < 0)
> +            return ret;
> +    }

At this point, s->points contains either durations in microseconds or
frame/sample numbers. Only whether s->durations_str is NULL
distinguishes the two cases, and s->durations_str is never used again.

It is the first clue that at least one of the cases does not work as
documented. Or am I missing something?
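
If both modes are really meant to work, the active mode should be
recorded explicitly, e.g. with a hypothetical new context field set in
init():

    s->use_durations = !!s->durations_str;

and consulted wherever the interpretation of s->points matters (see
below).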

> +
> +    s->points[s->nb_points - 1] = INT64_MAX;
> +
> +    for (int i = 0; i < s->nb_points; i++) {
> +        AVFilterPad pad = { 0 };
> +
> +        pad.type = type;
> +        pad.name = av_asprintf("output%d", i);
> +        if (!pad.name)
> +            return AVERROR(ENOMEM);
> +
> +        if ((ret = ff_insert_outpad(ctx, i, &pad)) < 0) {
> +            av_freep(&pad.name);
> +            return ret;
> +        }
> +    }
> +
> +    return 0;
> +}
> +
> +static int config_input(AVFilterLink *inlink)
> +{
> +    AVFilterContext *ctx = inlink->dst;
> +    SeparateContext *s = ctx->priv;

> +    AVRational tb = (inlink->type == AVMEDIA_TYPE_VIDEO) ?
> +                     inlink->time_base : (AVRational){ 1, inlink->sample_rate };
> +
> +    for (int i = 0; i < s->nb_points - 1; i++) {
> +        int64_t pts = av_rescale_q(s->points[i], AV_TIME_BASE_Q, tb);

This code assumes s->points are timestamps in microseconds, and
converts them either to timestamps in the link time base or to a sample
count.

If s->points are frame/sample counts, the conversion is invalid.

For audio, it disregards the actual timestamps of the frames, including
timestamp gaps and, more problematic, the initial timestamp.

It is the second clue that at least one of the cases does not work as
documented.
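
With the hypothetical use_durations flag suggested above, the
conversion could at least be restricted to the case where it is
meaningful (sketch):

    if (s->use_durations) {
        /* durations in AV_TIME_BASE units -> link time base or samples */
        for (int i = 0; i < s->nb_points - 1; i++)
            s->points[i] = av_rescale_q(s->points[i], AV_TIME_BASE_Q, tb);
    }
    /* else: frame/sample counts, nothing to convert */

And even then, the audio case would have to account for the first
frame's actual timestamp.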

> +
> +        s->points[i] = pts;
> +    }
> +
> +    return 0;
> +}
> +
> +static av_cold void uninit(AVFilterContext *ctx)
> +{
> +    SeparateContext *s = ctx->priv;
> +
> +    av_freep(&s->points);
> +
> +    for (int i = 0; i < ctx->nb_outputs; i++)
> +        av_freep(&ctx->output_pads[i].name);
> +}
> +
> +#define OFFSET(x) offsetof(SeparateContext, x)
> +#define COMMON_OPTS \
> +    { "durations", "durations of input at which to separate input", OFFSET(durations_str),  AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS }, \
> +
> +#if CONFIG_SEPARATE_FILTER
> +
> +static av_cold int video_init(AVFilterContext *ctx)
> +{
> +    return init(ctx, AVMEDIA_TYPE_VIDEO);
> +}
> +
> +static int video_activate(AVFilterContext *ctx)
> +{
> +    SeparateContext *s = ctx->priv;
> +    AVFrame *frame = NULL;
> +    int ret, status;
> +    int64_t pts;
> +
> +    for (int i = s->current_point; i < s->nb_points; i++) {
> +        FF_FILTER_FORWARD_STATUS_BACK_ALL(ctx->outputs[i], ctx);
> +    }
> +
> +    if ((ret = ff_inlink_consume_frame(ctx->inputs[0], &frame)) > 0) {

> +        if (s->current_frame >= s->points[s->current_point]) {
> +            ff_outlink_set_status(ctx->outputs[s->current_point], AVERROR_EOF, frame->pts);

If s->points are timestamps, then the next point should be used as
timestamp for the EOF.

> +            s->current_point++;
> +        }

You need to loop: the user can ask to split at 13 and 14; if the stream
has only one frame every five seconds (1/5 fps), the segment between
them is empty and current_point must increment twice.

Also, this code does not examine frame->pts at all.

It is the third clue that at least one of the cases does not work as
documented.
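
For the timestamp interpretation, the fix could look roughly like this
(untested; it also uses the point itself, rather than frame->pts, as
the EOF timestamp, as suggested above):

    while (s->current_point < s->nb_points - 1 &&
           frame->pts >= s->points[s->current_point]) {
        ff_outlink_set_status(ctx->outputs[s->current_point], AVERROR_EOF,
                              s->points[s->current_point]);
        s->current_point++;
    }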

> +
> +        if (s->current_point >= s->nb_points) {
> +            av_frame_free(&frame);
> +            return AVERROR(EINVAL);
> +        }
> +
> +        s->current_frame++;
> +
> +        ret = ff_filter_frame(ctx->outputs[s->current_point], frame);
> +    }
> +
> +    if (ret < 0) {
> +        return ret;
> +    } else if (ff_inlink_acknowledge_status(ctx->inputs[0], &status, &pts)) {
> +        for (int i = s->current_point; i < s->nb_points; i++)
> +            ff_outlink_set_status(ctx->outputs[i], status, pts);
> +        return 0;
> +    } else {
> +        for (int i = s->current_point; i < s->nb_points; i++) {
> +            if (ff_outlink_frame_wanted(ctx->outputs[i]))
> +                ff_inlink_request_frame(ctx->inputs[0]);
> +        }
> +        return 0;
> +    }
> +}
> +
> +#define FLAGS AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_FILTERING_PARAM
> +static const AVOption separate_options[] = {
> +    { "frames", "frames at which to do separation", OFFSET(points_str), AV_OPT_TYPE_STRING,  { .str = "25" }, 0, 0, FLAGS },
> +    COMMON_OPTS
> +    { NULL }
> +};
> +#undef FLAGS
> +
> +AVFILTER_DEFINE_CLASS(separate);
> +
> +static const AVFilterPad separate_inputs[] = {
> +    {
> +        .name         = "default",
> +        .type         = AVMEDIA_TYPE_VIDEO,
> +        .config_props = config_input,
> +    },
> +    { NULL }
> +};
> +
> +const AVFilter ff_vf_separate = {
> +    .name        = "separate",
> +    .description = NULL_IF_CONFIG_SMALL("Separate video stream."),
> +    .init        = video_init,
> +    .uninit      = uninit,
> +    .priv_size   = sizeof(SeparateContext),
> +    .priv_class  = &separate_class,
> +    .activate    = video_activate,
> +    .inputs      = separate_inputs,
> +    .outputs     = NULL,
> +    .flags       = AVFILTER_FLAG_DYNAMIC_OUTPUTS,
> +};
> +#endif // CONFIG_SEPARATE_FILTER
> +
> +#if CONFIG_ASEPARATE_FILTER
> +
> +static av_cold int audio_init(AVFilterContext *ctx)
> +{
> +    return init(ctx, AVMEDIA_TYPE_AUDIO);
> +}
> +
> +static int audio_activate(AVFilterContext *ctx)
> +{
> +    SeparateContext *s = ctx->priv;
> +    AVFrame *frame = NULL;
> +    int ret, status;
> +    int64_t pts;
> +
> +    for (int i = s->current_point; i < s->nb_points; i++) {
> +        FF_FILTER_FORWARD_STATUS_BACK_ALL(ctx->outputs[i], ctx);
> +    }
> +
> +    if ((ret = ff_inlink_consume_samples(ctx->inputs[0], 1,
> +                                         FFMIN(s->points[s->current_point] - s->current_sample, INT_MAX),
> +                                         &frame)) > 0) {
> +        s->current_sample += frame->nb_samples;
> +

> +        if (s->current_sample >= s->points[s->current_point]) {
> +            ff_outlink_set_status(ctx->outputs[s->current_point], AVERROR_EOF, frame->pts);
> +            s->current_point++;
> +        }

Similar issues as for video, with subtle differences.

> +
> +        if (s->current_point >= s->nb_points) {
> +            av_frame_free(&frame);
> +            return AVERROR(EINVAL);
> +        }
> +
> +        ret = ff_filter_frame(ctx->outputs[s->current_point], frame);
> +    }
> +

> +    if (ret < 0) {
> +        return ret;
> +    } else if (ff_inlink_acknowledge_status(ctx->inputs[0], &status, &pts)) {
> +        for (int i = s->current_point; i < s->nb_points; i++)
> +            ff_outlink_set_status(ctx->outputs[i], status, pts);
> +        return 0;
> +    } else {
> +        for (int i = s->current_point; i < s->nb_points; i++) {
> +            if (ff_outlink_frame_wanted(ctx->outputs[i]))
> +                ff_inlink_request_frame(ctx->inputs[0]);
> +        }
> +        return 0;
> +    }

This is exactly identical to the video case: put it in a common
function.
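
Something like this, just the existing code moved verbatim into a
helper that both activate callbacks tail-call as "return
forward_status(ctx, ret);":

    static int forward_status(AVFilterContext *ctx, int ret)
    {
        SeparateContext *s = ctx->priv;
        int status;
        int64_t pts;

        if (ret < 0)
            return ret;
        if (ff_inlink_acknowledge_status(ctx->inputs[0], &status, &pts)) {
            for (int i = s->current_point; i < s->nb_points; i++)
                ff_outlink_set_status(ctx->outputs[i], status, pts);
            return 0;
        }
        for (int i = s->current_point; i < s->nb_points; i++) {
            if (ff_outlink_frame_wanted(ctx->outputs[i]))
                ff_inlink_request_frame(ctx->inputs[0]);
        }
        return 0;
    }
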

> +}
> +
> +#define FLAGS AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_FILTERING_PARAM
> +static const AVOption aseparate_options[] = {
> +    { "samples", "samples at which to do separation", OFFSET(points_str), AV_OPT_TYPE_STRING,  { .str = "44100" }, 0, 0, FLAGS },
> +    COMMON_OPTS
> +    { NULL }
> +};
> +#undef FLAGS
> +
> +AVFILTER_DEFINE_CLASS(aseparate);
> +
> +static const AVFilterPad aseparate_inputs[] = {
> +    {
> +        .name         = "default",
> +        .type         = AVMEDIA_TYPE_AUDIO,
> +        .config_props = config_input,
> +    },
> +    { NULL }
> +};
> +
> +const AVFilter ff_af_aseparate = {
> +    .name        = "aseparate",
> +    .description = NULL_IF_CONFIG_SMALL("Separate audio stream."),
> +    .init        = audio_init,
> +    .uninit      = uninit,
> +    .priv_size   = sizeof(SeparateContext),
> +    .priv_class  = &aseparate_class,
> +    .activate    = audio_activate,
> +    .inputs      = aseparate_inputs,
> +    .outputs     = NULL,
> +    .flags       = AVFILTER_FLAG_DYNAMIC_OUTPUTS,
> +};
> +#endif // CONFIG_ASEPARATE_FILTER

This code needs testing, ideally as FATE tests. At least:

- both duration and frame/sample count;

- variable frame rate;

- case where 1/tb != frame_rate.
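
Very rough shape of a FATE entry, for tests/fate/filter-video.mak
(untested; options and mappings hypothetical):

    FATE_FILTER_VSYNTH-$(CONFIG_SEPARATE_FILTER) += fate-filter-separate
    fate-filter-separate: CMD = framecrc -c:v pgmyuv -i $(SRC) \
        -filter_complex "separate=frames=10[a][b]" -map "[a]" -map "[b]"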

Regards,

-- 
  Nicolas George