[FFmpeg-devel] [PATCH] filters: add the vediting and aediting filters

Thu Jan 9 13:22:50 CET 2014

On date Monday 2014-01-06 15:59:51 +0100, Federico Simoncelli encoded:
> The vediting and aediting filters allow the user to select segments
> of video and audio to include.
> 
> Signed-off-by: Federico Simoncelli <fsimonce at redhat.com>
> ---
>  Changelog                |   1 +
>  doc/filters.texi         |  23 +++++
>  libavfilter/Makefile     |   2 +
>  libavfilter/allfilters.c |   2 +
>  libavfilter/f_editing.c  | 259 +++++++++++++++++++++++++++++++++++++++++++++++
>  5 files changed, 287 insertions(+)
>  create mode 100644 libavfilter/f_editing.c
> 
> diff --git a/Changelog b/Changelog
> index 2cab110..c45b2fe 100644
> --- a/Changelog
> +++ b/Changelog
> @@ -18,6 +18,7 @@ version <next>
>  - ATRAC3+ decoder
>  - VP8 in Ogg demuxing
>  - side & metadata support in NUT
> +- editing filter
>  
>  
>  version 2.1:
> diff --git a/doc/filters.texi b/doc/filters.texi
> index a579964..4d0b4f6 100644
> --- a/doc/filters.texi
> +++ b/doc/filters.texi
> @@ -1787,6 +1787,29 @@ slope
>  Determine how steep is the filter's shelf transition.
>  @end table
>  
> + at section vediting, aediting

I think [av]edit would be a shorter and better name (usually a filter
name is a simple verb, not its gerund form).

> +
> +Select the segments of video and audio to include.

This is pretty ambiguous.

> +
> +The filter accepts the following options:
> +
> + at table @option
> + at item segments
> +The segments of the video or audio to include. The format is:

> + at example
> +vediting=START_TS1-END_TS1#START_TS2-END_TS2#...

Is this a timestamp or a time?

"|" as separator would be better than "#", which might be used sooner
or later to denote comments in the graph syntax.

> + at end example
> + at end table
> +
> +The segments must be monotonically ordered and cannot overlap.
> +In general the same segments are declared both for video and
> +audio tracks (vediting/aediting) but there's no limitation in
> +this respect.
> +

> +The algorithm used to calculate the output pts is highly

PTS

> +resistant to damaged streams (e.g. DVB-T) as it reuses the
> +input pts preventing the audio/video de-synchronization.
> +
>  @section volume
>  
>  Adjust the input audio volume.
> diff --git a/libavfilter/Makefile b/libavfilter/Makefile
> index 3d587fe..87f8c5a 100644
> --- a/libavfilter/Makefile
> +++ b/libavfilter/Makefile
> @@ -53,6 +53,7 @@ OBJS-$(CONFIG_AVCODEC)                       += avcodec.o
>  OBJS-$(CONFIG_ACONVERT_FILTER)               += af_aconvert.o
>  OBJS-$(CONFIG_ADELAY_FILTER)                 += af_adelay.o
>  OBJS-$(CONFIG_AECHO_FILTER)                  += af_aecho.o
> +OBJS-$(CONFIG_AEDITING_FILTER)               += f_editing.o
>  OBJS-$(CONFIG_AEVAL_FILTER)                  += aeval.o
>  OBJS-$(CONFIG_AFADE_FILTER)                  += af_afade.o
>  OBJS-$(CONFIG_AFORMAT_FILTER)                += af_aformat.o
> @@ -205,6 +206,7 @@ OBJS-$(CONFIG_TINTERLACE_FILTER)             += vf_tinterlace.o
>  OBJS-$(CONFIG_TRANSPOSE_FILTER)              += vf_transpose.o
>  OBJS-$(CONFIG_TRIM_FILTER)                   += trim.o
>  OBJS-$(CONFIG_UNSHARP_FILTER)                += vf_unsharp.o
> +OBJS-$(CONFIG_VEDITING_FILTER)               += f_editing.o
>  OBJS-$(CONFIG_VFLIP_FILTER)                  += vf_vflip.o
>  OBJS-$(CONFIG_VIDSTABDETECT_FILTER)          += vidstabutils.o vf_vidstabdetect.o
>  OBJS-$(CONFIG_VIDSTABTRANSFORM_FILTER)       += vidstabutils.o vf_vidstabtransform.o
> diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
> index d58e8cc..6640ab6 100644
> --- a/libavfilter/allfilters.c
> +++ b/libavfilter/allfilters.c
> @@ -50,6 +50,7 @@ void avfilter_register_all(void)
>  #endif
>      REGISTER_FILTER(ADELAY,         adelay,         af);
>      REGISTER_FILTER(AECHO,          aecho,          af);
> +    REGISTER_FILTER(AEDITING,       aediting,       af);
>      REGISTER_FILTER(AEVAL,          aeval,          af);
>      REGISTER_FILTER(AFADE,          afade,          af);
>      REGISTER_FILTER(AFORMAT,        aformat,        af);
> @@ -201,6 +202,7 @@ void avfilter_register_all(void)
>      REGISTER_FILTER(TRANSPOSE,      transpose,      vf);
>      REGISTER_FILTER(TRIM,           trim,           vf);
>      REGISTER_FILTER(UNSHARP,        unsharp,        vf);
> +    REGISTER_FILTER(VEDITING,       vediting,       vf);
>      REGISTER_FILTER(VFLIP,          vflip,          vf);
>      REGISTER_FILTER(VIDSTABDETECT,  vidstabdetect,  vf);
>      REGISTER_FILTER(VIDSTABTRANSFORM, vidstabtransform, vf);
> diff --git a/libavfilter/f_editing.c b/libavfilter/f_editing.c
> new file mode 100644
> index 0000000..e31d069
> --- /dev/null
> +++ b/libavfilter/f_editing.c
> @@ -0,0 +1,259 @@
> +/*
> + * Copyright (c) 2013 Federico Simoncelli <federico.simoncelli at gmail.com>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +/**
> + * @file
> + * filter for selecting video and audio segments
> + */
> +

> +#include <string.h>

probably useless

> +
> +#include "audio.h"
> +#include "video.h"
> +#include "libavutil/opt.h"
> +
> +
> +typedef struct _MediaSegment {

typedef struct { ... } MediaSegment;

"_MediaSegment" is unnecessary.

> +    double start;
> +    double end;
> +    struct _MediaSegment *next;
> +} MediaSegment;
> +
> +typedef struct {
> +    const AVClass *class;

> +    char *opt_segments;

nit: segments_str is more consistent with the codebase

> +    double ts_base;
> +    double ts_prev;
> +    int frame_out;

please document these

> +    MediaSegment *current;
> +    MediaSegment *segments;
> +} EditingContext;

EditContext?

That is, in case we do rename editing -> edit.

> +
> +
> +#define FLAGS ( \
> +    AV_OPT_FLAG_AUDIO_PARAM | \
> +    AV_OPT_FLAG_VIDEO_PARAM | \
> +    AV_OPT_FLAG_FILTERING_PARAM \
> +)
> +#define OFFSET(x) offsetof(EditingContext, x)
> +static const AVOption options[] = {
> +    { "segments", "set the segment list", OFFSET(opt_segments),
> +        AV_OPT_TYPE_STRING, { .str = NULL }, .flags=FLAGS },
> +    { NULL }
> +};
> +
> +
> +static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
> +{
> +    int ret;
> +    double frame_in_ts, frame_out_ts;
> +    EditingContext *editing = inlink->dst->priv;
> +
> +    if (!editing->current) /* fast-forward to the end */
> +        goto discard;
> +
> +    frame_in_ts = (double) frame->pts * av_q2d(inlink->time_base);
> +
> +    if (editing->ts_prev > frame_in_ts) {
> +        av_log(inlink->dst, AV_LOG_ERROR, "Frame discontinuity "
> +            "error %f\n", editing->ts_prev - frame_in_ts);
> +        av_frame_free(&frame);
> +        return AVERROR(EINVAL);
> +    }
> +
> +    editing->ts_prev = frame_in_ts;
> +    frame_out_ts = editing->ts_base +
> +        (frame_in_ts - editing->current->start);
> +
> +    if (frame_in_ts >= editing->current->end) {
> +        editing->current = editing->current->next;
> +        editing->ts_base = frame_out_ts;
> +        goto discard;
> +    }
> +
> +    if (frame_in_ts <= editing->current->start) {
> +        goto discard;
> +    }
> +
> +    frame->pts = frame_out_ts / av_q2d(inlink->time_base);
> +
> +    ret = ff_filter_frame(inlink->dst->outputs[0], frame);
> +    editing->frame_out = (ret == 0) ? 1 : 0;

editing->frame_out = (ret == 0);
should be enough

> +
> +    return ret;
> +
> +  discard:
> +    av_frame_free(&frame);
> +    editing->frame_out = 0;
> +    return 0;
> +}
> +
> +static int request_frame(AVFilterLink *outlink)
> +{
> +    int ret;
> +    EditingContext *editing = outlink->src->priv;
> +
> +    if (!editing->current) /* exit after last segment */
> +        return AVERROR_EOF;
> +
> +    do {
> +        ret = ff_request_frame(outlink->src->inputs[0]);
> +        if (ret < 0)
> +            return ret;
> +    } while (!editing->frame_out);
> +
> +    return 0;
> +}
> +
> +static int parse_segments(AVFilterContext *ctx)
> +{
> +    char *p, *sp;
> +    EditingContext *editing = ctx->priv;
> +    MediaSegment *segment, *j = NULL, **i = &editing->segments;
> +
> +    if (!editing->opt_segments) {
> +        av_log(ctx, AV_LOG_ERROR, "Missing segments list\n");
> +        return AVERROR(EINVAL);
> +    }
> +
> +    for (p = editing->opt_segments;; p = NULL) {

> +        if (!(p = strtok_r(p, "-", &sp)))

strtok_r is not portable, use av_strtok().

> +            break;
> +

> +        segment = av_malloc(sizeof(MediaSegment));

Check for malloc failure; also, you can use av_mallocz() and avoid the
next assignment.

> +        segment->next = NULL;
> +

> +        segment->start = atof(p);

This could be replaced with av_parse_time()

> +
> +        if (j && segment->start < j->end) {

> +            av_log(ctx, AV_LOG_ERROR, "Non-monotonic segments\n");

Please provide some context, for example the number of the offending
segment and/or the overlapping end values.

> +            return AVERROR(EINVAL);
> +        }
> +
> +        if (!(p = strtok_r(NULL, "#", &sp))) {
> +            av_log(ctx, AV_LOG_ERROR, "Invalid segment list\n");
> +            return AVERROR(EINVAL);
> +        }
> +
> +        segment->end = atof(p);
> +
> +        if (segment->start >= segment->end) {
> +            av_log(ctx, AV_LOG_ERROR, "Invalid or empty segment\n");
> +            return AVERROR(EINVAL);
> +        }
> +
> +        *i = j = segment, i = &segment->next;
> +    }
> +
> +    return 0;
> +}
> +
> +static av_cold int init(AVFilterContext *ctx)
> +{
> +    int ret;
> +    EditingContext *editing = ctx->priv;
> +
> +    ret = parse_segments(ctx);
> +    if (ret < 0)
> +        return ret;
> +
> +    editing->current = editing->segments;
> +    editing->ts_base = 0;
> +    editing->ts_prev = 0;
> +    editing->frame_out = 0;
> +
> +    return 0;
> +}

Please move parse and init to the beginning, so the file can be read
from head to tail with no jumps.

> +
> +static av_cold void uninit(AVFilterContext *ctx) {
> +    MediaSegment *n, *i;
> +    EditingContext *editing = ctx->priv;
> +
> +    for (i = editing->segments; i != NULL; i = n) {
> +        n = i->next;
> +        av_freep(&i);
> +    }
> +
> +    editing->current = editing->segments = NULL;
> +}
> +
> +#define vediting_options options
> +AVFILTER_DEFINE_CLASS(vediting);
> +
> +#define aediting_options options
> +AVFILTER_DEFINE_CLASS(aediting);
> +
> +static const AVFilterPad avfilter_af_editing_inputs[] = {
> +    {
> +        .name           = "default",
> +        .type           = AVMEDIA_TYPE_AUDIO,
> +        .filter_frame   = filter_frame,
> +    },
> +    {NULL}
> +};
> +
> +static const AVFilterPad avfilter_af_editing_outputs[] = {
> +    {
> +        .name           = "default",
> +        .type           = AVMEDIA_TYPE_AUDIO,
> +        .request_frame  = request_frame,
> +    },
> +    {NULL}
> +};
> +
> +AVFilter ff_af_aediting = {
> +    .name           = "aediting",
> +    .description    = NULL_IF_CONFIG_SMALL("Select audio segments"),
> +    .init           = init,
> +    .uninit         = uninit,
> +    .priv_size      = sizeof(EditingContext),
> +    .priv_class     = &aediting_class,
> +    .inputs         = avfilter_af_editing_inputs,
> +    .outputs        = avfilter_af_editing_outputs,
> +};
> +
> +static const AVFilterPad avfilter_vf_editing_inputs[] = {
> +    {
> +        .name           = "default",
> +        .type           = AVMEDIA_TYPE_VIDEO,
> +        .filter_frame   = filter_frame,
> +    },
> +    {NULL}
> +};
> +
> +static const AVFilterPad avfilter_vf_editing_outputs[] = {
> +    {
> +        .name           = "default",
> +        .type           = AVMEDIA_TYPE_VIDEO,
> +        .request_frame  = request_frame,
> +    },
> +    {NULL}
> +};
> +
> +AVFilter ff_vf_vediting = {
> +    .name           = "vediting",
> +    .description    = NULL_IF_CONFIG_SMALL("Select video segments"),
> +    .init           = init,
> +    .uninit         = uninit,
> +    .priv_size      = sizeof(EditingContext),
> +    .priv_class     = &vediting_class,
> +    .inputs         = avfilter_vf_editing_inputs,
> +    .outputs        = avfilter_vf_editing_outputs,
> +};
> -- 
> 1.8.4.2
> 
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel

-- 
FFmpeg = Formidable & Freak Monstrous Programmable Ecletic Gargoyle