[FFmpeg-devel] [PATCH] avfilter: add feedback video filter

Mon Apr 11 19:43:21 EEST 2022

Paul B Mahol:
> Signed-off-by: Paul B Mahol <onemda at gmail.com>
> ---
>  doc/filters.texi          |  32 ++++
>  libavfilter/Makefile      |   1 +
>  libavfilter/allfilters.c  |   1 +
>  libavfilter/vf_feedback.c | 306 ++++++++++++++++++++++++++++++++++++++
>  4 files changed, 340 insertions(+)
>  create mode 100644 libavfilter/vf_feedback.c
> 
> diff --git a/doc/filters.texi b/doc/filters.texi
> index ac49092743..612497d865 100644
> --- a/doc/filters.texi
> +++ b/doc/filters.texi
> @@ -12214,6 +12214,38 @@ fade=t=in:st=5.5:d=0.5
>  
>  @end itemize
>  
> +@section feedback
> +Apply feedback video filter.
> +
> +This filter passes cropped input frames to the 2nd output.
> +From there they can be filtered with other video filters.
> +After the filter receives a frame from the 2nd input, that frame
> +is combined on top of the original frame from the 1st input and passed
> +to the 1st output.
> +
> +The typical usage is to filter only part of the frame.
> +
> +The filter accepts the following options:
> +@table @option
> +@item x
> +@item y
> +Set the top left crop position.
> +
> +@item w
> +@item h
> +Set the crop size.
> +@end table
> +
> +@subsection Examples
> +
> +@itemize
> +@item
> +Blur only the top left 100x100 rectangular part of the frame with the gblur filter.
> +@example
> +[in][blurin]feedback=x=0:y=0:w=100:h=100[out][blurout];[blurout]gblur[blurin]
> +@end example
> +@end itemize
> +
>  @section fftdnoiz
>  Denoise frames using 3D FFT (frequency domain filtering).
>  
> diff --git a/libavfilter/Makefile b/libavfilter/Makefile
> index d69bd59bb6..bdfdfdc04a 100644
> --- a/libavfilter/Makefile
> +++ b/libavfilter/Makefile
> @@ -279,6 +279,7 @@ OBJS-$(CONFIG_ESTDIF_FILTER)                 += vf_estdif.o
>  OBJS-$(CONFIG_EXPOSURE_FILTER)               += vf_exposure.o
>  OBJS-$(CONFIG_EXTRACTPLANES_FILTER)          += vf_extractplanes.o
>  OBJS-$(CONFIG_FADE_FILTER)                   += vf_fade.o
> +OBJS-$(CONFIG_FEEDBACK_FILTER)               += vf_feedback.o
>  OBJS-$(CONFIG_FFTDNOIZ_FILTER)               += vf_fftdnoiz.o
>  OBJS-$(CONFIG_FFTFILT_FILTER)                += vf_fftfilt.o
>  OBJS-$(CONFIG_FIELD_FILTER)                  += vf_field.o
> diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
> index abd1fe2367..44fac46521 100644
> --- a/libavfilter/allfilters.c
> +++ b/libavfilter/allfilters.c
> @@ -261,6 +261,7 @@ extern const AVFilter ff_vf_estdif;
>  extern const AVFilter ff_vf_exposure;
>  extern const AVFilter ff_vf_extractplanes;
>  extern const AVFilter ff_vf_fade;
> +extern const AVFilter ff_vf_feedback;
>  extern const AVFilter ff_vf_fftdnoiz;
>  extern const AVFilter ff_vf_fftfilt;
>  extern const AVFilter ff_vf_field;
> diff --git a/libavfilter/vf_feedback.c b/libavfilter/vf_feedback.c
> new file mode 100644
> index 0000000000..cf6ef7ef0c
> --- /dev/null
> +++ b/libavfilter/vf_feedback.c
> @@ -0,0 +1,306 @@
> +/*
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +/**
> + * @file
> + * feedback video filter
> + */
> +
> +#include "libavutil/avstring.h"

?

> +#include "libavutil/fifo.h"
> +#include "libavutil/imgutils.h"
> +#include "libavutil/opt.h"
> +#include "libavutil/internal.h"
> +#include "avfilter.h"
> +#include "filters.h"
> +#include "internal.h"
> +#include "video.h"
> +
> +typedef struct FeedbackContext {
> +    const AVClass *class;
> +
> +    int x, y;
> +    int w, h;
> +
> +    int max_step[4];
> +    int hsub, vsub;
> +
> +    AVFrame *feed;
> +
> +    AVFifo *fifo;
> +} FeedbackContext;
> +
> +static void adjust_pos(AVFilterContext *ctx, FeedbackContext *s)
> +{
> +    if (s->x + s->w > ctx->inputs[0]->w)
> +        s->x = ctx->inputs[0]->w - s->w;
> +    if (s->y + s->h > ctx->inputs[0]->h)
> +        s->y = ctx->inputs[0]->h - s->h;
> +}
> +
> +static void adjust_parameters(AVFilterContext *ctx, FeedbackContext *s)
> +{
> +    if (s->x >= ctx->inputs[0]->w)
> +        s->x = 0;
> +    if (s->y >= ctx->inputs[0]->h)
> +        s->y = 0;
> +
> +    if (s->w <= 0)
> +        s->w = ctx->inputs[0]->w - s->x;
> +    if (s->h <= 0)
> +        s->h = ctx->inputs[0]->h - s->y;
> +
> +    if (s->w > ctx->inputs[0]->w)
> +        s->w = ctx->inputs[0]->w;
> +    if (s->h > ctx->inputs[0]->h)
> +        s->h = ctx->inputs[0]->h;
> +
> +    adjust_pos(ctx, s);
> +}
> +
> +static int config_input(AVFilterLink *inlink)
> +{
> +    AVFilterContext *ctx = inlink->dst;
> +    const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(inlink->format);
> +    FeedbackContext *s = ctx->priv;
> +
> +    s->hsub = pix_desc->log2_chroma_w;
> +    s->vsub = pix_desc->log2_chroma_h;
> +
> +    av_image_fill_max_pixsteps(s->max_step, NULL, pix_desc);
> +
> +    adjust_parameters(ctx, s);
> +
> +    ctx->inputs[1]->w = s->w;
> +    ctx->inputs[1]->h = s->h;
> +
> +    return 0;
> +}
> +
> +static int config_output(AVFilterLink *outlink)
> +{
> +    AVFilterContext *ctx = outlink->src;
> +    FeedbackContext *s = ctx->priv;
> +
> +    adjust_parameters(ctx, s);
> +
> +    ctx->outputs[0]->w = ctx->inputs[0]->w;
> +    ctx->outputs[0]->h = ctx->inputs[0]->h;
> +    ctx->outputs[1]->w = s->w;
> +    ctx->outputs[1]->h = s->h;
> +
> +    return 0;
> +}
> +
> +static int query_formats(AVFilterContext *ctx)
> +{
> +    return ff_set_common_formats(ctx, ff_formats_pixdesc_filter(0, AV_PIX_FMT_FLAG_BITSTREAM |
> +                                                                   AV_PIX_FMT_FLAG_HWACCEL |
> +                                                                   AV_PIX_FMT_FLAG_PAL));
> +}
> +
> +static int activate(AVFilterContext *ctx)
> +{
> +    FeedbackContext *s = ctx->priv;
> +    int status, ret;
> +    int64_t pts;
> +
> +    adjust_pos(ctx, s);
> +
> +    for (int i = 0; i < ctx->nb_outputs; i++)
> +        FF_FILTER_FORWARD_STATUS_BACK_ALL(ctx->outputs[i], ctx);
> +
> +    ret = ff_inlink_consume_frame(ctx->inputs[1], &s->feed);
> +    if (ret < 0)
> +        return ret;
> +
> +    if (ret > 0 && av_fifo_can_read(s->fifo)) {
> +        AVFrame *src = s->feed;
> +        AVFrame *dst = NULL;
> +
> +        ret = av_fifo_read(s->fifo, &dst, 1);
> +        if (!dst || ret < 0)
> +            return ret;

You are never writing NULL to the fifo, so you can't ever get NULL back.
And given that you check that you can read, ret < 0 can't happen either.

You could also just use
"if (ret > 0 && av_fifo_read(s->fifo, &dst, 1) >= 0)"
(you need to widen the scope of dst for this).

> +
> +        for (int y = 0; y < src->height; y++) {
> +            memmove(dst->data[0] + (s->y + y) * dst->linesize[0] + s->x * s->max_step[0],
> +                    src->data[0] + y * src->linesize[0], src->width * s->max_step[0]);
> +        }
> +
> +        for (int i = 1; i < 3; i ++) {
> +            if (dst->data[i]) {
> +                for (int y = 0; y < src->height; y++) {
> +                    memmove(dst->data[i] + ((s->y + y) >> s->vsub) * dst->linesize[i] + ((s->x * s->max_step[i]) >> s->hsub),
> +                            src->data[i] + (y >> s->vsub) * src->linesize[i], (src->width * s->max_step[i]) >> s->hsub);
> +                }
> +            }
> +        }
> +
> +        if (dst->data[3]) {
> +            for (int y = 0; y < src->height; y++) {
> +                memmove(dst->data[3] + (s->y + y) * dst->linesize[3] + s->x * s->max_step[3],
> +                        src->data[3] + y * src->linesize[3], src->width * s->max_step[3]);
> +            }
> +        }
> +
> +        ret = ff_filter_frame(ctx->outputs[0], dst);
> +        av_frame_free(&s->feed);
> +        return ret;
> +    }
> +
> +    if (!s->feed) {
> +        AVFrame *in = NULL;
> +
> +        ret = ff_inlink_consume_frame(ctx->inputs[0], &in);
> +        if (ret < 0)
> +            return ret;
> +
> +        if (ret > 0) {
> +            AVFrame *frame;
> +
> +            ret = av_fifo_write(s->fifo, &in, 1);

The frame `in` leaks here if av_fifo_write() fails.

> +            if (ret < 0)
> +                return ret;
> +
> +            frame = av_frame_clone(in);
> +            if (!frame)
> +                return AVERROR(ENOMEM);
> +
> +            frame->width  = s->w;
> +            frame->height = s->h;
> +
> +            frame->data[0] += s->y * frame->linesize[0];
> +            frame->data[0] += s->x * s->max_step[0];
> +
> +            for (int i = 1; i < 3; i ++) {
> +                if (frame->data[i]) {
> +                    frame->data[i] += (s->y >> s->vsub) * frame->linesize[i];
> +                    frame->data[i] += (s->x * s->max_step[i]) >> s->hsub;
> +                }
> +            }
> +
> +            if (frame->data[3]) {
> +                frame->data[3] += s->y * frame->linesize[3];
> +                frame->data[3] += s->x * s->max_step[3];
> +            }
> +
> +            return ff_filter_frame(ctx->outputs[1], frame);
> +        }
> +    }
> +
> +    if (ff_inlink_acknowledge_status(ctx->inputs[0], &status, &pts)) {
> +        ff_outlink_set_status(ctx->outputs[0], status, pts);
> +        ff_outlink_set_status(ctx->outputs[1], status, pts);
> +        return 0;
> +    }
> +
> +    if (ff_inlink_acknowledge_status(ctx->inputs[1], &status, &pts)) {
> +        ff_outlink_set_status(ctx->outputs[0], status, pts);
> +        ff_outlink_set_status(ctx->outputs[1], status, pts);
> +        return 0;
> +    }
> +
> +    if (!s->feed) {
> +        if (ff_outlink_frame_wanted(ctx->outputs[0])) {
> +            ff_inlink_request_frame(ctx->inputs[0]);
> +            ff_inlink_request_frame(ctx->inputs[1]);
> +            return 0;
> +        }
> +    }
> +
> +    return FFERROR_NOT_READY;
> +}
> +
> +static av_cold int init(AVFilterContext *ctx)
> +{
> +    FeedbackContext *s = ctx->priv;
> +
> +    s->fifo = av_fifo_alloc2(8, sizeof(AVFrame *), AV_FIFO_FLAG_AUTO_GROW);
> +    if (!s->fifo)
> +        return AVERROR(ENOMEM);
> +
> +    return 0;
> +}
> +
> +static av_cold void uninit(AVFilterContext *ctx)
> +{
> +    FeedbackContext *s = ctx->priv;
> +
> +    for (int n = 0; n < av_fifo_can_read(s->fifo); n++) {

This construct is completely wrong:
1. It just crashes in case the fifo has not been allocated (e.g. due to
allocation error).
2. It calls av_fifo_can_read() on every iteration of the loop and
increments n on every iteration of the loop. But av_fifo_read() in the
loop body decrements the number of items one can read by 1, so that one
gets a leak as soon as the number of initial items in the fifo is >= 2.

Here is a working approach:

if (s->fifo) {
    AVFrame *frame;

    while (av_fifo_read(s->fifo, &frame, 1) >= 0)
        av_frame_free(&frame);

    av_fifo_freep2(&s->fifo);
}

> +        AVFrame *frame = NULL;
> +
> +        av_fifo_read(s->fifo, &frame, 1);
> +
> +        av_frame_free(&frame);
> +    }
> +
> +    av_fifo_freep2(&s->fifo);
> +}
> +
> +static const AVFilterPad inputs[] = {
> +    {
> +        .name         = "default",
> +        .type         = AVMEDIA_TYPE_VIDEO,
> +        .config_props = config_input,
> +    },
> +    {
> +        .name         = "feedin",
> +        .type         = AVMEDIA_TYPE_VIDEO,
> +        .config_props = config_input,
> +    },
> +};
> +
> +static const AVFilterPad outputs[] = {
> +    {
> +        .name         = "default",
> +        .type         = AVMEDIA_TYPE_VIDEO,
> +        .config_props = config_output,
> +    },
> +    {
> +        .name         = "feedout",
> +        .type         = AVMEDIA_TYPE_VIDEO,
> +        .config_props = config_output,
> +    },
> +};
> +
> +#define OFFSET(x) offsetof(FeedbackContext, x)
> +#define FLAGS (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_FILTERING_PARAM)
> +#define TFLAGS (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_RUNTIME_PARAM)
> +
> +static const AVOption feedback_options[] = {
> +    { "x", "set top left crop position", OFFSET(x), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, TFLAGS },
> +    { "y", "set top left crop position", OFFSET(y), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, TFLAGS },
> +    { "w", "set crop size",              OFFSET(w), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, FLAGS },
> +    { "h", "set crop size",              OFFSET(h), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, FLAGS },
> +    { NULL }
> +};
> +
> +AVFILTER_DEFINE_CLASS(feedback);
> +
> +const AVFilter ff_vf_feedback = {
> +    .name        = "feedback",
> +    .description = NULL_IF_CONFIG_SMALL("Apply feedback video filter."),
> +    .priv_class  = &feedback_class,
> +    .priv_size   = sizeof(FeedbackContext),
> +    .activate    = activate,
> +    .init        = init,
> +    .uninit      = uninit,
> +    FILTER_INPUTS(inputs),
> +    FILTER_OUTPUTS(outputs),
> +    FILTER_QUERY_FUNC(query_formats),
> +};