[FFmpeg-devel] [PATCH v2] lavfi/vf_find_rect: add dual input support

Thu Jun 27 05:12:35 EEST 2019

From: Limin Wang <lance.lmwang at gmail.com>

 Please use the command below for testing:
 ./ffmpeg -i input.ts -i ref.png -filter_complex find_rect,cover_rect=cover.jpg:mode=cover output.ts

 The help documentation has been updated to reflect this change.

Signed-off-by: Limin Wang <lance.lmwang at gmail.com>
---
 doc/filters.texi           |  12 +--
 libavfilter/vf_find_rect.c | 172 +++++++++++++++++++++++--------------
 2 files changed, 116 insertions(+), 68 deletions(-)

diff --git a/doc/filters.texi b/doc/filters.texi
index 2d9af46a6b..ceb66aba3d 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -10156,12 +10156,14 @@ Set color for pixels in fixed mode. Default is @var{black}.
 
 Find a rectangular object
 
+This filter takes two video inputs: the first input is considered
+the "main" source and is passed unchanged to the output. The second
+input is the rectangular object to search for; it is automatically
+converted to the gray8 format.
+
 It accepts the following options:
 
 @table @option
-@item object
-Filepath of the object image, needs to be in gray8.
-
 @item threshold
 Detection threshold, default is 0.5.
 
@@ -10178,7 +10180,7 @@ Specifies the rectangle in which to search.
 @item
 Cover a rectangular object by the supplied image of a given video using @command{ffmpeg}:
 @example
-ffmpeg -i file.ts -vf find_rect=newref.pgm,cover_rect=cover.jpg:mode=cover new.mkv
+ffmpeg -i file.ts -i newref.pgm -filter_complex find_rect,cover_rect=cover.jpg:mode=cover new.mkv
 @end example
 @end itemize
 
@@ -10212,7 +10214,7 @@ Default value is @var{blur}.
 @item
 Cover a rectangular object by the supplied image of a given video using @command{ffmpeg}:
 @example
-ffmpeg -i file.ts -vf find_rect=newref.pgm,cover_rect=cover.jpg:mode=cover new.mkv
+ffmpeg -i file.ts -i newref.pgm -filter_complex find_rect,cover_rect=cover.jpg:mode=cover new.mkv
 @end example
 @end itemize
 
diff --git a/libavfilter/vf_find_rect.c b/libavfilter/vf_find_rect.c
index d7e6579af7..055d2d5f4a 100644
--- a/libavfilter/vf_find_rect.c
+++ b/libavfilter/vf_find_rect.c
@@ -18,13 +18,10 @@
  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  */
 
-/**
- * @todo switch to dualinput
- */
-
 #include "libavutil/avassert.h"
 #include "libavutil/imgutils.h"
 #include "libavutil/opt.h"
+#include "framesync.h"
 #include "internal.h"
 
 #include "lavfutils.h"
@@ -36,9 +33,9 @@ typedef struct FOCContext {
     float threshold;
     int mipmaps;
     int xmin, ymin, xmax, ymax;
-    char *obj_filename;
     int last_x, last_y;
-    AVFrame *obj_frame;
+    FFFrameSync fs;
+
     AVFrame *needle_frame[MAX_MIPMAPS];
     AVFrame *haystack_frame[MAX_MIPMAPS];
 } FOCContext;
@@ -46,7 +43,6 @@ typedef struct FOCContext {
 #define OFFSET(x) offsetof(FOCContext, x)
 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
 static const AVOption find_rect_options[] = {
-    { "object", "object bitmap filename", OFFSET(obj_filename), AV_OPT_TYPE_STRING, {.str = NULL}, .flags = FLAGS },
     { "threshold", "set threshold", OFFSET(threshold), AV_OPT_TYPE_FLOAT, {.dbl = 0.5}, 0, 1.0, FLAGS },
     { "mipmaps", "set mipmaps", OFFSET(mipmaps), AV_OPT_TYPE_INT, {.i64 = 3}, 1, MAX_MIPMAPS, FLAGS },
     { "xmin", "", OFFSET(xmin), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, FLAGS },
@@ -56,17 +52,32 @@ static const AVOption find_rect_options[] = {
     { NULL }
 };
 
-AVFILTER_DEFINE_CLASS(find_rect);
+FRAMESYNC_DEFINE_CLASS(find_rect, FOCContext, fs);
 
 static int query_formats(AVFilterContext *ctx)
 {
-    static const enum AVPixelFormat pix_fmts[] = {
-        AV_PIX_FMT_YUV420P,
-        AV_PIX_FMT_YUVJ420P,
-        AV_PIX_FMT_NONE
-    };
+    static const enum AVPixelFormat in_fmts[]    = {AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_NONE};
+    static const enum AVPixelFormat obj_fmts[]   = {AV_PIX_FMT_GRAY8, AV_PIX_FMT_NONE};
+    static const enum AVPixelFormat out_fmts[]   = {AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_NONE};
+    int ret;
+    AVFilterFormats *in    = ff_make_format_list(in_fmts);
+    AVFilterFormats *obj   = ff_make_format_list(obj_fmts);
+    AVFilterFormats *out   = ff_make_format_list(out_fmts);
+
+    if (!in || !obj || !out) {
+        av_freep(&in);
+        av_freep(&obj);
+        av_freep(&out);
+        return AVERROR(ENOMEM);
+    }
+
+    if ((ret = ff_formats_ref(in   , &ctx->inputs[0]->out_formats)) < 0 ||
+        (ret = ff_formats_ref(obj  , &ctx->inputs[1]->out_formats)) < 0 ||
+        (ret = ff_formats_ref(out  , &ctx->outputs[0]->in_formats)) < 0)
+        return ret;
+
+    return 0;
 
-    return ff_set_common_formats(ctx, ff_make_format_list(pix_fmts));
 }
 
 static AVFrame *downscale(AVFrame *in)
@@ -140,19 +151,54 @@ static float compare(const AVFrame *haystack, const AVFrame *obj, int offx, int
     return 1 - fabs(c);
 }
 
-static int config_input(AVFilterLink *inlink)
+static int config_main_input(AVFilterLink *inlink)
+{
+    AVFilterContext *ctx = inlink->dst;
+
+    av_log(ctx, AV_LOG_DEBUG, "main input width: %d, height: %d\n", inlink->w, inlink->h);
+    return 0;
+}
+
+static int config_find_rect_input(AVFilterLink *inlink)
 {
     AVFilterContext *ctx = inlink->dst;
     FOCContext *foc = ctx->priv;
+    AVFilterLink *mainlink = ctx->inputs[0];
+
+    if (inlink->format !=  AV_PIX_FMT_GRAY8) {
+        av_log(ctx, AV_LOG_ERROR, "object input is not a grayscale input: %s\n",
+                av_get_pix_fmt_name(inlink->format));
+        return AVERROR(EINVAL);
+    }
 
     if (foc->xmax <= 0)
-        foc->xmax = inlink->w - foc->obj_frame->width;
+        foc->xmax = mainlink->w - inlink->w;
     if (foc->ymax <= 0)
-        foc->ymax = inlink->h - foc->obj_frame->height;
+        foc->ymax = mainlink->h - inlink->h;
 
+    av_log(ctx, AV_LOG_DEBUG, "object input width: %d, height: %d\n", inlink->w, inlink->h);
     return 0;
 }
 
+static int config_output(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    FOCContext *foc = ctx->priv;
+    int ret;
+    AVFilterLink *mainlink = ctx->inputs[0];
+
+    if ((ret = ff_framesync_init_dualinput(&foc->fs, ctx)) < 0)
+        return ret;
+
+    outlink->w = mainlink->w;
+    outlink->h = mainlink->h;
+    outlink->time_base = mainlink->time_base;
+    outlink->sample_aspect_ratio = mainlink->sample_aspect_ratio;
+    outlink->frame_rate = mainlink->frame_rate;
+
+    return ff_framesync_configure(&foc->fs);
+}
+
 static float search(FOCContext *foc, int pass, int maxpass, int xmin, int xmax, int ymin, int ymax, int *best_x, int *best_y, float best_score)
 {
     int x, y;
@@ -180,19 +226,33 @@ static float search(FOCContext *foc, int pass, int maxpass, int xmin, int xmax,
     return best_score;
 }
 
-static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+static int do_find_rect(FFFrameSync *fs)
 {
-    AVFilterContext *ctx = inlink->dst;
+    AVFilterContext *ctx = fs->parent;
+    AVFrame *mainframe, *second;
     FOCContext *foc = ctx->priv;
     float best_score;
     int best_x, best_y;
-    int i;
+    int ret, i;
+
+    ret = ff_framesync_dualinput_get_writable(fs, &mainframe, &second);
+    if (ret < 0)
+        return ret;
+    if (!second)
+        return ff_filter_frame(ctx->outputs[0], mainframe);
 
-    foc->haystack_frame[0] = av_frame_clone(in);
+    foc->haystack_frame[0] = av_frame_clone(mainframe);
     for (i=1; i<foc->mipmaps; i++) {
         foc->haystack_frame[i] = downscale(foc->haystack_frame[i-1]);
     }
 
+    foc->needle_frame[0] = av_frame_clone(second);
+    for (i = 1; i < foc->mipmaps; i++) {
+        foc->needle_frame[i] = downscale(foc->needle_frame[i-1]);
+        if (!foc->needle_frame[i])
+            return AVERROR(ENOMEM);
+    }
+
     best_score = search(foc, 0, 0,
                         FFMAX(foc->xmin, foc->last_x - 8),
                         FFMIN(foc->xmax, foc->last_x + 8),
@@ -207,22 +267,25 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
         av_frame_free(&foc->haystack_frame[i]);
     }
 
+    for (i = 1; i < foc->mipmaps; i++) {
+        av_frame_free(&foc->needle_frame[i]);
+    }
+
     if (best_score > foc->threshold) {
-        return ff_filter_frame(ctx->outputs[0], in);
+        return ff_filter_frame(ctx->outputs[0], mainframe);
     }
 
     av_log(ctx, AV_LOG_DEBUG, "Found at %d %d score %f\n", best_x, best_y, best_score);
     foc->last_x = best_x;
     foc->last_y = best_y;
 
-    av_frame_make_writable(in);
+    av_frame_make_writable(mainframe);
 
-    av_dict_set_int(&in->metadata, "lavfi.rect.w", foc->obj_frame->width, 0);
-    av_dict_set_int(&in->metadata, "lavfi.rect.h", foc->obj_frame->height, 0);
-    av_dict_set_int(&in->metadata, "lavfi.rect.x", best_x, 0);
-    av_dict_set_int(&in->metadata, "lavfi.rect.y", best_y, 0);
-
-    return ff_filter_frame(ctx->outputs[0], in);
+    av_dict_set_int(&mainframe->metadata, "lavfi.rect.w", second->width, 0);
+    av_dict_set_int(&mainframe->metadata, "lavfi.rect.h", second->height, 0);
+    av_dict_set_int(&mainframe->metadata, "lavfi.rect.x", best_x, 0);
+    av_dict_set_int(&mainframe->metadata, "lavfi.rect.y", best_y, 0);
+    return ff_filter_frame(ctx->outputs[0], mainframe);
 }
 
 static av_cold void uninit(AVFilterContext *ctx)
@@ -234,52 +297,32 @@ static av_cold void uninit(AVFilterContext *ctx)
         av_frame_free(&foc->needle_frame[i]);
         av_frame_free(&foc->haystack_frame[i]);
     }
-
-    if (foc->obj_frame)
-        av_freep(&foc->obj_frame->data[0]);
-    av_frame_free(&foc->obj_frame);
 }
 
 static av_cold int init(AVFilterContext *ctx)
 {
     FOCContext *foc = ctx->priv;
-    int ret, i;
-
-    if (!foc->obj_filename) {
-        av_log(ctx, AV_LOG_ERROR, "object filename not set\n");
-        return AVERROR(EINVAL);
-    }
-
-    foc->obj_frame = av_frame_alloc();
-    if (!foc->obj_frame)
-        return AVERROR(ENOMEM);
-
-    if ((ret = ff_load_image(foc->obj_frame->data, foc->obj_frame->linesize,
-                             &foc->obj_frame->width, &foc->obj_frame->height,
-                             &foc->obj_frame->format, foc->obj_filename, ctx)) < 0)
-        return ret;
-
-    if (foc->obj_frame->format != AV_PIX_FMT_GRAY8) {
-        av_log(ctx, AV_LOG_ERROR, "object image is not a grayscale image\n");
-        return AVERROR(EINVAL);
-    }
-
-    foc->needle_frame[0] = av_frame_clone(foc->obj_frame);
-    for (i = 1; i < foc->mipmaps; i++) {
-        foc->needle_frame[i] = downscale(foc->needle_frame[i-1]);
-        if (!foc->needle_frame[i])
-            return AVERROR(ENOMEM);
-    }
 
+    foc->fs.on_event = do_find_rect;
     return 0;
 }
 
+static int activate(AVFilterContext *ctx)
+{
+    FOCContext *foc = ctx->priv;
+    return ff_framesync_activate(&foc->fs);
+}
+
 static const AVFilterPad foc_inputs[] = {
     {
-        .name         = "default",
+        .name         = "main",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .config_props = config_main_input,
+    },
+    {
+        .name         = "object",
         .type         = AVMEDIA_TYPE_VIDEO,
-        .config_props = config_input,
-        .filter_frame = filter_frame,
+        .config_props = config_find_rect_input,
     },
     { NULL }
 };
@@ -288,6 +331,7 @@ static const AVFilterPad foc_outputs[] = {
     {
         .name = "default",
         .type = AVMEDIA_TYPE_VIDEO,
+        .config_props  = config_output,
     },
     { NULL }
 };
@@ -296,7 +340,9 @@ AVFilter ff_vf_find_rect = {
     .name            = "find_rect",
     .description     = NULL_IF_CONFIG_SMALL("Find a user specified object."),
     .priv_size       = sizeof(FOCContext),
+    .preinit         = find_rect_framesync_preinit,
     .init            = init,
+    .activate        = activate,
     .uninit          = uninit,
     .query_formats   = query_formats,
     .inputs          = foc_inputs,
-- 
2.21.0