[FFmpeg-devel] [PATCH v3] lavfi/vf_find_rect: add dual input support
lance.lmwang at gmail.com
Thu Jun 27 12:40:31 EEST 2019
From: Limin Wang <lance.lmwang at gmail.com>
Please use the command below for testing:
./ffmpeg -i input.ts -i ref.png -filter_complex find_rect,cover_rect=cover.jpg:mode=cover output.ts
The filter documentation has been updated for this change.
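For reference, the graph can also be written with explicit stream labels, which should be equivalent and makes the automatic gray8 conversion of the object input visible; a second command uses the existing metadata filter to check the lavfi.rect.* values the filter exports (file names are just placeholders):

# explicit labels: feed the gray8-converted reference as the object input
./ffmpeg -i input.ts -i ref.png -filter_complex \
  "[1:v]format=gray8[obj];[0:v][obj]find_rect,cover_rect=cover.jpg:mode=cover" output.ts

# print the exported lavfi.rect.x/y/w/h frame metadata without encoding
./ffmpeg -i input.ts -i ref.png -filter_complex \
  "[0:v][1:v]find_rect,metadata=mode=print" -f null -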
Reviewed-by: Moritz Barsnick <barsnick at gmx.net>
Signed-off-by: Limin Wang <lance.lmwang at gmail.com>
---
doc/filters.texi | 12 +--
libavfilter/version.h | 2 +-
libavfilter/vf_find_rect.c | 173 +++++++++++++++++++++++--------------
3 files changed, 118 insertions(+), 69 deletions(-)
diff --git a/doc/filters.texi b/doc/filters.texi
index 2d9af46a6b..92e1dee07e 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -10156,12 +10156,14 @@ Set color for pixels in fixed mode. Default is @var{black}.
Find a rectangular object
+This filter takes two video inputs. The first input is the "main"
+source and is passed unchanged to the output. The second input is
+the rectangular object to search for; it is automatically converted
+to the gray8 format.
+
It accepts the following options:
@table @option
-@item object
-Filepath of the object image, needs to be in gray8.
-
@item threshold
Detection threshold, default is 0.5.
@@ -10178,7 +10180,7 @@ Specifies the rectangle in which to search.
@item
Cover a rectangular object by the supplied image of a given video using @command{ffmpeg}:
@example
-ffmpeg -i file.ts -vf find_rect=newref.pgm,cover_rect=cover.jpg:mode=cover new.mkv
+ffmpeg -i file.ts -i newref.pgm -filter_complex find_rect,cover_rect=cover.jpg:mode=cover new.mkv
@end example
@end itemize
@@ -10212,7 +10214,7 @@ Default value is @var{blur}.
@item
Cover a rectangular object by the supplied image of a given video using @command{ffmpeg}:
@example
-ffmpeg -i file.ts -vf find_rect=newref.pgm,cover_rect=cover.jpg:mode=cover new.mkv
+ffmpeg -i file.ts -i newref.pgm -filter_complex find_rect,cover_rect=cover.jpg:mode=cover new.mkv
@end example
@end itemize
diff --git a/libavfilter/version.h b/libavfilter/version.h
index 91a5a1604e..c0a4b969f1 100644
--- a/libavfilter/version.h
+++ b/libavfilter/version.h
@@ -31,7 +31,7 @@
#define LIBAVFILTER_VERSION_MAJOR 7
#define LIBAVFILTER_VERSION_MINOR 55
-#define LIBAVFILTER_VERSION_MICRO 100
+#define LIBAVFILTER_VERSION_MICRO 101
#define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \
LIBAVFILTER_VERSION_MINOR, \
diff --git a/libavfilter/vf_find_rect.c b/libavfilter/vf_find_rect.c
index d7e6579af7..91757b666d 100644
--- a/libavfilter/vf_find_rect.c
+++ b/libavfilter/vf_find_rect.c
@@ -18,13 +18,10 @@
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
-/**
- * @todo switch to dualinput
- */
-
#include "libavutil/avassert.h"
#include "libavutil/imgutils.h"
#include "libavutil/opt.h"
+#include "framesync.h"
#include "internal.h"
#include "lavfutils.h"
@@ -36,9 +33,9 @@ typedef struct FOCContext {
float threshold;
int mipmaps;
int xmin, ymin, xmax, ymax;
- char *obj_filename;
int last_x, last_y;
- AVFrame *obj_frame;
+ FFFrameSync fs;
+
AVFrame *needle_frame[MAX_MIPMAPS];
AVFrame *haystack_frame[MAX_MIPMAPS];
} FOCContext;
@@ -46,7 +43,6 @@ typedef struct FOCContext {
#define OFFSET(x) offsetof(FOCContext, x)
#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
static const AVOption find_rect_options[] = {
- { "object", "object bitmap filename", OFFSET(obj_filename), AV_OPT_TYPE_STRING, {.str = NULL}, .flags = FLAGS },
{ "threshold", "set threshold", OFFSET(threshold), AV_OPT_TYPE_FLOAT, {.dbl = 0.5}, 0, 1.0, FLAGS },
{ "mipmaps", "set mipmaps", OFFSET(mipmaps), AV_OPT_TYPE_INT, {.i64 = 3}, 1, MAX_MIPMAPS, FLAGS },
{ "xmin", "", OFFSET(xmin), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, FLAGS },
@@ -56,17 +52,32 @@ static const AVOption find_rect_options[] = {
{ NULL }
};
-AVFILTER_DEFINE_CLASS(find_rect);
+FRAMESYNC_DEFINE_CLASS(find_rect, FOCContext, fs);
static int query_formats(AVFilterContext *ctx)
{
- static const enum AVPixelFormat pix_fmts[] = {
- AV_PIX_FMT_YUV420P,
- AV_PIX_FMT_YUVJ420P,
- AV_PIX_FMT_NONE
- };
+ static const enum AVPixelFormat in_fmts[] = {AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_NONE};
+ static const enum AVPixelFormat obj_fmts[] = {AV_PIX_FMT_GRAY8, AV_PIX_FMT_NONE};
+ static const enum AVPixelFormat out_fmts[] = {AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_NONE};
+ int ret;
+ AVFilterFormats *in = ff_make_format_list(in_fmts);
+ AVFilterFormats *obj = ff_make_format_list(obj_fmts);
+ AVFilterFormats *out = ff_make_format_list(out_fmts);
+
+ if (!in || !obj || !out) {
+ av_freep(&in);
+ av_freep(&obj);
+ av_freep(&out);
+ return AVERROR(ENOMEM);
+ }
+
+ if ((ret = ff_formats_ref(in , &ctx->inputs[0]->out_formats)) < 0 ||
+ (ret = ff_formats_ref(obj , &ctx->inputs[1]->out_formats)) < 0 ||
+ (ret = ff_formats_ref(out , &ctx->outputs[0]->in_formats)) < 0)
+ return ret;
+
+ return 0;
- return ff_set_common_formats(ctx, ff_make_format_list(pix_fmts));
}
static AVFrame *downscale(AVFrame *in)
@@ -140,19 +151,54 @@ static float compare(const AVFrame *haystack, const AVFrame *obj, int offx, int
return 1 - fabs(c);
}
-static int config_input(AVFilterLink *inlink)
+static int config_main_input(AVFilterLink *inlink)
+{
+ AVFilterContext *ctx = inlink->dst;
+
+ av_log(ctx, AV_LOG_DEBUG, "main input width: %d, height: %d\n", inlink->w, inlink->h);
+ return 0;
+}
+
+static int config_find_rect_input(AVFilterLink *inlink)
{
AVFilterContext *ctx = inlink->dst;
FOCContext *foc = ctx->priv;
+ AVFilterLink *mainlink = ctx->inputs[0];
+
+ if (inlink->format != AV_PIX_FMT_GRAY8) {
+ av_log(ctx, AV_LOG_ERROR, "object input is not a grayscale input: %s\n",
+ av_get_pix_fmt_name(inlink->format));
+ return AVERROR(EINVAL);
+ }
if (foc->xmax <= 0)
- foc->xmax = inlink->w - foc->obj_frame->width;
+ foc->xmax = mainlink->w - inlink->w;
if (foc->ymax <= 0)
- foc->ymax = inlink->h - foc->obj_frame->height;
+ foc->ymax = mainlink->h - inlink->h;
+ av_log(ctx, AV_LOG_DEBUG, "object input width: %d, height: %d\n", inlink->w, inlink->h);
return 0;
}
+static int config_output(AVFilterLink *outlink)
+{
+ AVFilterContext *ctx = outlink->src;
+ FOCContext *foc = ctx->priv;
+ int ret;
+ AVFilterLink *mainlink = ctx->inputs[0];
+
+ if ((ret = ff_framesync_init_dualinput(&foc->fs, ctx)) < 0)
+ return ret;
+
+ outlink->w = mainlink->w;
+ outlink->h = mainlink->h;
+ outlink->time_base = mainlink->time_base;
+ outlink->sample_aspect_ratio = mainlink->sample_aspect_ratio;
+ outlink->frame_rate = mainlink->frame_rate;
+
+ return ff_framesync_configure(&foc->fs);
+}
+
static float search(FOCContext *foc, int pass, int maxpass, int xmin, int xmax, int ymin, int ymax, int *best_x, int *best_y, float best_score)
{
int x, y;
@@ -180,19 +226,33 @@ static float search(FOCContext *foc, int pass, int maxpass, int xmin, int xmax,
return best_score;
}
-static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+static int do_find_rect(FFFrameSync *fs)
{
- AVFilterContext *ctx = inlink->dst;
+ AVFilterContext *ctx = fs->parent;
+ AVFrame *mainframe, *second;
FOCContext *foc = ctx->priv;
float best_score;
int best_x, best_y;
- int i;
+ int ret, i;
- foc->haystack_frame[0] = av_frame_clone(in);
+ ret = ff_framesync_dualinput_get_writable(fs, &mainframe, &second);
+ if (ret < 0)
+ return ret;
+ if (!second)
+ return ff_filter_frame(ctx->outputs[0], mainframe);
+
+ foc->haystack_frame[0] = av_frame_clone(mainframe);
for (i=1; i<foc->mipmaps; i++) {
foc->haystack_frame[i] = downscale(foc->haystack_frame[i-1]);
}
+ foc->needle_frame[0] = av_frame_clone(second);
+ for (i = 1; i < foc->mipmaps; i++) {
+ foc->needle_frame[i] = downscale(foc->needle_frame[i-1]);
+ if (!foc->needle_frame[i])
+ return AVERROR(ENOMEM);
+ }
+
best_score = search(foc, 0, 0,
FFMAX(foc->xmin, foc->last_x - 8),
FFMIN(foc->xmax, foc->last_x + 8),
@@ -207,22 +267,25 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
av_frame_free(&foc->haystack_frame[i]);
}
+ for (i = 0; i < foc->mipmaps; i++) {
+ av_frame_free(&foc->needle_frame[i]);
+ }
+
if (best_score > foc->threshold) {
- return ff_filter_frame(ctx->outputs[0], in);
+ return ff_filter_frame(ctx->outputs[0], mainframe);
}
av_log(ctx, AV_LOG_DEBUG, "Found at %d %d score %f\n", best_x, best_y, best_score);
foc->last_x = best_x;
foc->last_y = best_y;
- av_frame_make_writable(in);
+ av_frame_make_writable(mainframe);
- av_dict_set_int(&in->metadata, "lavfi.rect.w", foc->obj_frame->width, 0);
- av_dict_set_int(&in->metadata, "lavfi.rect.h", foc->obj_frame->height, 0);
- av_dict_set_int(&in->metadata, "lavfi.rect.x", best_x, 0);
- av_dict_set_int(&in->metadata, "lavfi.rect.y", best_y, 0);
-
- return ff_filter_frame(ctx->outputs[0], in);
+ av_dict_set_int(&mainframe->metadata, "lavfi.rect.w", second->width, 0);
+ av_dict_set_int(&mainframe->metadata, "lavfi.rect.h", second->height, 0);
+ av_dict_set_int(&mainframe->metadata, "lavfi.rect.x", best_x, 0);
+ av_dict_set_int(&mainframe->metadata, "lavfi.rect.y", best_y, 0);
+ return ff_filter_frame(ctx->outputs[0], mainframe);
}
static av_cold void uninit(AVFilterContext *ctx)
@@ -230,56 +293,37 @@ static av_cold void uninit(AVFilterContext *ctx)
FOCContext *foc = ctx->priv;
int i;
+ ff_framesync_uninit(&foc->fs);
for (i = 0; i < MAX_MIPMAPS; i++) {
av_frame_free(&foc->needle_frame[i]);
av_frame_free(&foc->haystack_frame[i]);
}
-
- if (foc->obj_frame)
- av_freep(&foc->obj_frame->data[0]);
- av_frame_free(&foc->obj_frame);
}
static av_cold int init(AVFilterContext *ctx)
{
FOCContext *foc = ctx->priv;
- int ret, i;
-
- if (!foc->obj_filename) {
- av_log(ctx, AV_LOG_ERROR, "object filename not set\n");
- return AVERROR(EINVAL);
- }
-
- foc->obj_frame = av_frame_alloc();
- if (!foc->obj_frame)
- return AVERROR(ENOMEM);
-
- if ((ret = ff_load_image(foc->obj_frame->data, foc->obj_frame->linesize,
- &foc->obj_frame->width, &foc->obj_frame->height,
- &foc->obj_frame->format, foc->obj_filename, ctx)) < 0)
- return ret;
-
- if (foc->obj_frame->format != AV_PIX_FMT_GRAY8) {
- av_log(ctx, AV_LOG_ERROR, "object image is not a grayscale image\n");
- return AVERROR(EINVAL);
- }
-
- foc->needle_frame[0] = av_frame_clone(foc->obj_frame);
- for (i = 1; i < foc->mipmaps; i++) {
- foc->needle_frame[i] = downscale(foc->needle_frame[i-1]);
- if (!foc->needle_frame[i])
- return AVERROR(ENOMEM);
- }
+ foc->fs.on_event = do_find_rect;
return 0;
}
+static int activate(AVFilterContext *ctx)
+{
+ FOCContext *foc = ctx->priv;
+ return ff_framesync_activate(&foc->fs);
+}
+
static const AVFilterPad foc_inputs[] = {
{
- .name = "default",
+ .name = "main",
+ .type = AVMEDIA_TYPE_VIDEO,
+ .config_props = config_main_input,
+ },
+ {
+ .name = "object",
.type = AVMEDIA_TYPE_VIDEO,
- .config_props = config_input,
- .filter_frame = filter_frame,
+ .config_props = config_find_rect_input,
},
{ NULL }
};
@@ -288,6 +332,7 @@ static const AVFilterPad foc_outputs[] = {
{
.name = "default",
.type = AVMEDIA_TYPE_VIDEO,
+ .config_props = config_output,
},
{ NULL }
};
@@ -296,7 +341,9 @@ AVFilter ff_vf_find_rect = {
.name = "find_rect",
.description = NULL_IF_CONFIG_SMALL("Find a user specified object."),
.priv_size = sizeof(FOCContext),
+ .preinit = find_rect_framesync_preinit,
.init = init,
+ .activate = activate,
.uninit = uninit,
.query_formats = query_formats,
.inputs = foc_inputs,
--
2.21.0