[FFmpeg-devel] [PATCH V3] lavf/vf_ocr: add subregion support

Mon Jul 5 09:54:47 EEST 2021

On Fri, 18 Jun 2021 23:56:56 +0800
Lingjiang Fang <vacingfang at foxmail.com> wrote:

ping for review, thanks 

> fix doc errors, ping for review, thanks :)
> ---
>  doc/filters.texi     |  8 ++++++++
>  libavfilter/vf_ocr.c | 35 ++++++++++++++++++++++++++++++++++-
>  2 files changed, 42 insertions(+), 1 deletion(-)
> 
> diff --git a/doc/filters.texi b/doc/filters.texi
> index da8f7d7726..041fd28c57 100644
> --- a/doc/filters.texi
> +++ b/doc/filters.texi
> @@ -15451,6 +15451,14 @@ Set character whitelist.
>  
>  @item blacklist
>  Set character blacklist.
> +
> +@item x, y
> +Set top-left corner of the subregion, in pixels, default is (0,0).
> +
> +@item w, h
> +Set width and height of the subregion, in pixels,
> +default is the bottom-right part from given top-left corner.
> +
>  @end table
>  
>  The filter exports recognized text as the frame metadata @code{lavfi.ocr.text}.
> diff --git a/libavfilter/vf_ocr.c b/libavfilter/vf_ocr.c
> index 6de474025a..e96dce2d87 100644
> --- a/libavfilter/vf_ocr.c
> +++ b/libavfilter/vf_ocr.c
> @@ -33,6 +33,8 @@ typedef struct OCRContext {
>      char *language;
>      char *whitelist;
>      char *blacklist;
> +    int x, y;
> +    int w, h;
>  
>      TessBaseAPI *tess;
>  } OCRContext;
> @@ -45,6 +47,10 @@ static const AVOption ocr_options[] = {
>      { "language",  "set language",            OFFSET(language),  AV_OPT_TYPE_STRING, {.str="eng"}, 0, 0, FLAGS },
>      { "whitelist", "set character whitelist", OFFSET(whitelist), AV_OPT_TYPE_STRING, {.str="0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.:;,-+_!?\"'[]{}()<>|/\\=*&%$#@!~ "}, 0, 0, FLAGS },
>      { "blacklist", "set character blacklist", OFFSET(blacklist), AV_OPT_TYPE_STRING, {.str=""},    0, 0, FLAGS },
> +    { "x",         "top x of sub region",     OFFSET(x),         AV_OPT_TYPE_INT,    {.i64=0},     0, INT_MAX, FLAGS },
> +    { "y",         "top y of sub region",     OFFSET(y),         AV_OPT_TYPE_INT,    {.i64=0},     0, INT_MAX, FLAGS },
> +    { "w",         "width of sub region",     OFFSET(w),         AV_OPT_TYPE_INT,    {.i64=0},     0, INT_MAX, FLAGS },
> +    { "h",         "height of sub region",    OFFSET(h),         AV_OPT_TYPE_INT,    {.i64=0},     0, INT_MAX, FLAGS },
>      { NULL }
>  };
>  
> @@ -93,6 +99,21 @@ static int query_formats(AVFilterContext *ctx)
>      return ff_set_common_formats(ctx, fmts_list);
>  }
>  
> +static void check_fix(int *x, int *y, int *w, int *h, int pic_w, int pic_h)
> +{
> +    // 0 <= x < pic_w
> +    if (*x >= pic_w)
> +        *x = 0;
> +    // 0 <= y < pic_h
> +    if (*y >= pic_h)
> +        *y = 0;
> +
> +    if (*w == 0 || *w + *x > pic_w)
> +        *w = pic_w - *x;
> +    if (*h == 0 || *h + *y > pic_h)
> +        *h = pic_h - *y;
> +}
> +
>  static int filter_frame(AVFilterLink *inlink, AVFrame *in)
>  {
>      AVDictionary **metadata = &in->metadata;
> @@ -102,8 +123,20 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
>      char *result;
>      int *confs;
>  
> +    // TODO(vacing): support expression
> +    int x = s->x;
> +    int y = s->y;
> +    int w = s->w;
> +    int h = s->h;
> +    check_fix(&x, &y, &w, &h, in->width, in->height);
> +    if ( x != s->x || y != s->y  ||
> +        (s->w != 0 && w != s->w) || (s->h != 0 && h != s->h)) {
> +        av_log(s, AV_LOG_WARNING, "config error, subregion changed to x=%d, y=%d, w=%d, h=%d\n",
> +               x, y, w, h);
> +    }
> +
>      result = TessBaseAPIRect(s->tess, in->data[0], 1,
> -                             in->linesize[0], 0, 0, in->width, in->height);
> +                             in->linesize[0], x, y, w, h);
>      confs = TessBaseAPIAllWordConfidences(s->tess);
>      av_dict_set(metadata, "lavfi.ocr.text", result, 0);
>      for (int i = 0; confs[i] != -1; i++) {


Regards,
Lingjiang Fang