[FFmpeg-devel] [PATCH V2] lavf/vf_ocr: add subregion support

Gyan Doshi ffmpeg at gyani.pro
Thu Jun 17 19:38:52 EEST 2021



On 2021-06-17 20:28, Lingjiang Fang wrote:
> fix bugs of previous patch, ping for review
> ---
>   doc/filters.texi     |  7 +++++++
>   libavfilter/vf_ocr.c | 35 ++++++++++++++++++++++++++++++++++-
>   2 files changed, 41 insertions(+), 1 deletion(-)
>
> diff --git a/doc/filters.texi b/doc/filters.texi
> index da8f7d7726..a955cf46e0 100644
> --- a/doc/filters.texi
> +++ b/doc/filters.texi
> @@ -15451,6 +15451,13 @@ Set character whitelist.
>   
>   @item blacklist
>   Set character blacklist.
> +
> +@item x, y
> +Set top point position of subregion, not support expression now

This isn't idiomatic. And the docs should state what the option accepts, 
not what it doesn't.

Change to

     Set position of top-left corner, in pixels.


> +
> +@item w, h
> +Set width and height of subregion
> +
>   @end table
>   
>   The filter exports recognized text as the frame metadata @code{lavfi.ocr.text}.
> diff --git a/libavfilter/vf_ocr.c b/libavfilter/vf_ocr.c
> index 6de474025a..e96dce2d87 100644
> --- a/libavfilter/vf_ocr.c
> +++ b/libavfilter/vf_ocr.c
> @@ -33,6 +33,8 @@ typedef struct OCRContext {
>       char *language;
>       char *whitelist;
>       char *blacklist;
> +    int x, y;
> +    int w, h;
>   
>       TessBaseAPI *tess;
>   } OCRContext;
> @@ -45,6 +47,10 @@ static const AVOption ocr_options[] = {
>       { "language",  "set language",            OFFSET(language),  AV_OPT_TYPE_STRING, {.str="eng"}, 0, 0, FLAGS },
>       { "whitelist", "set character whitelist", OFFSET(whitelist), AV_OPT_TYPE_STRING, {.str="0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.:;,-+_!?\"'[]{}()<>|/\\=*&%$#@!~ "}, 0, 0, FLAGS },
>       { "blacklist", "set character blacklist", OFFSET(blacklist), AV_OPT_TYPE_STRING, {.str=""},    0, 0, FLAGS },
> +    { "x",         "top x of sub region",     OFFSET(x),         AV_OPT_TYPE_INT,    {.i64=0},     0, INT_MAX, FLAGS },
> +    { "y",         "top y of sub region",     OFFSET(y),         AV_OPT_TYPE_INT,    {.i64=0},     0, INT_MAX, FLAGS },
> +    { "w",         "width of sub region",     OFFSET(w),         AV_OPT_TYPE_INT,    {.i64=0},     0, INT_MAX, FLAGS },
> +    { "h",         "height of sub region",    OFFSET(h),         AV_OPT_TYPE_INT,    {.i64=0},     0, INT_MAX, FLAGS },
>       { NULL }
>   };
>   
> @@ -93,6 +99,21 @@ static int query_formats(AVFilterContext *ctx)
>       return ff_set_common_formats(ctx, fmts_list);
>   }
>   
> +static void check_fix(int *x, int *y, int *w, int *h, int pic_w, int pic_h)
> +{
> +    // 0 <= x < pic_w
> +    if (*x >= pic_w)
> +        *x = 0;
> +    // 0 <= y < pic_h
> +    if (*y >= pic_h)
> +        *y = 0;
> +
> +    if (*w == 0 || *w + *x > pic_w)
> +        *w = pic_w - *x;
> +    if (*h == 0 || *h + *y > pic_h)
> +        *h = pic_h - *y;
> +}
> +
>   static int filter_frame(AVFilterLink *inlink, AVFrame *in)
>   {
>       AVDictionary **metadata = &in->metadata;
> @@ -102,8 +123,20 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
>       char *result;
>       int *confs;
>   
> +    // TODO: support expression
> +    int x = s->x;
> +    int y = s->y;
> +    int w = s->w;
> +    int h = s->h;
> +    check_fix(&x, &y, &w, &h, in->width, in->height);
> +    if ( x != s->x || y != s->y  ||
> +        (s->w != 0 && w != s->w) || (s->h != 0 && h != s->h)) {
> +        av_log(s, AV_LOG_WARNING, "config error, subregion changed to x=%d, y=%d, w=%d, h=%d\n",
> +                                                                    x, y, w, h);
> +    }
> +
>       result = TessBaseAPIRect(s->tess, in->data[0], 1,
> -                             in->linesize[0], 0, 0, in->width, in->height);
> +                             in->linesize[0], x, y, w, h);
>       confs = TessBaseAPIAllWordConfidences(s->tess);
>       av_dict_set(metadata, "lavfi.ocr.text", result, 0);
>       for (int i = 0; confs[i] != -1; i++) {



More information about the ffmpeg-devel mailing list