[FFmpeg-devel] drawtext filter
Anton Khirnov
anton at khirnov.net
Thu Mar 16 18:52:25 EET 2023
Overall this patch could use a lot more polish. I really wish you
developed it in a more review-friendly form.
The commit message should use standard form and elaborate on what
advantage do we get from this huge change, which also adds a new
dependendency.
Quoting Francesco Carusi (2023-02-03 15:18:21)
> diff --git a/libavfilter/vf_drawtext.c b/libavfilter/vf_drawtext.c
> index 50012bb258..7a0a255c5e 100644
> --- a/libavfilter/vf_drawtext.c
> +++ b/libavfilter/vf_drawtext.c
You are adding a bunch of trailing whitespace in the file, which is
forbidden.
> @@ -1,4 +1,5 @@
> /*
> + * Copyright (c) 2023 Francesco Carusi
> * Copyright (c) 2011 Stefano Sabatini
> * Copyright (c) 2010 S.N. Hemanth Meenakshisundaram
> * Copyright (c) 2003 Gustavo Sverzut Barbieri <gsbarbieri at yahoo.com.br>
> @@ -20,6 +21,14 @@
> * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> */
>
> +/*
> + * Changelog - 2023
> + *
> + * - This filter now depends on libharfbuzz for text shaping.
> + * - Glyphs position is now accurate to 1/4 pixel in both directions
> + * - The default line height is now the one defined in the font
> + */
This is not the place for a changelog, that's what commit messages and
the Changelog file are for.
> +
> /**
> * @file
> * drawtext filter, based on the original vhook/drawtext.c
> @@ -72,14 +81,20 @@
> #include FT_GLYPH_H
> #include FT_STROKER_H
>
> +#include <hb.h>
> +#include <hb-ft.h>
> +
> +// Ceiling operation for positive integers division
> +#define POS_CEIL(x, y) ((x)/(y) + ((x)%(y) != 0))
> +
> static const char *const var_names[] = {
> "dar",
> "hsub", "vsub",
> - "line_h", "lh", ///< line height, same as max_glyph_h
> + "line_h", "lh", ///< line height
> "main_h", "h", "H", ///< height of the input video
> "main_w", "w", "W", ///< width of the input video
> - "max_glyph_a", "ascent", ///< max glyph ascent
> - "max_glyph_d", "descent", ///< min glyph descent
> + "max_glyph_a", "ascent", ///< max glyph ascender
> + "max_glyph_d", "descent", ///< min glyph descender
Seems like this should not be in this patch.
> static const AVOption drawtext_options[]= {
> - {"fontfile", "set font file", OFFSET(fontfile), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS},
> - {"text", "set text", OFFSET(text), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS},
> - {"textfile", "set text file", OFFSET(textfile), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS},
> - {"fontcolor", "set foreground color", OFFSET(fontcolor.rgba), AV_OPT_TYPE_COLOR, {.str="black"}, 0, 0, FLAGS},
> + {"fontfile", "set font file", OFFSET(fontfile), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS},
> + {"text", "set text", OFFSET(text), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS},
> + {"textfile", "set text file", OFFSET(textfile), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS},
> + {"fontcolor", "set foreground color", OFFSET(fontcolor.rgba), AV_OPT_TYPE_COLOR, {.str="black"}, 0, 0, FLAGS},
> {"fontcolor_expr", "set foreground color expression", OFFSET(fontcolor_expr), AV_OPT_TYPE_STRING, {.str=""}, 0, 0, FLAGS},
> - {"boxcolor", "set box color", OFFSET(boxcolor.rgba), AV_OPT_TYPE_COLOR, {.str="white"}, 0, 0, FLAGS},
> - {"bordercolor", "set border color", OFFSET(bordercolor.rgba), AV_OPT_TYPE_COLOR, {.str="black"}, 0, 0, FLAGS},
> - {"shadowcolor", "set shadow color", OFFSET(shadowcolor.rgba), AV_OPT_TYPE_COLOR, {.str="black"}, 0, 0, FLAGS},
> - {"box", "set box", OFFSET(draw_box), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1 , FLAGS},
> - {"boxborderw", "set box border width", OFFSET(boxborderw), AV_OPT_TYPE_INT, {.i64=0}, INT_MIN, INT_MAX , FLAGS},
> - {"line_spacing", "set line spacing in pixels", OFFSET(line_spacing), AV_OPT_TYPE_INT, {.i64=0}, INT_MIN, INT_MAX,FLAGS},
> - {"fontsize", "set font size", OFFSET(fontsize_expr), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0 , FLAGS},
> - {"x", "set x expression", OFFSET(x_expr), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS},
> - {"y", "set y expression", OFFSET(y_expr), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS},
> - {"shadowx", "set shadow x offset", OFFSET(shadowx), AV_OPT_TYPE_INT, {.i64=0}, INT_MIN, INT_MAX , FLAGS},
> - {"shadowy", "set shadow y offset", OFFSET(shadowy), AV_OPT_TYPE_INT, {.i64=0}, INT_MIN, INT_MAX , FLAGS},
> - {"borderw", "set border width", OFFSET(borderw), AV_OPT_TYPE_INT, {.i64=0}, INT_MIN, INT_MAX , FLAGS},
> - {"tabsize", "set tab size", OFFSET(tabsize), AV_OPT_TYPE_INT, {.i64=4}, 0, INT_MAX , FLAGS},
> - {"basetime", "set base time", OFFSET(basetime), AV_OPT_TYPE_INT64, {.i64=AV_NOPTS_VALUE}, INT64_MIN, INT64_MAX , FLAGS},
> + {"boxcolor", "set box color", OFFSET(boxcolor.rgba), AV_OPT_TYPE_COLOR, {.str="white"}, 0, 0, FLAGS},
> + {"bordercolor", "set border color", OFFSET(bordercolor.rgba), AV_OPT_TYPE_COLOR, {.str="black"}, 0, 0, FLAGS},
> + {"shadowcolor", "set shadow color", OFFSET(shadowcolor.rgba), AV_OPT_TYPE_COLOR, {.str="black"}, 0, 0, FLAGS},
> + {"box", "set box", OFFSET(draw_box), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
> + {"boxborderw", "set box borders width", OFFSET(boxborderw), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, FLAGS},
> + {"line_spacing", "set line spacing in pixels", OFFSET(line_spacing), AV_OPT_TYPE_INT, {.i64=-1}, INT_MIN, INT_MAX, FLAGS},
> + {"fontsize", "set font size", OFFSET(fontsize_expr), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS},
> + {"x", "set x expression", OFFSET(x_expr), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS},
> + {"y", "set y expression", OFFSET(y_expr), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS},
> + {"shadowx", "set shadow x offset", OFFSET(shadowx), AV_OPT_TYPE_INT, {.i64=0}, INT_MIN, INT_MAX, FLAGS},
> + {"shadowy", "set shadow y offset", OFFSET(shadowy), AV_OPT_TYPE_INT, {.i64=0}, INT_MIN, INT_MAX, FLAGS},
> + {"borderw", "set border width", OFFSET(borderw), AV_OPT_TYPE_INT, {.i64=0}, INT_MIN, INT_MAX, FLAGS},
> + {"tabsize", "set tab size", OFFSET(tabsize), AV_OPT_TYPE_INT, {.i64=4}, 0, INT_MAX, FLAGS},
> + {"basetime", "set base time", OFFSET(basetime), AV_OPT_TYPE_INT64, {.i64=AV_NOPTS_VALUE}, INT64_MIN, INT64_MAX, FLAGS},
> #if CONFIG_LIBFONTCONFIG
> { "font", "Font name", OFFSET(font), AV_OPT_TYPE_STRING, { .str = "Sans" }, .flags = FLAGS },
> #endif
> @@ -248,15 +338,15 @@ static const AVOption drawtext_options[]= {
> {"strftime", "set strftime expansion (deprecated)", OFFSET(exp_mode), AV_OPT_TYPE_CONST, {.i64=EXP_STRFTIME}, 0, 0, FLAGS, "expansion"},
>
> {"timecode", "set initial timecode", OFFSET(tc_opt_string), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS},
> - {"tc24hmax", "set 24 hours max (timecode only)", OFFSET(tc24hmax), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
> - {"timecode_rate", "set rate (timecode only)", OFFSET(tc_rate), AV_OPT_TYPE_RATIONAL, {.dbl=0}, 0, INT_MAX, FLAGS},
> - {"r", "set rate (timecode only)", OFFSET(tc_rate), AV_OPT_TYPE_RATIONAL, {.dbl=0}, 0, INT_MAX, FLAGS},
> - {"rate", "set rate (timecode only)", OFFSET(tc_rate), AV_OPT_TYPE_RATIONAL, {.dbl=0}, 0, INT_MAX, FLAGS},
> - {"reload", "reload text file at specified frame interval", OFFSET(reload), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, FLAGS},
> - { "alpha", "apply alpha while rendering", OFFSET(a_expr), AV_OPT_TYPE_STRING, { .str = "1" }, .flags = FLAGS },
> - {"fix_bounds", "check and fix text coords to avoid clipping", OFFSET(fix_bounds), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
> - {"start_number", "start frame number for n/frame_num variable", OFFSET(start_number), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, FLAGS},
> - {"text_source", "the source of text", OFFSET(text_source_string), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 1, FLAGS },
> + {"tc24hmax", "set 24 hours max (timecode only)", OFFSET(tc24hmax), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
> + {"timecode_rate", "set rate (timecode only)", OFFSET(tc_rate), AV_OPT_TYPE_RATIONAL, {.dbl=0}, 0, INT_MAX, FLAGS},
> + {"r", "set rate (timecode only)", OFFSET(tc_rate), AV_OPT_TYPE_RATIONAL, {.dbl=0}, 0, INT_MAX, FLAGS},
> + {"rate", "set rate (timecode only)", OFFSET(tc_rate), AV_OPT_TYPE_RATIONAL, {.dbl=0}, 0, INT_MAX, FLAGS},
> + {"reload", "reload text file at specified frame interval", OFFSET(reload), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, FLAGS},
> + {"alpha", "apply alpha while rendering", OFFSET(a_expr), AV_OPT_TYPE_STRING, {.str = "1"}, .flags = FLAGS},
> + {"fix_bounds", "check and fix text coords to avoid clipping", OFFSET(fix_bounds), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
> + {"start_number", "start frame number for n/frame_num variable", OFFSET(start_number), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, FLAGS},
> + {"text_source", "the source of text", OFFSET(text_source_string), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 1, FLAGS },
Most of these changes seem cosmetic, they do not belong in this patch.
>
> #if CONFIG_LIBFRIBIDI
> {"text_shaping", "attempt to shape text before drawing", OFFSET(text_shaping), AV_OPT_TYPE_BOOL, {.i64=1}, 0, 1, FLAGS},
> @@ -297,18 +387,24 @@ static const struct ft_error {
>
> #define FT_ERRMSG(e) ft_errors[e].err_msg
>
> -typedef struct Glyph {
> - FT_Glyph glyph;
> - FT_Glyph border_glyph;
> - uint32_t code;
> - unsigned int fontsize;
> - FT_Bitmap bitmap; ///< array holding bitmaps of font
> - FT_Bitmap border_bitmap; ///< array holding bitmaps of font border
> - FT_BBox bbox;
> - int advance;
> - int bitmap_left;
> - int bitmap_top;
> -} Glyph;
> +
> +// Loads and (optionally) renders a glyph
> +static int load_glyph(AVFilterContext *ctx, Glyph **glyph_ptr, uint32_t code,
> + int8_t shift_x64, int8_t shift_y64);
> +
> +// Shapes a line of text using libharfbuzz
> +static void shape_text_hb(DrawTextContext *s, HarfbuzzData* hb, const char* text, int textLen);
> +
> +// Performs text measurements
> +static int measure_text(AVFilterContext *ctx, TextMetrics *metrics);
> +
> +// Draws glyphs on the frame
> +static int draw_glyphs(DrawTextContext *s, AVFrame *frame,
> + FFDrawColor *color, TextMetrics *metrics,
> + int x, int y, int borderw);
> +
> +// Draws text on the frame
> +static int draw_text(AVFilterContext *ctx, AVFrame *frame);
Why is there a need for forward declarations?
> static int glyph_cmp(const void *key, const void *b)
> {
> @@ -316,80 +412,9 @@ static int glyph_cmp(const void *key, const void *b)
> int64_t diff = (int64_t)a->code - (int64_t)bb->code;
>
> if (diff != 0)
> - return diff > 0 ? 1 : -1;
> + return diff > 0 ? 1 : -1;
unrelated cosmetics
> @@ -439,7 +464,6 @@ static av_cold int update_fontsize(AVFilterContext *ctx)
> return err;
>
> size = av_expr_eval(s->fontsize_pexpr, s->var_values, &s->prng);
> -
unrelated cosmetics
> if (!isnan(size)) {
> roundedsize = round(size);
> // test for overflow before cast
> @@ -447,7 +471,6 @@ static av_cold int update_fontsize(AVFilterContext *ctx)
> av_log(ctx, AV_LOG_ERROR, "fontsize overflow\n");
> return AVERROR(EINVAL);
> }
> -
unrelated cosmetics
> fontsize = roundedsize;
> }
> }
> @@ -548,7 +571,7 @@ static int load_font_fontconfig(AVFilterContext *ctx)
> goto fail;
> }
>
> - av_log(ctx, AV_LOG_INFO, "Using \"%s\"\n", filename);
> + av_log(ctx, AV_LOG_VERBOSE, "Using \"%s\"\n", filename);
unrelated cosmetics
> if (parse_err)
> s->default_fontsize = size + 0.5;
>
> @@ -690,6 +713,7 @@ static int shape_text(AVFilterContext *ctx)
> s->text = tmp;
> len = fribidi_unicode_to_charset(FRIBIDI_CHAR_SET_UTF8,
> unicodestr, len, s->text);
> +
unrelated cosmetics
>
> +static void shape_text_hb(DrawTextContext *s, HarfbuzzData* hb, const char* text, int textLen)
> +{
> + hb->buf = hb_buffer_create();
Seems like it at least this needs to be checked for errors, maybe some
of the other calls too.
> @@ -1559,56 +1795,142 @@ continue_on_invalid2:
> update_color_with_alpha(s, &bordercolor, s->bordercolor);
> update_color_with_alpha(s, &boxcolor , s->boxcolor );
>
> - box_w = max_text_line_w;
> - box_h = y + s->max_glyph_h;
> + if (s->draw_box && s->boxborderw) {
> + s->bb_top = s->bb_right = s->bb_bottom = s->bb_left = s->boxborderw;
> + } else {
> + s->bb_top = s->bb_right = s->bb_bottom = s->bb_left = 0;
> + }
>
> if (s->fix_bounds) {
> -
> /* calculate footprint of text effects */
> - int boxoffset = s->draw_box ? FFMAX(s->boxborderw, 0) : 0;
> int borderoffset = s->borderw ? FFMAX(s->borderw, 0) : 0;
>
> - int offsetleft = FFMAX3(boxoffset, borderoffset,
> + int offsetleft = FFMAX3(FFMAX(s->bb_left, 0), borderoffset,
> (s->shadowx < 0 ? FFABS(s->shadowx) : 0));
> - int offsettop = FFMAX3(boxoffset, borderoffset,
> + int offsettop = FFMAX3(FFMAX(s->bb_top, 0), borderoffset,
> (s->shadowy < 0 ? FFABS(s->shadowy) : 0));
> -
> - int offsetright = FFMAX3(boxoffset, borderoffset,
> + int offsetright = FFMAX3(FFMAX(s->bb_right, 0), borderoffset,
> (s->shadowx > 0 ? s->shadowx : 0));
> - int offsetbottom = FFMAX3(boxoffset, borderoffset,
> + int offsetbottom = FFMAX3(FFMAX(s->bb_bottom, 0), borderoffset,
> (s->shadowy > 0 ? s->shadowy : 0));
>
> -
> if (s->x - offsetleft < 0) s->x = offsetleft;
> if (s->y - offsettop < 0) s->y = offsettop;
>
> - if (s->x + box_w + offsetright > width)
> - s->x = FFMAX(width - box_w - offsetright, 0);
> - if (s->y + box_h + offsetbottom > height)
> - s->y = FFMAX(height - box_h - offsetbottom, 0);
> + if (s->x + metrics.width + offsetright > width)
> + s->x = FFMAX(width - metrics.width - offsetright, 0);
> + if (s->y + metrics.height + offsetbottom > height)
> + s->y = FFMAX(height - metrics.height - offsetbottom, 0);
> }
>
> - /* draw box */
> - if (s->draw_box)
> - ff_blend_rectangle(&s->dc, &boxcolor,
> - frame->data, frame->linesize, width, height,
> - s->x - s->boxborderw, s->y - s->boxborderw,
> - box_w + s->boxborderw * 2, box_h + s->boxborderw * 2);
> + x = 0;
> + y = 0;
> + x64 = (int)(s->x * 64.);
> + y64 = (int)(s->y * 64. + metrics.offset_top64);
> +
> + for (int l = 0; l < s->line_count; ++l) {
> + TextLine *line = &s->lines[l];
> + HarfbuzzData *hb = &line->hb_data;
> + line->glyphs = av_mallocz(hb->glyph_count * sizeof(GlyphInfo));
> +
> + for (int t = 0; t < hb->glyph_count; ++t) {
> + GlyphInfo *g_info = &line->glyphs[t];
> + uint8_t is_tab = last_tab_idx < s->tab_count &&
> + hb->glyph_info[t].cluster == s->tab_clusters[last_tab_idx] - line->cluster_offset;
> + int true_x, true_y;
> + if (is_tab) {
> + ++last_tab_idx;
> + }
> + true_x = x + hb->glyph_pos[t].x_offset;
> + true_y = y + hb->glyph_pos[t].y_offset;
> + shift_x64 = (((x64 + true_x) >> 4) & 0b0011) << 4;
> + shift_y64 = ((4 - (((y64 + true_y) >> 4) & 0b0011)) & 0b0011) << 4;
> +
> + ret = load_glyph(ctx, &glyph, hb->glyph_info[t].codepoint, shift_x64, shift_y64);
> + if (ret != 0) {
> + return ret;
> + }
> + g_info->code = hb->glyph_info[t].codepoint;
> + g_info->x = (x64 + true_x) >> 6;
> + g_info->y = ((y64 + true_y) >> 6) + (shift_y64 > 0 ? 1 : 0);
> + g_info->shift_x64 = shift_x64;
> + g_info->shift_y64 = shift_y64;
> +
> + if (!is_tab) {
> + x += hb->glyph_pos[t].x_advance;
> + } else {
> + int size = s->blank_advance64 * s->tabsize;
> + x = (x / size + 1) * size;
> + }
> + y += hb->glyph_pos[t].y_advance;
> + }
>
> - if (s->shadowx || s->shadowy) {
> - if ((ret = draw_glyphs(s, frame, width, height,
> - &shadowcolor, s->shadowx, s->shadowy, 0)) < 0)
> - return ret;
> + y += metrics.line_height64 + s->line_spacing * 64;
> + x = 0;
> }
>
> - if (s->borderw) {
> - if ((ret = draw_glyphs(s, frame, width, height,
> - &bordercolor, 0, 0, s->borderw)) < 0)
> + metrics.rect_x = s->x;
> + metrics.rect_y = s->y;
> +
> + s->box_width = metrics.width;
> + s->box_height = metrics.height;
What is the point of duplicating these values in both structs?
--
Anton Khirnov
More information about the ffmpeg-devel
mailing list