[FFmpeg-devel] [PATCH v5 10/12] avfilter/textmod: Add textmod filter

Mon Sep 13 03:10:50 EEST 2021

13 Sept 2021, 00:34 by softworkz at hotmail.com:

>
>
>> -----Original Message-----
>> From: ffmpeg-devel <ffmpeg-devel-bounces at ffmpeg.org> On Behalf Of
>> Andreas Rheinhardt
>> Sent: Sunday, 12 September 2021 23:56
>> To: ffmpeg-devel at ffmpeg.org
>> Subject: Re: [FFmpeg-devel] [PATCH v5 10/12] avfilter/textmod: Add
>> textmod filter
>>
>> Soft Works:
>> > Signed-off-by: softworkz <softworkz at hotmail.com>
>> > ---
>> >  doc/filters.texi         |  64 +++++++
>> >  libavfilter/Makefile     |   3 +
>> >  libavfilter/allfilters.c |   1 +
>> >  libavfilter/sf_textmod.c | 381
>> +++++++++++++++++++++++++++++++++++++++
>> >  4 files changed, 449 insertions(+)
>> >  create mode 100644 libavfilter/sf_textmod.c
>> >
>> > diff --git a/doc/filters.texi b/doc/filters.texi
>> > index 1d76461ada..9fd2876d63 100644
>> > --- a/doc/filters.texi
>> > +++ b/doc/filters.texi
>> > @@ -25024,6 +25024,70 @@ existing filters using @code{--disable-
>> filters}.
>>
>
> [...]
>
>> > +static void uninit(AVFilterContext *ctx)
>> > +{
>> > +    TextModContext *s = ctx->priv;
>> > +    int i;
>> > +
>> > +    for (i = 0; i < s->nb_find_list; i++) {
>> > +        av_free(&s->find_list[i]);
>>
>> This is completely wrong and will crash: You either want to do
>> av_freep(&s->find_list[i]) or av_free(s->find_list[i]) if these
>> strings
>> were independently allocated; but looking at split_string() shows
>> that
>> they are not, they are substrings of s->find. Similar for the loop
>> below.
>>
>
> You are right of course, thanks.
>
>> > +    }
>> > +    s->nb_find_list = 0;
>> > +    av_freep(&s->find_list);
>> > +
>> > +    for (i = 0; i < s->nb_replace_list; i++) {
>> > +        av_free(&s->replace_list[i]);
>> > +    }
>> > +    s->nb_replace_list = 0;
>> > +    av_freep(&s->replace_list);
>> > +}
>> > +
>> > +static int query_formats(AVFilterContext *ctx)
>> > +{
>> > +    AVFilterFormats *formats;
>> > +    AVFilterLink *inlink = ctx->inputs[0];
>> > +    AVFilterLink *outlink = ctx->outputs[0];
>> > +    static const enum AVSubtitleType subtitle_fmts[] = {
>> AV_SUBTITLE_FMT_ASS, AV_SUBTITLE_FMT_NONE };
>> > +    int ret;
>> > +
>> > +    /* set input subtitle format */
>> > +    formats = ff_make_format_list(subtitle_fmts);
>> > +    if ((ret = ff_formats_ref(formats, &inlink->outcfg.formats)) <
>> 0)
>> > +        return ret;
>> > +
>> > +    /* set output video format */
>> > +    if ((ret = ff_formats_ref(formats, &outlink->incfg.formats)) <
>> 0)
>> > +        return ret;
>> > +
>> > +    return 0;
>> > +}
>> > +
>> > +static char *process_text(TextModContext *s, char *text)
>> > +{
>> > +    const char *char_src = s->find;
>> > +    const char *char_dst = s->replace;
>> > +    char *result = NULL;
>> > +    int escape_level = 0, k = 0;
>> > +
>> > +    switch (s->operation) {
>> > +    case OP_LEET:
>> > +    case OP_REPLACE_CHARS:
>> > +
>> > +        if (s->operation == OP_LEET) {
>> > +            char_src = leet_src;
>> > +            char_dst = leet_dst;
>> > +        }
>> > +
>> > +        result = av_strdup(text);
>> > +        if (!result)
>> > +            return NULL;
>> > +
>> > +        for (size_t n = 0; n < strlen(result); n++) {
>> > +            if (result[n] == '{')
>> > +                escape_level++;
>> > +
>> > +            if (!escape_level) {
>> > +                for (size_t t = 0; t < FF_ARRAY_ELEMS(char_src);
>> t++) {
>> > +                    if (result[n] == char_src[t]) {
>> > +                        result[n] = char_dst[t];
>> > +                        break;
>> > +                    }
>> > +                }
>> > +            }
>> > +
>> > +            if (result[n] == '}')
>> > +                escape_level--;
>> > +        }
>> > +
>> > +        break;
>> > +    case OP_TO_UPPER:
>> > +    case OP_TO_LOWER:
>> > +
>> > +        result = av_strdup(text);
>> > +        if (!result)
>> > +            return NULL;
>> > +
>> > +        for (size_t n = 0; n < strlen(result); n++) {
>> > +            if (result[n] == '{')
>> > +                escape_level++;
>> > +            if (!escape_level)
>> > +                result[n] = s->operation == OP_TO_LOWER ?
>> av_tolower(result[n]) : av_toupper(result[n]);
>> > +            if (result[n] == '}')
>> > +                escape_level--;
>> > +        }
>> > +
>> > +        break;
>> > +    case OP_REMOVE_CHARS:
>> > +
>> > +        result = av_strdup(text);
>> > +        if (!result)
>> > +            return NULL;
>> > +
>> > +        for (size_t n = 0; n < strlen(result); n++) {
>> > +            int skip_char = 0;
>> > +
>> > +            if (result[n] == '{')
>> > +                escape_level++;
>> > +
>> > +            if (!escape_level) {
>> > +                for (size_t t = 0; t < FF_ARRAY_ELEMS(char_src);
>> t++) {
>> > +                    if (result[n] == char_src[t]) {
>> > +                        skip_char = 1;
>> > +                        break;
>> > +                    }
>> > +                }
>> > +            }
>> > +
>> > +            if (!skip_char)
>> > +                result[k++] = result[n];
>> > +
>> > +            if (result[n] == '}')
>> > +                escape_level--;
>> > +        }
>> > +
>> > +        result[k] = 0;
>> > +
>> > +        break;
>> > +    case OP_REPLACE_WORDS:
>> > +    case OP_REMOVE_WORDS:
>> > +
>> > +        result = av_strdup(text);
>> > +        if (!result)
>> > +            return NULL;
>> > +
>> > +        for (int n = 0; n < s->nb_find_list; n++) {
>> > +            char *tmp           = result;
>> > +            const char *replace = (s->operation ==
>> OP_REPLACE_WORDS) ? s->replace_list[n] : "";
>> > +
>> > +            result = av_strireplace(result, s->find_list[n],
>> replace);
>> > +            if (!result)
>> > +                return NULL;
>> > +
>> > +            av_free(tmp);
>> > +        }
>> > +
>> > +        break;
>> > +    }
>> > +
>> > +    return result;
>> > +}
>> > +
>> > +static char *process_dialog(TextModContext *s, char *ass_line)
>> > +{
>> > +    ASSDialog *dialog = ff_ass_split_dialog(NULL, ass_line);
>> > +    char *result, *text;
>> > +
>> > +    if (!dialog)
>> > +        return NULL;
>> > +
>> > +    text = process_text(s, dialog->text);
>> > +    if (!text)
>> > +        return NULL;
>> > +
>> > +    result = ff_ass_get_dialog(dialog->readorder, dialog->layer,
>> dialog->style, dialog->name, text);
>> > +
>> > +    av_free(text);
>> > +    ff_ass_free_dialog(&dialog);
>> > +    return result;
>> > +}
>> > +
>> > +static int filter_frame(AVFilterLink *inlink, AVFrame *src_frame)
>> > +{
>> > +    TextModContext *s = inlink->dst->priv;
>> > +    AVFilterLink *outlink = inlink->dst->outputs[0];
>> > +    int ret;
>> > +    AVFrame *out;
>> > +
>> > +    outlink->format = inlink->format;
>> > +
>> > +    out = av_frame_clone(src_frame);
>>
>> Why clone? You can just reuse src_frame as is.
>>
>
> [..]
>
>>
>> You may not be the sole owner of this AVSubtitleRect; after all,
>> they are shared. Ergo you must not modify it. Is it possible that you
>> believed that av_frame_clone() would make the frame writable? It does
>> not. For non-subtitle frames, av_frame_make_writable() makes them
>> writable; but it does not for subtitles, because you made
>> av_frame_get_buffer2() a no-op for subtitles and so
>> av_frame_make_writable() will temporarily increment the refcount and
>> then decrement it again.
>>
>
> One unsolved problem I have about dealing with AVSubtitleRect
> as being part of AVFrame is that it's not possible to make a copy, 
> in a reliable way because the allocated sizes of the data[4] pointers
> are not reliably known.
>

We have cropping fields and cropping side data (IIRC) now,
can they be used for this?

> Usually, data[0] is the image and data[1] is the palette, but will 
> it always be like this? Then better not have a data array but 
> named pointer variables instead.
>
>
> I was not sure what will be the general route: Merging AVSubtitle
> into AVFrame or attaching AVSubtitle as a property to AVFrame.
>

I think that's not really the best way to go. Subtitles ought to
be contained in the data[] fields and described by the other
fields like with audio and video. While we do sometimes
have data[] contain pointers to structs (hardware frames),
for this case it's a crude solution.