[FFmpeg-soc] libavfilter - filter for .ass subtitle rendering using libass

Thu Feb 26 00:45:04 CET 2009

On date Tuesday 2009-02-24 20:28:53 +0300, Alexey Lebedeff encoded:
> On Tue, 24 Feb 2009 13:54:08 +0300, Alexey Lebedeff wrote:
> 
>  AL> If there is any interest in it, I'll do some cleanup to code,
>  AL> properly integrate it into build process, and share the results.
> 
> Here is the very primitive patch to libavfilter Makefile, and source
> of filter itself. No proper build process integration yet.
> 

Content-Description: Makefile patch
> --- Makefile.orig	2009-02-23 10:41:03.000000000 +0000
> +++ Makefile	2009-02-24 16:57:36.000000000 +0000
> @@ -13,6 +13,7 @@
>         graphparser.o \
>  
>  
> +OBJS-$(CONFIG_ASS_FILTER)       += vf_ass.o
>  OBJS-$(CONFIG_CROP_FILTER)       += vf_crop.o
>  OBJS-$(CONFIG_DRAWBOX_FILTER)    += vf_drawbox.o
>  OBJS-$(CONFIG_FPS_FILTER)        += vf_fps.o
> @@ -34,4 +35,8 @@
>  
>  HEADERS = avfilter.h
>  
> +CFLAGS += -I/usr/include/ass
> +EXTRALIBS += -lass
> +
>  include $(SUBDIR)../subdir.mak
> +
> --- allfilters.c.orig	2009-02-23 10:41:03.000000000 +0000
> +++ allfilters.c	2009-02-23 11:26:18.000000000 +0000
> @@ -34,6 +34,7 @@
>          return;
>      initialized = 1;
>  
> +    REGISTER_FILTER(ASS,ass,vf);
>      REGISTER_FILTER(CROP,crop,vf);
>      REGISTER_FILTER(DRAWBOX,drawbox,vf);
>      REGISTER_FILTER(FIFO,fifo,vf);

Content-Description: ass filter
> /*
>  * SSA/ASS subtitles rendering filter, using libssa.
>  * Based on vf_drawbox.c from libavfilter and vf_ass.c from mplayer.
>  *
>  * Copyright (c) 2006 Evgeniy Stepanov <eugeni.stepanov at gmail.com>
>  * Copyright (c) 2008 Affine Systems, Inc (Michael Sullivan, Bobby Impollonia)
>  * Copyright (c) 2009 Alexey Lebedeff <binarin at binarin.ru>
>  *
>  * This program is free software; you can redistribute it and/or
>  * modify it under the terms of the GNU General Public License
>  * as published by the Free Software Foundation; either version 2
>  * of the License, or (at your option) any later version.
> 
>  * This program is distributed in the hope that it will be useful,
>  * but WITHOUT ANY WARRANTY; without even the implied warranty of
>  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>  * GNU General Public License for more details.
> 
>  * You should have received a copy of the GNU General Public License
>  * along with this program; if not, write to the Free Software
>  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
>  * MA 02110-1301, USA.
>  */
> 
> /*
>  * Usage: '-vfilters ass=filename:somefile.ass|margin:50|encoding:utf-8'
>  * Only 'filename' param is mandatory.
>  */
> 
> #include <string.h>
> #include <stdio.h>
> #include <ctype.h>
> 
> #include <ass.h>
> 
> #include "avfilter.h"
> 
> typedef struct
> {
>   ass_library_t *ass_library;
>   ass_renderer_t *ass_renderer;
>   ass_track_t *ass_track;
> 
>   int margin;
>   char *filename;
>   char *encoding;
> 
>   int frame_width, frame_height;
>   int vsub,hsub;   //< chroma subsampling

missing space after the comma

> } AssContext;
> 
> static int parse_args(AVFilterContext *ctx, AssContext *context, const char* args);
> static av_cold int init(AVFilterContext *ctx, const char *args, void *opaque)
> {
>   AssContext *context= ctx->priv;
> 
>   /* defaults */
>   context->margin = 10;
>   context->encoding = "utf-8";
> 
>   if ( parse_args(ctx, context, args) )

if (parse_args(...))

style (no spaces just inside the parentheses) is preferred.

>     return 1;

The documentation only says to return 0 in case of success, but all
the current filters return a negative value in case of failure, which
is consistent with the FFmpeg API, so I'd say to return -1 here and in
all the other cases.

I also find it quite confusing to parse the args in a separate
function, but maybe it's only me.

>   return 0;
> }
> 
> static int query_formats(AVFilterContext *ctx)
> {
>   avfilter_set_common_formats
>     (ctx,
>      avfilter_make_format_list(10,
> 			       PIX_FMT_YUV444P,  PIX_FMT_YUV422P,  PIX_FMT_YUV420P,
> 			       PIX_FMT_YUV411P,  PIX_FMT_YUV410P,
> 			       PIX_FMT_YUVJ444P, PIX_FMT_YUVJ422P, PIX_FMT_YUVJ420P,
> 			       PIX_FMT_YUV440P,  PIX_FMT_YUVJ440P));
>   return 0;
> }

Weird indentation; please follow K&R style, with no tabs and a
four-space indent.

> static int config_input(AVFilterLink *link)
> {
>   AssContext *context = link->dst->priv;
> 
>   context->frame_width = link->w;
>   context->frame_height = link->h;
> 
>   context->ass_library = ass_library_init();
> 
>   if ( !context->ass_library ) {
>     av_log(0, AV_LOG_ERROR, "ass_library_init() failed!\n");
>     return 1;
>   }
> 
>   ass_set_fonts_dir(context->ass_library, "");
>   ass_set_extract_fonts(context->ass_library, 1);
>   ass_set_style_overrides(context->ass_library, NULL);
> 
>   context->ass_renderer = ass_renderer_init(context->ass_library);
>   if ( ! context->ass_renderer ) {
>     av_log(0, AV_LOG_ERROR, "ass_renderer_init() failed!\n");
>     return 1;
>   }
> 
>   ass_set_frame_size(context->ass_renderer, link->w, link->h);
>   ass_set_margins(context->ass_renderer, context->margin, context->margin, context->margin, context->margin);
>   ass_set_use_margins(context->ass_renderer, 1);
>   ass_set_font_scale(context->ass_renderer, 1.);
>   ass_set_fonts(context->ass_renderer, NULL, "Sans");
> 
>   context->ass_track = ass_read_file(context->ass_library, context->filename, context->encoding);
>   if ( !context->ass_track ) {
>     av_log(0, AV_LOG_ERROR, "Failed to read subtitle file with ass_read_file()!\n");
>     return 1;
>   }

I don't see why this is done here rather than in init().
Also av_log should always have a context.

>   avcodec_get_chroma_sub_sample(link->format,
> 				&context->hsub, &context->vsub);
> 
>   return 0;
> }
> 
> static void start_frame(AVFilterLink *link, AVFilterPicRef *picref)
> {
>   avfilter_start_frame(link->dst->outputs[0], picref);
> }
> 
> #define _r(c)  ((c)>>24)
> #define _g(c)  (((c)>>16)&0xFF)
> #define _b(c)  (((c)>>8)&0xFF)
> #define _a(c)  ((c)&0xFF)
> #define rgba2y(c)  ( (( 263*_r(c)  + 516*_g(c) + 100*_b(c)) >> 10) + 16  )                                                                     
> #define rgba2u(c)  ( ((-152*_r(c) - 298*_g(c) + 450*_b(c)) >> 10) + 128 )                                                                      
> #define rgba2v(c)  ( (( 450*_r(c) - 376*_g(c) -  73*_b(c)) >> 10) + 128 )                                                                      

Please avoid _foo var names, they are unreadable.

I wonder if we can do such a transformation using some existing
function. If that's not the case, maybe we should implement it in the
lib; otherwise filter writers will write this again and again (see for
example VHOOK).

> static void draw_ass_image(AVFilterPicRef *pic, ass_image_t *img, AssContext *context)
> {
>   unsigned char *row[4];
>   unsigned char c_y = rgba2y(img->color);
>   unsigned char c_u = rgba2u(img->color);
>   unsigned char c_v = rgba2v(img->color);
>   unsigned char opacity = 255 - _a(img->color);
>   unsigned char *src;
>   int i, j;
> 
>   unsigned char *bitmap = img->bitmap;
>   int bitmap_w = img->w;
>   int bitmap_h = img->h;
>   int dst_x = img->dst_x;
>   int dst_y = img->dst_y;
> 
>   int channel;
>   int x,y;
> 
>   src = bitmap;
> 
>   for (i = 0; i < bitmap_h; ++i) {
>     y = dst_y + i;
>     if ( y >= pic->h )
>       break;
> 
>     row[0] = pic->data[0] + y * pic->linesize[0];
> 
>     for (channel = 1; channel < 3; channel++)
>       row[channel] = pic->data[channel] +
> 	pic->linesize[channel] * (y>> context->vsub);
> 
>     for (j = 0; j < bitmap_w; ++j) {
>       unsigned k = ((unsigned)src[j]) * opacity / 255;
> 
>       x = dst_x + j;
>       if ( y >= pic->w )
> 	break;
> 
>       row[0][x] = (k*c_y + (255-k)*row[0][x]) / 255;
>       row[1][x >> context->hsub] = (k*c_u + (255-k)*row[1][x >> context->hsub]) / 255;
>       row[2][x >> context->hsub] = (k*c_v + (255-k)*row[2][x >> context->hsub]) / 255;
>     }
> 
>     src += img->stride;
>   } 
> }

It's always better to use the slice API. Would it be possible to
implement this overlay algorithm per slice (I bet yes)?

> static void end_frame(AVFilterLink *link)
> {
>   AssContext *context = link->dst->priv;
>   AVFilterLink* output = link->dst->outputs[0];
>   AVFilterPicRef *pic = link->cur_pic;
> 
>   ass_image_t* img = ass_render_frame(context->ass_renderer,
> 				      context->ass_track,
> 				      pic->pts * 1000 / AV_TIME_BASE,
> 				      NULL);
> 
>   while ( img ) {
>     draw_ass_image(pic, img, context);
>     img = img->next;
>   }
> 
>   avfilter_draw_slice(output, 0, pic->h);
>   avfilter_end_frame(output);
> }
> 
> static int parse_args(AVFilterContext *ctx, AssContext *context, const char* args)
> {
>   char *arg_copy = av_strdup(args);
>   char *strtok_arg = arg_copy;
>   char *param;
> 
>   while ( param = strtok(strtok_arg, "|") ) {
>     char *tmp = param;
>     char *param_name;
>     char *param_value;
> 
>     strtok_arg = NULL;
> 
>     while ( *tmp && *tmp != ':' ) {
>       tmp++;
>     }
> 
>     if ( param == tmp || ! *tmp ) {
>       av_log(ctx, AV_LOG_ERROR, "Error while parsing arguments - must be like 'param1:value1|param2:value2'\n");
>       return 1;
>     }

I don't think this is a good syntax: ':' is already used with a
special meaning (to separate params) in most filters. If you want to
support a list of key/value pairs (which looks like a good idea, at
least better than a list of unnamed params), then I think it's better
to use the syntax:
param1=val1:param2=val2...:paramN=valN

>     param_name = av_malloc(tmp - param + 1);
>     memset(param_name, 0, tmp - param + 1);
>     strncpy(param_name, param, tmp-param);
> 
>     tmp++;
> 
>     if ( ! *tmp ) {
>       av_log(ctx, AV_LOG_ERROR, "Error while parsing arguments - parameter value cannot be empty\n");
>       return 1;
>     }
> 
>     param_value = av_strdup(tmp);
> 
>     if ( !strcmp("margin", param_name ) ) {
>       context->margin = atoi(param_value);
>     } else if ( !strcmp("filename", param_name ) ) {
>       context->filename = av_strdup(param_value);
>     } else if ( !strcmp("encoding", param_name ) ) {
>       context->encoding = av_strdup(param_value);
>     } else {
>       av_log(ctx, AV_LOG_ERROR, "Error while parsing arguments - unsupported parameter '%s'\n", param_name);
>       return 1;
>     }
>     av_free(param_name);
>     av_free(param_value);
>   }
> 
>   if ( ! context->filename ) {
>     av_log(ctx, AV_LOG_ERROR, "Error while parsing arguments - mandatory parameter 'filename' missing\n");
>     return 1;
>   }
>   return 0;
> }
> 
> AVFilter avfilter_vf_ass=
>   {
>     .name      = "ass",
>     .priv_size = sizeof(AssContext),
>     .init      = init,
> 
>     .query_formats   = query_formats,
>     .inputs    = (AVFilterPad[]) {{ .name            = "default",
>                                     .type            = CODEC_TYPE_VIDEO,
>                                     .start_frame     = start_frame,
>                                     .end_frame       = end_frame,
>                                     .config_props    = config_input,
>                                     .min_perms       = AV_PERM_WRITE |
> 				    AV_PERM_READ,
>                                     .rej_perms       = AV_PERM_REUSE |
> 				    AV_PERM_REUSE2},
>                                   { .name = NULL}},
>     .outputs   = (AVFilterPad[]) {{ .name            = "default",
>                                     .type            = CODEC_TYPE_VIDEO, },
>                                   { .name = NULL}},
>   };

As already noted, the patch as it is currently designed may not be
accepted into SVN anyway. I think the "right" solution by FFmpeg
standards would be to use, implement, or somehow extend the native ASS
libavcodec decoder; then maybe we should also extend the lavfi API to
support subtitle frames.

But maybe we could provide some place (multimedia wiki?) where people
can post "interesting/useful" filters which, for one reason or another,
cannot be committed right into SVN.

Then we may design a system (à la VHOOK, but implemented better) to
dynamically load user-provided filters, so that there is no need to
manually integrate them into the native build system each time.

Regards, happy filtering.