[FFmpeg-devel] [PATCH] Port MPlayer 2xSaI filter to libavfilter

Fri Dec 3 01:21:53 CET 2010

On Mon, Nov 29, 2010 at 08:54:29PM +1000, Nielkie wrote:
> On Mon, Nov 29, 2010 at 5:08 PM, Nielkie <nielkie at gmail.com> wrote:
> 
> > On Mon, Nov 29, 2010 at 12:13 AM, Ronald S. Bultje <rsbultje at gmail.com>wrote:
> >
> >> Hi,
> >>
> >> On Sun, Nov 28, 2010 at 2:54 AM, Nielkie <nielkie at gmail.com> wrote:
> >> > On Sat, Nov 27, 2010 at 8:29 PM, Reimar Döffinger
> >> > <Reimar.Doeffinger at gmx.de>wrote:
> >> >> On Sat, Nov 27, 2010 at 07:52:52PM +1000, Nielkie wrote:
> >> >> > +            /* Move color matrix forward */
> >> >> > +            for (my = 0; my < 4; my++) {
> >> >> > +                color[my][0] = color[my][1];
> >> >> > +                color[my][1] = color[my][2];
> >> >> > +                color[my][2] = color[my][3];
> >> >> > +            }
> >> >>
> >> >> Using a loop here seems likely to have a major negative impact on
> >> >> performance.
> >> >
> >> > I'm not sure, I would think the compiler would unroll it.
> >>
> >> Could you verify that please?
> >>
> >> Ronald
> >>
> >>
> > I can confirm that it is unrolled on gcc 4.2.1, at least.
> >
> 
> Updated patch to svn.

>  Changelog                   |    1 
>  configure                   |    1 
>  doc/filters.texi            |    5 
>  libavfilter/Makefile        |    1 
>  libavfilter/allfilters.c    |    1 
>  libavfilter/vf_super2xsai.c |  300 ++++++++++++++++++++++++++++++++++++++++++++
>  6 files changed, 309 insertions(+)
> 7e4751e5738dd8612bd44cc0edb93c5ffd5e50e0  super2xsai_.diff
> diff --git a/Changelog b/Changelog
> index 486978f..8b95b0c 100644
> --- a/Changelog
> +++ b/Changelog
> @@ -60,6 +60,7 @@ version <next>:
>  - rename aspect filter to setdar, and pixelaspect to setsar
>  - IEC 61937 demuxer
>  - Mobotix .mxg demuxer
> +- super2xsai filter added
>  
>  
>  version 0.6:
> diff --git a/configure b/configure
> index 7dcb50f..38d870e 100755
> --- a/configure
> +++ b/configure
> @@ -1407,6 +1407,7 @@ blackframe_filter_deps="gpl"
>  cropdetect_filter_deps="gpl"
>  frei0r_filter_deps="frei0r dlopen strtok_r"
>  ocv_smooth_filter_deps="libopencv"
> +super2xsai_filter_deps="gpl"
>  yadif_filter_deps="gpl"
>  
>  # libraries
> diff --git a/doc/filters.texi b/doc/filters.texi
> index 1cba2d6..04665a1 100644
> --- a/doc/filters.texi
> +++ b/doc/filters.texi
> @@ -635,6 +635,11 @@ not specified it will use the default value of 16.
>  Adding this in the beginning of filter chains should make filtering
>  faster due to better use of the memory cache.
>  
> +@section super2xsai
> +
> +Scale the input by 2x using the Super2xSaI pixel art scaling algorithm.
> +Useful for enlarging pixel or line art without reducing sharpness.
> +
>  @section transpose
>  
>  Transpose rows with columns in the input video and optionally flip it.
> diff --git a/libavfilter/Makefile b/libavfilter/Makefile
> index 210510f..cbcf3b4 100644
> --- a/libavfilter/Makefile
> +++ b/libavfilter/Makefile
> @@ -39,6 +39,7 @@ OBJS-$(CONFIG_SETPTS_FILTER)                 += vf_setpts.o
>  OBJS-$(CONFIG_SETSAR_FILTER)                 += vf_aspect.o
>  OBJS-$(CONFIG_SETTB_FILTER)                  += vf_settb.o
>  OBJS-$(CONFIG_SLICIFY_FILTER)                += vf_slicify.o
> +OBJS-$(CONFIG_SUPER2XSAI_FILTER)             += vf_super2xsai.o
>  OBJS-$(CONFIG_TRANSPOSE_FILTER)              += vf_transpose.o
>  OBJS-$(CONFIG_UNSHARP_FILTER)                += vf_unsharp.o
>  OBJS-$(CONFIG_VFLIP_FILTER)                  += vf_vflip.o
> diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
> index c3067b8..557fd08 100644
> --- a/libavfilter/allfilters.c
> +++ b/libavfilter/allfilters.c
> @@ -60,6 +60,7 @@ void avfilter_register_all(void)
>      REGISTER_FILTER (SETSAR,      setsar,      vf);
>      REGISTER_FILTER (SETTB,       settb,       vf);
>      REGISTER_FILTER (SLICIFY,     slicify,     vf);
> +    REGISTER_FILTER (SUPER2XSAI,  super2xsai,  vf);
>      REGISTER_FILTER (TRANSPOSE,   transpose,   vf);
>      REGISTER_FILTER (UNSHARP,     unsharp,     vf);
>      REGISTER_FILTER (VFLIP,       vflip,       vf);
> diff --git a/libavfilter/vf_super2xsai.c b/libavfilter/vf_super2xsai.c
> new file mode 100644
> index 0000000..9ca710c
> --- /dev/null
> +++ b/libavfilter/vf_super2xsai.c
> @@ -0,0 +1,300 @@
> +/*
> + * Copyright (c) 2010 Niel van der Westhuizen
> + * Copyright (c) 2002 A'rpi
> + * Copyright (c) 1997-2001 ZSNES Team ( zsknight at zsnes.com / _demo_ at zsnes.com )
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License along
> + * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
> + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
> +*/
> +
> +/**
> + * @file
> + * Super 2xSaI video filter
> + * Ported from MPlayer libmpcodecs/vf_2xsai.c.
> + */
> +
> +#include "libavutil/intreadwrite.h"
> +#include "avfilter.h"
> +
> +typedef struct {
> +    uint32_t colorMask;
> +    uint32_t lowPixelMask;
> +    uint32_t qcolorMask;
> +    uint32_t qlowPixelMask;
> +    int bytesPerPixel;
> +} Super2xSaIContext;
> +

> +static inline uint32_t readPixel(int bytesPerPixel, uint8_t *src, int offset)

av_always_inline

> +{
> +    switch(bytesPerPixel) {
> +    case 4: return AV_RN32A(src + 4*offset);
> +    case 3: return AV_RL24 (src + 3*offset);
> +    default:
> +    case 2: return AV_RN16 (src + 2*offset);
> +    }
> +}
> +
> +#define GET_RESULT(A, B, C, D) (((B) != (C) || (B) != (D)) - ((A) != (C) || (A) != (D)))

this can apparently also be written as:
(C) == (D) ? (((B) != (C)) - ((A) != (C))) : 0
if so a benchmark would make sense

> +
> +#define INTERPOLATE(A, B) ((((A) & c->colorMask) >> 1) + (((B) & c->colorMask) >> 1) + \
> +                           ((A) & (B) & c->lowPixelMask))
> +
> +/* Interpolate two rgb colors with weights 3 and 1 */
> +#define INTERPOLATE_3_1(A, B) (((A) & c->qcolorMask) >> 2)*3 + (((B) & c->qcolorMask) >> 2) \
> +                               + (((((A) & c->qlowPixelMask)*3 + ((B) & c->qlowPixelMask)) >> 2) & c->qlowPixelMask)
> +

> +/* Reads from the current source color neighborhood. */
> +#define GET_COLOR(mx, my) (readPixel(c->bytesPerPixel, src_line[my], FFMIN(x+(mx), width-1)))

this is very significantly worse than what mplayer does in terms of speed

can you post benchmark scores of your code and mplayer's?
also, do your code and mplayer match exactly in output, or are there differences?

> +
> +static void Super2xSaI(Super2xSaIContext *c,
> +                       uint8_t *src, uint32_t src_pitch,
> +                       uint8_t *dst, uint32_t dst_pitch,
> +                       uint32_t width, uint32_t height)
> +{
> +    int y;
> +    uint8_t *src_line[4];
> +
> +    /* Point to the first 3 lines. */
> +    src_line[0] = src;
> +    src_line[1] = src;

> +    src_line[2] = FFMIN(src + src_pitch,     src + (height-1) * src_pitch);

src + src_pitch*FFMIN(1, height-1)
simpler and works with negative src_pitch

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Complexity theory is the science of finding the exact solution to an
approximation. Benchmarking OTOH is finding an approximation of the exact
solution.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 198 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20101203/486a3298/attachment.pgp>