[FFmpeg-devel] [PATCH] Port gradfun to libavfilter (GCI)

Mon Nov 29 13:34:51 CET 2010

Hi,

On 11/29/10 4:18 AM, Nolan L wrote:
> As part of a GCI task, I've ported the gradfun debanding filter from mplayer
> to libavfilter.
> 
> The patch includes changes to the build system to account for CPU
> optimizations that weren't present previously.
> 
> There is a SSE2 method that remains unported due to lack of SSE2 detection
> in the configure script that I wasn't quite sure how to add.
> 

That's great!
configure will detect it, and I believe HAVE_SSE is enough; the same goes
for the flag with cpudetect.

> [...]
>
> +
> +static const uint16_t __attribute__((aligned(16))) pw_7f[8] = {127,127,127,127,127,127,127,127};
> +static const uint16_t __attribute__((aligned(16))) pw_ff[8] = {255,255,255,255,255,255,255,255};

Use DECLARE_ALIGNED, and use hex values.

> [...]
>
> +
> +void filter_line_c(uint8_t *dst, uint8_t *src, uint16_t *dc, int width, int thresh, const uint16_t *dithers)
> +{
> +    int x;
> +    for (x=0; x<width; x++, dc += x & 1) {
> +        int pix = src[x] << 7;
> +        int delta = dc[0] - pix;
> +        int m = abs(delta) * thresh >> 16;
> +        m = FFMAX(0, 127 - m);
> +        m = m * m * delta >> 14;
> +        pix += m + dithers[x & 7];
> +        dst[x] = av_clip_uint8(pix >> 7);
> +    }
> +}
> +
> +void blur_line_c(uint16_t *dc, uint16_t *buf, uint16_t *buf1, uint8_t *src, int sstride, int width)
> +{
> +    int x, v, old;
> +    for (x=0; x < width; x++) {
> +        v = buf1[x] + src[2 * x] + src[2 * x + 1] + src[2 * x + sstride] + src[2 * x + 1 + sstride];
> +        old = buf[x];
> +        buf[x] = v;
> +        dc[x] = v - old;
> +    }
> +}
> +
> +static void filter(GradFunContext *ctx, uint8_t *dst, uint8_t *src, int width, int height, int dstride, int sstride, int r)
> +{
> +    int bstride = ((width + 15) & ~15) / 2;
> +    int y;
> +    uint32_t dc_factor = (1 << 21) / (r * r);
> +    uint16_t *dc = ctx->buf + 16;
> +    uint16_t *buf = ctx->buf + bstride + 32;
> +    int thresh = ctx->thresh;
> +
> +    memset(dc, 0, (bstride + 16) * sizeof(*buf));
> +    for (y = 0; y < r; y++)
> +        ctx->blur_line(dc, buf + y * bstride, buf + (y - 1) * bstride, src + 2 * y * sstride, sstride, width / 2);
> +    for (;;) {
> +        if (y < height - r) {
> +            int mod = ((y + r) / 2) % r;
> +            uint16_t *buf0 = buf + mod * bstride;
> +            uint16_t *buf1 = buf + (mod ? mod - 1 : r - 1) * bstride;
> +            int x, v;
> +            ctx->blur_line(dc, buf0, buf1, src + (y + r) * sstride, sstride, width / 2);
> +            for (x = v = 0; x < r; x++)
> +                v += dc[x];
> +            for (; x < width / 2; x++) {
> +                v += dc[x] - dc[x-r];
> +                dc[x-r] = v * dc_factor >> 16;
> +            }
> +            for (; x < (width + r + 1) / 2; x++)
> +                dc[x-r] = v * dc_factor >> 16;
> +            for (x = -r / 2; x < 0; x++)
> +                dc[x] = dc[0];
> +        }
> +        if (y == r) {
> +            for (y = 0; y < r; y++)
> +                ctx->filter_line(dst + y * dstride, src + y * sstride, dc - r / 2, width, thresh, dither[y & 7]);
> +        }
> +        ctx->filter_line(dst + y * dstride, src + y * sstride, dc - r / 2, width, thresh, dither[y & 7]);
> +        if (++y >= height) break;
> +        ctx->filter_line(dst + y * dstride, src + y * sstride, dc - r / 2, width, thresh, dither[y & 7]);
> +        if (++y >= height) break;
> +    }
> +}

Can the filter use direct rendering? If so, please try to.

> [...]
>
> +
> +static av_cold void uninit(AVFilterContext *ctx)
> +{
> +    GradFunContext *gf = ctx->priv;
> +    if(gf->buf) av_free(gf->buf);

The if is unneeded; av_free() already accepts NULL.

> +}
> +
> +static int query_formats(AVFilterContext *ctx)
> +{
> +    static const enum PixelFormat pix_fmts[] = {
> +        PIX_FMT_YUV410P,            PIX_FMT_YUV420P,
> +        PIX_FMT_GRAY8,              PIX_FMT_NV12,
> +        PIX_FMT_NV21,               PIX_FMT_YUV444P,
> +        PIX_FMT_YUV422P,            PIX_FMT_YUV411P,
> +        PIX_FMT_NONE
> +    };
> +
> +    avfilter_set_common_formats(ctx, avfilter_make_format_list(pix_fmts));
> +
> +    return 0;
> +}
> +
> +static int config_input(AVFilterLink *inlink)
> +{
> +    GradFunContext *gf = inlink->dst->priv;
> +    av_free(gf->buf);
> +    gf->buf = av_mallocz((((inlink->w + 15) & ~15) * (gf->radius + 1) / 2 + 32) * sizeof(uint16_t));
> +    
> +    return !gf->buf;

Errors must return -1.
A malloc failure should return AVERROR(ENOMEM).

[...]

Please factorize the asm code using templates.
See the yadif SSE patch on the mailing list.

-- 
Baptiste COUDURIER
Key fingerprint                 8D77134D20CC9220201FC5DB0AC9325C5C1ABAAA
FFmpeg maintainer                                  http://www.ffmpeg.org