[FFmpeg-devel] [PATCH 2/2] libavutil/libavfilter: deshake opencl filter based on comments on 20130401

Stefano Sabatini stefasab at gmail.com
Mon Apr 1 12:59:47 CEST 2013


On date Monday 2013-04-01 17:52:51 +0800, Wei Gao encoded:
> 

> From 77b3811b748129c8018216989f5e9781d37d7ea0 Mon Sep 17 00:00:00 2001
> From: highgod0401 <highgod0401 at gmail.com>
> Date: Mon, 1 Apr 2013 17:48:34 +0800
> Subject: [PATCH 2/2] deshake opencl filter based on comments on 20130401
> 
> ---
>  libavfilter/Makefile            |   2 +
>  libavfilter/allfilters.c        |   3 +-
>  libavfilter/allfilters_kernel.c |  20 ++++
>  libavfilter/allfilters_kernel.h |   6 ++
>  libavfilter/deshake_kernel.h    | 211 ++++++++++++++++++++++++++++++++++++++++
>  libavfilter/deshake_opencl.c    | 174 +++++++++++++++++++++++++++++++++
>  libavfilter/deshake_opencl.h    | 107 ++++++++++++++++++++
>  libavfilter/vf_deshake.c        |  72 +++++++++++---
>  8 files changed, 581 insertions(+), 14 deletions(-)
>  create mode 100644 libavfilter/allfilters_kernel.c
>  create mode 100644 libavfilter/allfilters_kernel.h
>  create mode 100644 libavfilter/deshake_kernel.h
>  create mode 100644 libavfilter/deshake_opencl.c
>  create mode 100644 libavfilter/deshake_opencl.h
> 
> diff --git a/libavfilter/Makefile b/libavfilter/Makefile
> index 0040a33..d2cc0b3 100644
> --- a/libavfilter/Makefile
> +++ b/libavfilter/Makefile
> @@ -29,6 +29,7 @@ HEADERS = asrc_abuffer.h                                                \
>            version.h                                                     \
>  
>  OBJS = allfilters.o                                                     \
> +       allfilters_kernel.o                                              \
>         audio.o                                                          \
>         avfilter.o                                                       \
>         avfiltergraph.o                                                  \
> @@ -138,6 +139,7 @@ OBJS-$(CONFIG_NOFORMAT_FILTER)               += vf_format.o
>  OBJS-$(CONFIG_NOISE_FILTER)                  += vf_noise.o
>  OBJS-$(CONFIG_NULL_FILTER)                   += vf_null.o
>  OBJS-$(CONFIG_OCV_FILTER)                    += vf_libopencv.o
> +OBJS-$(CONFIG_OPENCL)                        += deshake_opencl.o
>  OBJS-$(CONFIG_OVERLAY_FILTER)                += vf_overlay.o
>  OBJS-$(CONFIG_PAD_FILTER)                    += vf_pad.o
>  OBJS-$(CONFIG_PERMS_FILTER)                  += f_perms.o
> diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
> index 086e6c9..f2862a8 100644
> --- a/libavfilter/allfilters.c
> +++ b/libavfilter/allfilters.c
> @@ -21,6 +21,7 @@
>  
>  #include "avfilter.h"
>  #include "config.h"
> +#include "allfilters_kernel.h"
>  
>  
>  #define REGISTER_FILTER(X, x, y)                                        \
> @@ -35,7 +36,6 @@
>          extern AVFilter avfilter_##x;                                   \
>          avfilter_register(&avfilter_##x);                               \
>      }
> -

spurious

>  void avfilter_register_all(void)
>  {
>      static int initialized;
> @@ -198,4 +198,5 @@ void avfilter_register_all(void)
>      REGISTER_FILTER_UNCONDITIONAL(vsink_buffer);
>      REGISTER_FILTER_UNCONDITIONAL(af_afifo);
>      REGISTER_FILTER_UNCONDITIONAL(vf_fifo);
> +    ff_opencl_register_filter_kernel_code_all();
>  }
> diff --git a/libavfilter/allfilters_kernel.c b/libavfilter/allfilters_kernel.c
> new file mode 100644
> index 0000000..4980314
> --- /dev/null
> +++ b/libavfilter/allfilters_kernel.c

opencl_allfilters.c
or
opencl_allkernels.c

seems a better name.

> @@ -0,0 +1,20 @@
> +#include "allfilters_kernel.h"
> +#if CONFIG_OPENCL
> +#include "libavutil/opencl.h"
> +#include "deshake_kernel.h"
> +#endif
> +
> +#define OPENCL_REGISTER_KERNEL_CODE(X, x)                                              \
> +    {                                                                                  \
> +        if (CONFIG_##X##_FILTER) {                                                     \
> +            av_opencl_register_kernel_code(ff_kernel_##x##_opencl);                    \
> +        }                                                                              \
> +    }
> +
> +void ff_opencl_register_filter_kernel_code_all(void)
> +{
> + #if CONFIG_OPENCL
> +   OPENCL_REGISTER_KERNEL_CODE(DESHAKE,     deshake);
> + #endif
> +}
> +
> diff --git a/libavfilter/allfilters_kernel.h b/libavfilter/allfilters_kernel.h
> new file mode 100644
> index 0000000..2d9c59f
> --- /dev/null
> +++ b/libavfilter/allfilters_kernel.h
> @@ -0,0 +1,6 @@
> +#include "avfilter.h"
> +#include "config.h"
> +
> +
> +void ff_opencl_register_filter_kernel_code_all(void);
> +
> diff --git a/libavfilter/deshake_kernel.h b/libavfilter/deshake_kernel.h
> new file mode 100644
> index 0000000..7c25e44
> --- /dev/null
> +++ b/libavfilter/deshake_kernel.h
> @@ -0,0 +1,211 @@
> +/*
> + * Copyright (C) 2013 Wei Gao <weigao at multicorewareinc.com>
> + *
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include "libavutil/opencl.h"
> +
> +const char *ff_kernel_deshake_opencl = AV_OPENCL_KERNEL(
> +
> +inline unsigned char pixel(global const unsigned char *src, float x, float y,int w, int h,int stride, unsigned char def)
> +{
> +    return (x < 0 || y < 0 || x >= w || y >= h) ? def : src[(int)x + (int)y * stride];
> +}
> +unsigned char interpolate_nearest(float x, float y, global const unsigned char *src,
> +                        int width, int height, int stride, unsigned char def)
> +{
> +    return pixel(src, (int)(x + 0.5), (int)(y + 0.5), width, height, stride, def);
> +}
> +
> +unsigned char interpolate_bilinear(float x, float y, global const unsigned char *src,
> +                        int width, int height, int stride, unsigned char def)
> +{
> +    int x_c, x_f, y_c, y_f;
> +    int v1, v2, v3, v4;
> +
> +    if (x < -1 || x > width || y < -1 || y > height) {
> +        return def;
> +    } else {
> +        x_f = (int)x;
> +        x_c = x_f + 1;
> +
> +        y_f = (int)y;
> +        y_c = y_f + 1;
> +
> +        v1 = pixel(src, x_c, y_c, width, height, stride, def);
> +        v2 = pixel(src, x_c, y_f, width, height, stride, def);
> +        v3 = pixel(src, x_f, y_c, width, height, stride, def);
> +        v4 = pixel(src, x_f, y_f, width, height, stride, def);
> +
> +        return (v1*(x - x_f)*(y - y_f) + v2*((x - x_f)*(y_c - y)) +
> +                v3*(x_c - x)*(y - y_f) + v4*((x_c - x)*(y_c - y)));
> +    }
> +}
> +
> +unsigned char interpolate_biquadratic(float x, float y, global const unsigned char *src,
> +                        int width, int height, int stride, unsigned char def)
> +{
> +    int     x_c, x_f, y_c, y_f;
> +    unsigned char v1,  v2,  v3,  v4;
> +    float   f1,  f2,  f3,  f4;
> +
> +    if (x < - 1 || x > width || y < -1 || y > height)
> +        return def;
> +    else {
> +        x_f = (int)x;
> +        x_c = x_f + 1;
> +        y_f = (int)y;
> +        y_c = y_f + 1;
> +
> +        v1 = pixel(src, x_c, y_c, width, height, stride, def);
> +        v2 = pixel(src, x_c, y_f, width, height, stride, def);
> +        v3 = pixel(src, x_f, y_c, width, height, stride, def);
> +        v4 = pixel(src, x_f, y_f, width, height, stride, def);
> +
> +        f1 = 1 - sqrt((x_c - x) * (y_c - y));
> +        f2 = 1 - sqrt((x_c - x) * (y - y_f));
> +        f3 = 1 - sqrt((x - x_f) * (y_c - y));
> +        f4 = 1 - sqrt((x - x_f) * (y - y_f));
> +        return (v1 * f1 + v2 * f2 + v3 * f3 + v4 * f4) / (f1 + f2 + f3 + f4);
> +    }
> +}
> +
> +inline const float clipf(float a, float amin, float amax)
> +{
> +    if      (a < amin) return amin;
> +    else if (a > amax) return amax;
> +    else               return a;
> +}
> +inline int mirror(int v, int m)
> +{
> +    while ((unsigned)v > (unsigned)m) {
> +        v = -v;
> +        if (v < 0)
> +            v += 2 * m;
> +    }
> +    return v;
> +}
> +
> +
> +kernel void avfilter_transform(global  unsigned char *src,
> +                               global  unsigned char *dst,
> +                               global          float *matrix,
> +                               global          float *matrix2,
> +                                                 int interpolate,
> +                                                 int fillmethod,
> +                                                 int src_stride_lu,
> +                                                 int dst_stride_lu,
> +                                                 int src_stride_ch,
> +                                                 int dst_stride_ch,
> +                                                 int height,
> +                                                 int width,
> +                                                 int ch,
> +                                                 int cw)
> +{
> +     int global_id = get_global_id(0);
> +
> +     global unsigned char *dst_y = dst;
> +     global unsigned char *dst_u = dst_y + height * dst_stride_lu;
> +     global unsigned char *dst_v = dst_u + ch * dst_stride_ch;
> +
> +     global unsigned char *src_y = src;
> +     global unsigned char *src_u = src_y + height * src_stride_lu;
> +     global unsigned char *src_v = src_u + ch * src_stride_ch;
> +
> +     global unsigned char *tempdst;
> +     global unsigned char *tempsrc;
> +
> +     int x;
> +     int y;
> +     float x_s;
> +     float y_s;
> +     int tempsrc_stride;
> +     int tempdst_stride;
> +     int temp_height;
> +     int temp_width;
> +     int curpos;
> +     unsigned char def;
> +     if (global_id < width*height) {
> +        y = global_id/width;
> +        x = global_id%width;
> +        x_s = x * matrix[0] + y * matrix[1] + matrix[2];
> +        y_s = x * matrix[3] + y * matrix[4] + matrix[5];
> +        tempdst = dst_y;
> +        tempsrc = src_y;
> +        tempsrc_stride = src_stride_lu;
> +        tempdst_stride = dst_stride_lu;
> +        temp_height = height;
> +        temp_width = width;
> +     }
> +     else if ((global_id >= width*height)&&(global_id < width*height + ch*cw)) {
> +        y = (global_id - width*height)/cw;
> +        x = (global_id - width*height)%cw;
> +        x_s = x * matrix2[0] + y * matrix2[1] + matrix2[2];
> +        y_s = x * matrix2[3] + y * matrix2[4] + matrix2[5];
> +        tempdst = dst_u;
> +        tempsrc = src_u;
> +        tempsrc_stride = src_stride_ch;
> +        tempdst_stride = dst_stride_ch;
> +        temp_height = height;
> +        temp_width = width;
> +        temp_height = ch;
> +        temp_width = cw;
> +     }
> +     else {
> +        y = (global_id - width*height - ch*cw)/cw;
> +        x = (global_id - width*height - ch*cw)%cw;
> +        x_s = x * matrix2[0] + y * matrix2[1] + matrix2[2];
> +        y_s = x * matrix2[3] + y * matrix2[4] + matrix2[5];
> +        tempdst = dst_v;
> +        tempsrc = src_v;
> +        tempsrc_stride = src_stride_ch;
> +        tempdst_stride = dst_stride_ch;
> +        temp_height = ch;
> +        temp_width = cw;
> +     }
> +     curpos = y * tempdst_stride + x;
> +     switch (fillmethod) {
> +        case 1:
> +            def = tempsrc[y*tempsrc_stride+x];
> +            break;
> +        case 2:
> +            y_s = clipf(y_s, 0, temp_height - 1);
> +            x_s = clipf(x_s, 0, temp_width - 1);
> +            def = tempsrc[(int)y_s * tempsrc_stride + (int)x_s];
> +            break;
> +        case 3:
> +            y_s = mirror(y_s,temp_height - 1);
> +            x_s = mirror(x_s,temp_width - 1);
> +            def = tempsrc[(int)y_s * tempsrc_stride + (int)x_s];
> +            break;
> +         }
> +    switch (interpolate) {
> +        case 0:
> +            tempdst[curpos] = interpolate_nearest(x_s, y_s, tempsrc, temp_width, temp_height, tempsrc_stride, def);
> +            break;
> +        case 1:
> +            tempdst[curpos] = interpolate_bilinear(x_s, y_s, tempsrc, temp_width, temp_height, tempsrc_stride, def);
> +            break;
> +        case 2:
> +            tempdst[curpos] = interpolate_biquadratic(x_s, y_s, tempsrc, temp_width, temp_height, tempsrc_stride, def);
> +            break;
> +        }
> +}
> +
> +);
> diff --git a/libavfilter/deshake_opencl.c b/libavfilter/deshake_opencl.c
> new file mode 100644
> index 0000000..6c55855
> --- /dev/null
> +++ b/libavfilter/deshake_opencl.c
> @@ -0,0 +1,174 @@
> +/*
> + * Copyright (C) 2013 Wei Gao <weigao at multicorewareinc.com>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +/**
> + * @file
> + * transform input video
> + */
> +
> +#include "libavutil/common.h"
> +#include "libavutil/avassert.h"
> +#include "libavutil/avstring.h"
> +#include "deshake_opencl.h"
> +#include "libavutil/dict.h"
> +
> +
> +#define TRANSFORM_OPENCL_CHECK(method, ...)                                                                  \
> +    status = method(__VA_ARGS__);                                                                            \
> +    if (status != CL_SUCCESS) {                                                                              \
> +        av_log(ctx, AV_LOG_ERROR, "error %s %d\n", # method, status);                                        \
> +        return AVERROR_EXTERNAL;                                                                             \
> +    }
> +
> +#define TRANSFORM_OPENCL_SET_KERNEL_ARG(arg_ptr)                                                             \
> +    status = clSetKernelArg((kernel),(arg_no++),(sizeof(arg_ptr)),(void*)(&(arg_ptr)));                      \
> +    if (status != CL_SUCCESS) {                                                                              \

> +        av_log(ctx, AV_LOG_ERROR, "error %s %d\n", "clSetKernelArg", status );                               \

av_log(ctx, AV_LOG_ERROR, "cannot set kernel argument: %d\n", status );                               \

or even better, export the error string API so you can put the OpenCL
error (even if I noted it would be much better if the OpenCL lib
supported such API).

> +        return AVERROR_EXTERNAL;                                                                             \
> +    }
> +
> +int ff_opencl_transform(FFDeshakeOpenclEnv *opencl_env,
> +                        int width, int height, int cw, int ch,
> +                        const float *matrix_y, const float *matrix_uv,
> +                        enum InterpolateMethod interpolate,
> +                        enum FillMethod fill, AVFrame *in, AVFrame *out)
> +{
> +    int arg_no, ret = 0;

> +    size_t matrix_size = 6;//size of transfer matrix

replace this with a define:
#define MATRIX_SIZE 6


> +    const size_t global_work_size = width * height + 2 * ch * cw;
> +    cl_kernel kernel;
> +    cl_int status;
> +    AVFilterContext *ctx = opencl_env->ctx;
> +    ret = av_opencl_buffer_write(opencl_env->cl_matrix_y, (uint8_t *)matrix_y, matrix_size * sizeof(cl_float));
> +    if (ret < 0)
> +        return ret;
> +    ret = av_opencl_buffer_write(opencl_env->cl_matrix_uv, (uint8_t *)matrix_uv, matrix_size * sizeof(cl_float));
> +    if (ret < 0)
> +        return ret;
> +    kernel = opencl_env->kernel_env.kernel;
> +    arg_no = 0;
> +
> +    TRANSFORM_OPENCL_SET_KERNEL_ARG(opencl_env->cl_inbuf.cl_buf);
> +    TRANSFORM_OPENCL_SET_KERNEL_ARG(opencl_env->cl_outbuf.cl_buf);
> +    TRANSFORM_OPENCL_SET_KERNEL_ARG(opencl_env->cl_matrix_y);
> +    TRANSFORM_OPENCL_SET_KERNEL_ARG(opencl_env->cl_matrix_uv);
> +    TRANSFORM_OPENCL_SET_KERNEL_ARG(interpolate);
> +    TRANSFORM_OPENCL_SET_KERNEL_ARG(fill);
> +    TRANSFORM_OPENCL_SET_KERNEL_ARG(in->linesize[0]);
> +    TRANSFORM_OPENCL_SET_KERNEL_ARG(out->linesize[0]);
> +    TRANSFORM_OPENCL_SET_KERNEL_ARG(in->linesize[1]);
> +    TRANSFORM_OPENCL_SET_KERNEL_ARG(out->linesize[1]);
> +    TRANSFORM_OPENCL_SET_KERNEL_ARG(height);
> +    TRANSFORM_OPENCL_SET_KERNEL_ARG(width);
> +    TRANSFORM_OPENCL_SET_KERNEL_ARG(ch);
> +    TRANSFORM_OPENCL_SET_KERNEL_ARG(cw);
> +    TRANSFORM_OPENCL_CHECK(clEnqueueNDRangeKernel, opencl_env->kernel_env.command_queue, opencl_env->kernel_env.kernel, 1, NULL,
> +              &global_work_size, NULL, 0, NULL, NULL);
> +    clFinish(opencl_env->kernel_env.command_queue);
> +    ret = av_opencl_buffer_read_image(out->data, opencl_env->out_plane_size,
> +                                      opencl_env->plane_num, opencl_env->cl_outbuf.cl_buf,
> +                                      opencl_env->cl_outbuf.buf_size);
> +    if (ret < 0)
> +        return ret;
> +    return ret;
> +}
> +
> +int ff_opencl_deshake_init(AVFilterContext *ctx, FFDeshakeOpenclEnv *opencl_env)
> +{
> +    int ret = 0;
> +    AVDictionary *options = NULL;
> +    av_dict_set(&options, "build_options", "-I.", 0);
> +    ret = av_opencl_init(options, NULL);
> +    av_dict_free(&options);
> +    if (ret < 0)
> +        return ret;
> +    memset(opencl_env, 0, sizeof(FFDeshakeOpenclEnv));
> +    opencl_env->matrix_size = 6;
> +    opencl_env->plane_num   = 3;
> +    opencl_env->ctx         = ctx;
> +    ret = av_opencl_buffer_create(&opencl_env->cl_matrix_y,
> +        opencl_env->matrix_size*sizeof(cl_float), CL_MEM_READ_ONLY, NULL);
> +    if (ret < 0)
> +        return ret;
> +    ret = av_opencl_buffer_create(&opencl_env->cl_matrix_uv,
> +        opencl_env->matrix_size*sizeof(cl_float), CL_MEM_READ_ONLY, NULL);
> +    if (ret < 0)
> +        return ret;
> +    if (!opencl_env->kernel_env.kernel) {
> +        ret =  av_opencl_create_kernel(&opencl_env->kernel_env, "avfilter_transform");
> +        if (ret < 0) {
> +            av_log(ctx, AV_LOG_ERROR, "OpenCL failed create kernel for name 'avfilter_transform'\n");
> +            return ret;
> +        }
> +    }
> +    return ret;
> +}
> +
> +void ff_opencl_deshake_uninit(FFDeshakeOpenclEnv *opencl_env)
> +{
> +    av_opencl_buffer_release(&opencl_env->cl_inbuf.cl_buf);
> +    av_opencl_buffer_release(&opencl_env->cl_outbuf.cl_buf);
> +    av_opencl_buffer_release(&opencl_env->cl_matrix_y);
> +    av_opencl_buffer_release(&opencl_env->cl_matrix_uv);
> +    av_opencl_release_kernel(&opencl_env->kernel_env);
> +    av_opencl_uninit();
> +}
> +
> +
> +int ff_opencl_deshake_process_inout_buf(AVFrame *in, AVFrame *out, FFDeshakeOpenclEnv *opencl_env)
> +{
> +    int ret = 0;
> +    if ((!opencl_env->cl_inbuf.cl_buf) || (!opencl_env->cl_outbuf.cl_buf)) {
> +        opencl_env->in_plane_size[0]   = (in->linesize[0] * in->height);
> +        opencl_env->in_plane_size[1]   = (in->linesize[1] * (in->height>>1));
> +        opencl_env->in_plane_size[2]   = (in->linesize[2] * (in->height>>1));
> +        opencl_env->out_plane_size[0]  = (out->linesize[0] * out->height);
> +        opencl_env->out_plane_size[1]  = (out->linesize[1] * (out->height>>1));
> +        opencl_env->out_plane_size[2]  = (out->linesize[2] * (out->height>>1));
> +        opencl_env->cl_inbuf.buf_size  = opencl_env->in_plane_size[0] +
> +                                         opencl_env->in_plane_size[1] +
> +                                         opencl_env->in_plane_size[2];
> +        opencl_env->cl_outbuf.buf_size = opencl_env->out_plane_size[0] +
> +                                         opencl_env->out_plane_size[1] +
> +                                         opencl_env->out_plane_size[2];
> +        if (NULL == opencl_env->cl_inbuf.cl_buf) {
> +            ret = av_opencl_buffer_create(&opencl_env->cl_inbuf.cl_buf,
> +                                            opencl_env->cl_inbuf.buf_size,
> +                                            CL_MEM_READ_ONLY, NULL);
> +            if (ret < 0)
> +                return ret;
> +        }
> +        if (NULL == opencl_env->cl_outbuf.cl_buf) {
> +            ret = av_opencl_buffer_create(&opencl_env->cl_outbuf.cl_buf,
> +                                            opencl_env->cl_outbuf.buf_size,
> +                                            CL_MEM_READ_WRITE, NULL);
> +            if (ret < 0)
> +                return ret;
> +        }
> +    }
> +    ret = av_opencl_buffer_write_image(opencl_env->cl_inbuf.cl_buf,
> +                                 opencl_env->cl_inbuf.buf_size,
> +                                 0, in->data,opencl_env->in_plane_size,
> +                                 opencl_env->plane_num);
> +    if(ret < 0)
> +        return ret;
> +    return ret;
> +}
> +
> diff --git a/libavfilter/deshake_opencl.h b/libavfilter/deshake_opencl.h
> new file mode 100644
> index 0000000..ba72a51
> --- /dev/null
> +++ b/libavfilter/deshake_opencl.h
> @@ -0,0 +1,107 @@
> +/*
> + * Copyright (C) 2013 Wei Gao <weigao at multicorewareinc.com>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#ifndef AVFILTER_TRANSFORM_OPENCL_H
> +#define AVFILTER_TRANSFORM_OPENCL_H
> +
> +#include <stdint.h>
> +#include "avfilter.h"
> +#include "transform.h"
> +#if CONFIG_OPENCL
> +#include "libavutil/opencl.h"
> +#endif
> +
> +#if CONFIG_OPENCL
> +
> +typedef struct {
> +    size_t buf_size;
> +    cl_mem cl_buf;
> +} OpenCLFrameBuf;

For simplicity sake you should probably avoid this struct, and just
put the field in the context below.

cl_mem cl_inbuf;
size_t cl_inbuf_size;


> +
> +#endif
> +
> +typedef struct {
> +#if CONFIG_OPENCL

you can remove this nested config (if A ... if A doesn't make much
sense).

> +    AVFilterContext *ctx;
> +    size_t matrix_size;
> +    float matrix_y[9];
> +    float matrix_uv[9];
> +    OpenCLFrameBuf cl_inbuf;
> +    OpenCLFrameBuf cl_outbuf;
> +    cl_mem cl_matrix_y;
> +    cl_mem cl_matrix_uv;
> +    int in_plane_size[8];
> +    int out_plane_size[8];
> +    int plane_num;
> +    AVOpenCLKernelEnv kernel_env;
> +#endif


> +} FFDeshakeOpenclEnv;
> +
> +/**
> + * Init deshake OpenCL run environment
> + *
> + * @param opencl_env        the deshake Opencl environment which need to be initialed
> + * @param ctx                   av filter context
> + * @return  >=0 on success, a negative error code in case of failure
> + */
> +int ff_opencl_deshake_init(AVFilterContext *ctx, FFDeshakeOpenclEnv *opencl_env);

You can just pass the ctx as the first argument, the opencl context is
contained in the context so no need to specify it again.

Same below, you can use the filter context as first parameter. The
advantage is that you pass a meaningful logging context.

> +
> +/**
> + * Uninit deshake OpenCL run environment
> + *
> + * @param opencl_env       the deshake Opencl environment which need to be uninitialed,
> + * initialed by ff_opencl_deshake_init()
> + */
> +void ff_opencl_deshake_uninit(FFDeshakeOpenclEnv *opencl_env);
> +
> +/**
> + * Create Opencl input and output buffers and copy CPU memory data to OpenCL buffer
> + *
> + * @param in                    filter input frame
> + * @param out                  filter output frame
> + * @param opencl_env       the deshake Opencl environment initialed by ff_opencl_deshake_init()
> + * @return  >=0 on success, a negative error code in case of failure
> + */
> +int ff_opencl_deshake_process_inout_buf(AVFrame *in, AVFrame *out, FFDeshakeOpenclEnv *opencl_env);
> +
> +/**
> + * Do an affine transformation with the given interpolation method using OpenCL
> + *
> + * @param opencl_env        OpenCL env, inited in function  ff_opencl_deshake_init()
> + * @param width       image width in pixels
> + * @param height      image height in pixels
> + * @param cw       chroma plane width in pixels
> + * @param ch        chroma plane height in pixels
> + * @param matrix_y        9-item affine transformation matrix of luma plane in CPU memory
> + * @param matrix_uv      9-item affine transformation matrix of chroma plane in CPU memory
> + * @param interpolate pixel interpolation method
> + * @param fill        edge fill method
> + * @param in         filter input frame
> + * @param out       filter output frame

weird indent

Note: you can skip doxy for internal functions, especially when the
meaning of the parameters is obvious.

> + * @return  >=0 on success, a negative error code in case of failure
> + */
> +int ff_opencl_transform(FFDeshakeOpenclEnv *opencl_env,
> +                        int width, int height, int cw, int ch,
> +                        const float *matrix_y, const float *matrix_uv,
> +                        enum InterpolateMethod interpolate,
> +                        enum FillMethod fill, AVFrame *in, AVFrame *out);
> +
> +#endif /* AVFILTER_TRANSFORM_H */
> +
> diff --git a/libavfilter/vf_deshake.c b/libavfilter/vf_deshake.c
> index 138c25d..ad85c88 100644
> --- a/libavfilter/vf_deshake.c
> +++ b/libavfilter/vf_deshake.c
> @@ -1,6 +1,7 @@
>  /*
>   * Copyright (C) 2010 Georg Martius <georg.martius at web.de>
>   * Copyright (C) 2010 Daniel G. Taylor <dan at programmer-art.org>
> + * Copyright (C) 2013 Wei Gao <weigao at multicorewareinc.com>
>   *
>   * This file is part of FFmpeg.
>   *
> @@ -60,6 +61,7 @@
>  #include "libavcodec/dsputil.h"
>  
>  #include "transform.h"
> +#include "deshake_opencl.h"
>  
>  #define CHROMA_WIDTH(link)  -((-link->w) >> av_pix_fmt_desc_get(link->format)->log2_chroma_w)
>  #define CHROMA_HEIGHT(link) -((-link->h) >> av_pix_fmt_desc_get(link->format)->log2_chroma_h)
> @@ -106,6 +108,10 @@ typedef struct {
>      int cx;
>      int cy;
>      char *filename;            ///< Motion search detailed log filename
> +    int is_opencl;

int opencl for consistency with the option name

> +    FFDeshakeOpenclEnv opencl_env;

You can call this:
DeshakeOpenclContext opencl;

Also you should include it conditionally, like:
#if CONFIG_OPENCL
DeshakeOpenclContext opencl;
#endif

In general a possibly better approach is to put under #if all the
disabled code, so that you don't need to bloat the binary in case
OpenCL is not supported.

> +    int (* transform)(FFDeshakeOpenclEnv *, int, int, int, int, const float *, const float *,
> +               enum InterpolateMethod, enum FillMethod , AVFrame *, AVFrame *);

naming the function args may help readability

>  } DeshakeContext;
>  
>  #define OFFSET(x) offsetof(DeshakeContext, x)
> @@ -129,6 +135,7 @@ static const AVOption deshake_options[] = {
>          { "exhaustive", "exhaustive search",      0, AV_OPT_TYPE_CONST, {.i64=EXHAUSTIVE},       INT_MIN, INT_MAX, FLAGS, "smode" },
>          { "less",       "less exhaustive search", 0, AV_OPT_TYPE_CONST, {.i64=SMART_EXHAUSTIVE}, INT_MIN, INT_MAX, FLAGS, "smode" },
>      { "filename", "set motion search detailed log file name", OFFSET(filename), AV_OPT_TYPE_STRING, {.str=NULL}, .flags = FLAGS },

> +    { "opencl", "set the deshake filter using opencl", OFFSET(is_opencl), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, .flags = FLAGS },

"use OpenCL filtering capabilities" ... {i64=0}, 0, 1, .flags = FLAGS },

And missing updates in doc/filters.texi.

>      { NULL }
>  };
>  
> @@ -360,6 +367,27 @@ static void find_motion(DeshakeContext *deshake, uint8_t *src1, uint8_t *src2,
>      av_free(angles);
>  }
>  
> +static int deshake_transform_c(FFDeshakeOpenclEnv *opencl_env,

Pass the filter context here, or this doesn't make any sense for the
pure-C path.

> +                                        int width, int height, int cw, int ch,
> +                                        const float *matrix_y, const float *matrix_uv,
> +                                        enum InterpolateMethod interpolate,
> +                                        enum FillMethod fill, AVFrame *in, AVFrame *out)
> +{
> +    int ret = 0;
> +    // Transform the luma plane
> +    ret = avfilter_transform(in->data[0], out->data[0], in->linesize[0], out->linesize[0], width, height, matrix_y, INTERPOLATE_BILINEAR, fill);
> +    if (ret < 0)
> +        return ret;
> +    // Transform the chroma planes
> +    ret = avfilter_transform(in->data[1], out->data[1], in->linesize[1], out->linesize[1], cw, ch, matrix_uv, INTERPOLATE_BILINEAR, fill);
> +    if (ret < 0)
> +        return ret;
> +    ret = avfilter_transform(in->data[2], out->data[2], in->linesize[2], out->linesize[2], cw, ch, matrix_uv, INTERPOLATE_BILINEAR, fill);
> +    if (ret < 0)
> +        return ret;

you can probably use a loop here.

Also you're hardcoding the interpolate method.


> +    return ret;
> +}
> +
>  static av_cold int init(AVFilterContext *ctx, const char *args)
>  {
>      int ret;
> @@ -393,7 +421,16 @@ static av_cold int init(AVFilterContext *ctx, const char *args)
>          deshake->cw += deshake->cx - (deshake->cx & ~15);
>          deshake->cx &= ~15;
>      }
> -
> +    deshake->transform = deshake_transform_c;

> +    if (!CONFIG_OPENCL && deshake->is_opencl)
> +        av_log(ctx, AV_LOG_WARNING, "OpenCL does not configure, use c code to deshake\n");

AV_LOG_ERROR, "OpenCL support was not enabled in this build, cannot be selected\n"
return AVERROR(EINVAL);

aborting is better than (silently) creating unexpected output,
especially in the case the OpenCL deshake is not bit-exact with the
C-code output.

> +
> +    if (deshake->is_opencl && CONFIG_OPENCL) {
> +        deshake->transform = ff_opencl_transform;
> +        ret = ff_opencl_deshake_init(ctx, &(deshake->opencl_env));
> +        if (ret < 0)
> +            return ret;
> +    }
>      av_log(ctx, AV_LOG_VERBOSE, "cx: %d, cy: %d, cw: %d, ch: %d, rx: %d, ry: %d, edge: %d blocksize: %d contrast: %d search: %d\n",
>             deshake->cx, deshake->cy, deshake->cw, deshake->ch,
>             deshake->rx, deshake->ry, deshake->edge, deshake->blocksize * 2, deshake->contrast, deshake->search);
> @@ -433,7 +470,9 @@ static int config_props(AVFilterLink *link)
>  static av_cold void uninit(AVFilterContext *ctx)
>  {
>      DeshakeContext *deshake = ctx->priv;
> -
> +    if (deshake->is_opencl && CONFIG_OPENCL) {
> +        ff_opencl_deshake_uninit(&(deshake->opencl_env));
> +    }
>      av_frame_free(&deshake->ref);
>      if (deshake->fp)
>          fclose(deshake->fp);
> @@ -449,9 +488,10 @@ static int filter_frame(AVFilterLink *link, AVFrame *in)
>      AVFilterLink *outlink = link->dst->outputs[0];
>      AVFrame *out;
>      Transform t = {{0},0}, orig = {{0},0};
> -    float matrix[9];
> +    float matrix_y[9], matrix_uv[9];
>      float alpha = 2.0 / deshake->refcount;
>      char tmp[256];
> +    int ret = 0;
>  
>      out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
>      if (!out) {
> @@ -460,6 +500,12 @@ static int filter_frame(AVFilterLink *link, AVFrame *in)
>      }
>      av_frame_copy_props(out, in);
>  
> +    if (deshake->is_opencl && CONFIG_OPENCL) {
> +        ret = ff_opencl_deshake_process_inout_buf(in, out, &(deshake->opencl_env));
> +        if (ret < 0)
> +            return ret;
> +    }
> +
>      if (deshake->cx < 0 || deshake->cy < 0 || deshake->cw < 0 || deshake->ch < 0) {
>          // Find the most likely global motion for the current frame
>          find_motion(deshake, (deshake->ref == NULL) ? in->data[0] : deshake->ref->data[0], in->data[0], link->w, link->h, in->linesize[0], &t);
> @@ -532,21 +578,19 @@ static int filter_frame(AVFilterLink *link, AVFrame *in)
>      deshake->last.zoom = t.zoom;
>  
>      // Generate a luma transformation matrix
> -    avfilter_get_matrix(t.vector.x, t.vector.y, t.angle, 1.0 + t.zoom / 100.0, matrix);
> -
> -    // Transform the luma plane
> -    avfilter_transform(in->data[0], out->data[0], in->linesize[0], out->linesize[0], link->w, link->h, matrix, INTERPOLATE_BILINEAR, deshake->edge);
> -
> +    avfilter_get_matrix(t.vector.x, t.vector.y, t.angle, 1.0 + t.zoom / 100.0, matrix_y);
>      // Generate a chroma transformation matrix
> -    avfilter_get_matrix(t.vector.x / (link->w / CHROMA_WIDTH(link)), t.vector.y / (link->h / CHROMA_HEIGHT(link)), t.angle, 1.0 + t.zoom / 100.0, matrix);
> -
> -    // Transform the chroma planes
> -    avfilter_transform(in->data[1], out->data[1], in->linesize[1], out->linesize[1], CHROMA_WIDTH(link), CHROMA_HEIGHT(link), matrix, INTERPOLATE_BILINEAR, deshake->edge);
> -    avfilter_transform(in->data[2], out->data[2], in->linesize[2], out->linesize[2], CHROMA_WIDTH(link), CHROMA_HEIGHT(link), matrix, INTERPOLATE_BILINEAR, deshake->edge);
> +    avfilter_get_matrix(t.vector.x / (link->w / CHROMA_WIDTH(link)), t.vector.y / (link->h / CHROMA_HEIGHT(link)), t.angle, 1.0 + t.zoom / 100.0, matrix_uv);
> +    // Transform the luma and chroma planes
> +    ret = deshake->transform(&(deshake->opencl_env), link->w, link->h, CHROMA_WIDTH(link), CHROMA_HEIGHT(link),
> +                                            matrix_y, matrix_uv, INTERPOLATE_BILINEAR, deshake->edge, in, out);
>  
>      // Cleanup the old reference frame
>      av_frame_free(&deshake->ref);
>  
> +    if (ret < 0)
> +        return ret;
> +
>      // Store the current frame as the reference frame for calculating the
>      // motion of the next frame
>      deshake->ref = in;
> @@ -583,3 +627,5 @@ AVFilter avfilter_vf_deshake = {
>      .outputs       = deshake_outputs,
>      .priv_class    = &deshake_class,
>  };

> +
> +

spurious empty lines

[...]
-- 
FFmpeg = Free Fostering Multipurpose Plastic Evanescent Geisha


More information about the ffmpeg-devel mailing list