[FFmpeg-devel] [PATCH 1/2] libavutil/libavfilter: opencl wrapper based on comments on 20130329

Michael Niedermayer michaelni at gmx.at
Fri Mar 29 16:13:14 CET 2013


On Fri, Mar 29, 2013 at 09:32:35PM +0800, Wei Gao wrote:
> 

[...]

> +int av_opencl_create_kernel(AVOpenCLKernelEnv *env, const char *kernel_name)
> +{
> +    cl_int status;
> +    int i, ret = 0;
> +    LOCK_OPENCL;
> +    if (strlen(kernel_name) > sizeof(env->kernel_name)) {

that check is off by 1
a string of 1 char needs a 2 char buffer, the second for the 0


> +        av_log(&openclutils, AV_LOG_ERROR, "Created kernel name %s is too long\n", kernel_name);
> +        ret = AVERROR(EINVAL);
> +        goto end;
> +    }
> +    if (!env->kernel) {
> +        for (i = 0;i < gpu_env.kernel_count;i++) {
> +            if (av_strcasecmp(kernel_name, gpu_env.kernel_node[i].kernel_name) == 0) {
> +                env->kernel = gpu_env.kernel_node[i].kernel;
> +                gpu_env.kernel_node[i].kernel_ref++;
> +                break;
> +            }
> +        }
> +        if (!env->kernel) {
> +            env->kernel = clCreateKernel(gpu_env.program, kernel_name, &status);
> +            if (status != CL_SUCCESS) {
> +                av_log(&openclutils, AV_LOG_ERROR, "Could not create OpenCL kernel: %s\n", opencl_errstr(status));
> +                ret = AVERROR_EXTERNAL;
> +                goto end;
> +            }
> +            av_strlcpy(gpu_env.kernel_node[gpu_env.kernel_count].kernel_name, kernel_name,
> +                       sizeof(gpu_env.kernel_node[gpu_env.kernel_count].kernel_name));
> +            gpu_env.kernel_node[gpu_env.kernel_count].kernel= env->kernel;
> +            gpu_env.kernel_node[gpu_env.kernel_count].kernel_ref++;

> +            gpu_env.kernel_count++;

this is missing a check against the size of kernel_node
or a comment why such check would not be needed

[...]
> +static int init_opencl_env(GPUEnv *gpu_env, AVOpenCLExternalInfo *ext_opencl_info)
> +{
> +    size_t device_length;
> +    cl_int status;
> +    cl_uint num_platforms, num_devices;
> +    cl_platform_id *platform_ids = NULL;
> +    cl_context_properties cps[3];
> +    char platform_name[100];
> +    int i, ret = 0;
> +    cl_device_type device_type[] = {CL_DEVICE_TYPE_GPU, CL_DEVICE_TYPE_CPU, CL_DEVICE_TYPE_DEFAULT};
> +    if (ext_opencl_info) {
> +        if (gpu_env->is_user_created)
> +            return 0;
> +        gpu_env->platform        = ext_opencl_info->platform;
> +        gpu_env->is_user_created = 1;
> +        gpu_env->command_queue   = ext_opencl_info->command_queue;
> +        gpu_env->context         = ext_opencl_info->context;
> +        gpu_env->device_ids      = ext_opencl_info->device_ids;
> +        gpu_env->device_id       = ext_opencl_info->device_id;
> +        gpu_env->device_type     = ext_opencl_info->device_type;
> +    } else {
> +        if (!gpu_env->is_user_created) {
> +            status = clGetPlatformIDs(0, NULL, &num_platforms);
> +            if (status != CL_SUCCESS) {
> +                av_log(&openclutils, AV_LOG_ERROR, "Could not get OpenCL platform ids: %s\n", opencl_errstr(status));
> +                return AVERROR_EXTERNAL;
> +            }
> +            if (num_platforms > 0) {
> +                platform_ids = av_mallocz(num_platforms * sizeof(cl_platform_id));
> +                if (!platform_ids) {
> +                    ret = AVERROR(ENOMEM);
> +                    goto end;
> +                }
> +                status = clGetPlatformIDs(num_platforms, platform_ids, NULL);
> +                if (status != CL_SUCCESS) {
> +                    av_log(&openclutils, AV_LOG_ERROR, "Could not get OpenCL platform ids: %s\n", opencl_errstr(status));
> +                    ret = AVERROR_EXTERNAL;
> +                    goto end;
> +                }
> +                for (i = 0; i < num_platforms; i++) {
> +                    status = clGetPlatformInfo(platform_ids[i], CL_PLATFORM_VENDOR,
> +                                               sizeof(platform_name), platform_name,
> +                                               NULL);
> +
> +                    if (status != CL_SUCCESS) {
> +                        av_log(&openclutils, AV_LOG_ERROR, "Could not get OpenCL platform info: %s\n", opencl_errstr(status));
> +                        ret = AVERROR_EXTERNAL;
> +                        goto end;
> +                    }
> +                    gpu_env->platform = platform_ids[i];
> +                    status = clGetDeviceIDs(gpu_env->platform, CL_DEVICE_TYPE_GPU,
> +                                            0, NULL, &num_devices);
> +                    if (status != CL_SUCCESS) {
> +                        av_log(&openclutils, AV_LOG_ERROR, "Could not get OpenCL device number:%s\n", opencl_errstr(status));
> +                        ret = AVERROR_EXTERNAL;
> +                        goto end;
> +                    }
> +                    if (num_devices == 0) {
> +                        //find CPU device
> +                        status = clGetDeviceIDs(gpu_env->platform, CL_DEVICE_TYPE_CPU,
> +                                             0, NULL, &num_devices);
> +                    }
> +                    if (status != CL_SUCCESS) {
> +                        av_log(&openclutils, AV_LOG_ERROR, "Could not get OpenCL device ids: %s\n", opencl_errstr(status));
> +                        ret = AVERROR_EXTERNAL;
> +                        goto end;
> +                    }
> +                    if (num_devices)
> +                       break;
> +
> +                }
> +            }
> +            if (!gpu_env->platform) {
> +                av_log(&openclutils, AV_LOG_ERROR, "Could not get OpenCL platforms\n");
> +                ret = AVERROR_EXTERNAL;
> +                goto end;
> +            }
> +
> +           /*
> +                 * Use available platform.
> +                 */
> +            av_log(&openclutils, AV_LOG_VERBOSE, "Platform Name: %s\n", platform_name);
> +            cps[0] = CL_CONTEXT_PLATFORM;
> +            cps[1] = (cl_context_properties)gpu_env->platform;
> +            cps[2] = 0;
> +            /* Check for GPU. */
> +
> +            for (i = 0; i < sizeof(device_type); i++) {
> +                gpu_env->device_type = device_type[i];
> +                gpu_env->context     = clCreateContextFromType(cps, gpu_env->device_type,
> +                                                               NULL, NULL, &status);
> +                if (status != CL_SUCCESS) {
> +                    av_log(&openclutils, AV_LOG_ERROR, "Could not get OpenCL context from device type: %s\n", opencl_errstr(status));
> +                    ret = AVERROR_EXTERNAL;
> +                    goto end;
> +                }
> +                if (gpu_env->context)
> +                    break;
> +            }
> +            if (!gpu_env->context) {
> +                av_log(&openclutils, AV_LOG_ERROR, "Could not get OpenCL context from device type\n");
> +                ret = AVERROR_EXTERNAL;
> +                goto end;
> +            }
> +            /* Detect OpenCL devices. */
> +            /* First, get the size of device list data */
> +            status = clGetContextInfo(gpu_env->context, CL_CONTEXT_DEVICES,
> +                                      0, NULL, &device_length);
> +            if (status != CL_SUCCESS) {
> +                av_log(&openclutils, AV_LOG_ERROR, "Could not get OpenCL device length: %s\n", opencl_errstr(status));
> +                ret = AVERROR_EXTERNAL;
> +                goto end;
> +            }
> +            if (device_length == 0) {
> +                av_log(&openclutils, AV_LOG_ERROR, "Could not get OpenCL device length\n");
> +                ret = AVERROR_EXTERNAL;
> +                goto end;
> +            }
> +            /* Now allocate memory for device list based on the size we got earlier */
> +            gpu_env->device_ids = av_mallocz(device_length);
> +            if (!gpu_env->device_ids) {
> +                ret = AVERROR(ENOMEM);
> +                goto end;
> +            }
> +            /* Now, get the device list data */
> +            status = clGetContextInfo(gpu_env->context, CL_CONTEXT_DEVICES, device_length,
> +                                      gpu_env->device_ids, NULL);
> +            if (status != CL_SUCCESS) {
> +                av_log(&openclutils, AV_LOG_ERROR, "Could not get OpenCL context info: %s\n", opencl_errstr(status));
> +                ret = AVERROR_EXTERNAL;
> +                goto end;
> +            }

> +            /* Create OpenCL command queue. */
> +            gpu_env->command_queue = clCreateCommandQueue(gpu_env->context, gpu_env->device_ids[0],
> +                                                          0, &status);

This looks like only the first device would ever be used if there are
multiple.
Seems not optimal for a system the contains multiple devices


[...]
> +int av_opencl_buffer_write(cl_mem dst_cl_buf, uint8_t *src_buf, size_t buf_size)
> +{
> +    cl_int status;
> +    void *mapped = clEnqueueMapBuffer(gpu_env.command_queue, dst_cl_buf,
> +                                      CL_TRUE,CL_MAP_WRITE, 0, sizeof(uint8_t) * buf_size,
> +                                      0, NULL, NULL, &status);
> +
> +    if (status != CL_SUCCESS) {
> +        av_log(&openclutils, AV_LOG_ERROR, "Could not map OpenCL buffer: %s\n", opencl_errstr(status));
> +        return AVERROR_EXTERNAL;
> +    }
> +    memcpy(mapped, src_buf, buf_size);
> +
> +    status = clEnqueueUnmapMemObject(gpu_env.command_queue, dst_cl_buf, mapped, 0, NULL, NULL);
> +    if (status != CL_SUCCESS) {
> +        av_log(&openclutils, AV_LOG_ERROR, "Could not unmap OpenCL buffer: %s\n", opencl_errstr(status));
> +        return AVERROR_EXTERNAL;
> +    }
> +    return 0;
> +}
> +
> +int av_opencl_buffer_read(uint8_t *dst_buf, cl_mem src_cl_buf, size_t buf_size)
> +{
> +    cl_int status;
> +    void *mapped = clEnqueueMapBuffer(gpu_env.command_queue, src_cl_buf,
> +                                      CL_TRUE,CL_MAP_READ, 0, buf_size,
> +                                      0, NULL, NULL, &status);
> +
> +    if (status != CL_SUCCESS) {
> +        av_log(&openclutils, AV_LOG_ERROR, "Could not map OpenCL buffer: %s\n", opencl_errstr(status));
> +        return AVERROR_EXTERNAL;
> +    }
> +    memcpy(dst_buf, mapped, buf_size);

Why is the buffer copied instead of the data directly used ?
this also applies to av_opencl_buffer_read_image, av_opencl_buffer_write and av_opencl_buffer_write_image


[...]
> +typedef struct AVOpenCLKernelEnv {
> +    cl_context context;
> +    cl_command_queue command_queue;
> +    cl_program program;
> +    cl_kernel kernel;
> +    char kernel_name[AV_OPENCL_MAX_KERNEL_NAME_SIZE];
> +} AVOpenCLKernelEnv;
> +

> +typedef struct AVOpenCLExternalInfo {
> +    cl_platform_id platform;
> +    cl_device_type device_type;
> +    cl_context context;
> +    cl_device_id *device_ids;
> +    cl_device_id  device_id;
> +    cl_command_queue command_queue;
> +    char *platform_name;
> +} AVOpenCLExternalInfo;

If a user (application) has to create this struct then it cannot be
extended later (would break ABI). I dont know if this could become a
problem, a solution would be to provide a dunction to allocate such
structure

Also the whole should be tested with valgrind so it does no out of
array accesses and has no memleaks


[...]

Thanks

-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

You can kill me, but you cannot change the truth.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 198 bytes
Desc: Digital signature
URL: <http://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20130329/c55ffb28/attachment.asc>


More information about the ffmpeg-devel mailing list