[FFmpeg-devel] [PATCH 2/2] dnn_backend_native_layer_conv2d.c: refine code.

Guo, Yejun yejun.guo at intel.com
Tue Sep 15 03:39:19 EEST 2020



> -----Original Message-----
> From: ffmpeg-devel <ffmpeg-devel-bounces at ffmpeg.org> On Behalf Of
> xujunzz at sjtu.edu.cn
> Sent: 2020年9月14日 19:32
> To: ffmpeg-devel at ffmpeg.org
> Cc: xujunzz at sjtu.edu.cn
> Subject: [FFmpeg-devel] [PATCH 2/2] dnn_backend_native_layer_conv2d.c:
> refine code.
> 
> From: Xu Jun <xujunzz at sjtu.edu.cn>
> 
> Move thread area allocation out of the thread function into the main thread.
> 
> Signed-off-by: Xu Jun <xujunzz at sjtu.edu.cn>
> ---
>  .../dnn/dnn_backend_native_layer_conv2d.c     | 29 +++++++++----------
>  1 file changed, 13 insertions(+), 16 deletions(-)
> 
> diff --git a/libavfilter/dnn/dnn_backend_native_layer_conv2d.c
> b/libavfilter/dnn/dnn_backend_native_layer_conv2d.c
> index 5ed1851512..57659a1283 100644
> --- a/libavfilter/dnn/dnn_backend_native_layer_conv2d.c
> +++ b/libavfilter/dnn/dnn_backend_native_layer_conv2d.c
> @@ -33,12 +33,11 @@ typedef struct thread_common_param{
>      const void *parameters;
>      NativeContext *ctx;
>      float *output_data;
> -    int thread_num;
>  } thread_common_param;
> 
>  typedef struct thread_param{
>      thread_common_param *thread_common_param;
> -    int thread_index;
> +    int thread_start, thread_end;
>  } thread_param;
> 
>  int dnn_load_layer_conv2d(Layer *layer, AVIOContext *model_file_context, int file_size, int operands_num)
> @@ -126,16 +125,12 @@ static void * dnn_execute_layer_conv2d_thread(void *threadarg)
>      int filter_size = conv_params->kernel_size * filter_linesize;
>      int pad_size = (conv_params->padding_method == VALID) ?
> (conv_params->kernel_size - 1) / 2 * conv_params->dilation : 0;
> 
> -    int thread_stride = (height - pad_size * 2) /
> thread_common_param->thread_num;
> -    int thread_start = thread_stride * thread_param->thread_index +
> pad_size;
> -    int thread_end = (thread_param->thread_index ==
> thread_common_param->thread_num - 1) ? (height - pad_size) : (thread_start
> + thread_stride);
> -
>      float *output = thread_common_param->output_data;
> -    output += (conv_params->output_num) * (width - 2 * pad_size) *
> (thread_start - pad_size);
> +    output += (conv_params->output_num) * (width - 2 * pad_size) *
> + (thread_param->thread_start - pad_size);
> 
>      av_assert0(channel == conv_params->input_num);
> 
> -    for (int y = thread_start; y < thread_end; ++y) {
> +    for (int y = thread_param->thread_start; y <
> + thread_param->thread_end; ++y) {
>          for (int x = pad_size; x < width - pad_size; ++x) {
>              for (int n_filter = 0; n_filter < conv_params->output_num;
> ++n_filter) {
>                  if (conv_params->has_bias)
> @@ -207,11 +202,13 @@ int dnn_execute_layer_conv2d(DnnOperand *operands, const int32_t *input_operand_
> 
>      //alloc memory
>      const ConvolutionalParams *conv_params = (const ConvolutionalParams
> *)(parameters);
> +    int height = operands[input_operand_indexes[0]].dims[1];
> +    int width = operands[input_operand_indexes[0]].dims[2];
>      int pad_size = (conv_params->padding_method == VALID) ?
> (conv_params->kernel_size - 1) / 2 * conv_params->dilation : 0;
>      DnnOperand *output_operand = &operands[output_operand_index];
>      output_operand->dims[0] = operands[input_operand_indexes[0]].dims[0];
> -    output_operand->dims[1] = operands[input_operand_indexes[0]].dims[1] -
> pad_size * 2;
> -    output_operand->dims[2] = operands[input_operand_indexes[0]].dims[2] -
> pad_size * 2;
> +    output_operand->dims[1] = height - pad_size * 2;
> +    output_operand->dims[2] = width - pad_size * 2;
>      output_operand->dims[3] = conv_params->output_num;
>      output_operand->data_type =
> operands[input_operand_indexes[0]].data_type;
>      output_operand->length =
> calculate_operand_data_length(output_operand);
> @@ -227,13 +224,13 @@ int dnn_execute_layer_conv2d(DnnOperand
> *operands, const int32_t *input_operand_
>      thread_common_param.output_data = output_operand->data;
> 
>  #if HAVE_PTHREAD_CANCEL
> -    thread_common_param.thread_num = thread_num;
> -
> +    int thread_stride = (height - pad_size * 2) / thread_num;
Please fix the build warning: move the declaration 'int thread_stride' up so that it precedes the statements in its block.


More information about the ffmpeg-devel mailing list