[FFmpeg-devel] [PATCH v2 2/2] dnn_backend_native_layer_conv2d.c: refine code.
xujunzz at sjtu.edu.cn
xujunzz at sjtu.edu.cn
Tue Sep 15 11:30:11 EEST 2020
From: Xu Jun <xujunzz at sjtu.edu.cn>
Move the computation of each thread's work area (thread_start/thread_end)
out of the thread function and into the main thread.
Signed-off-by: Xu Jun <xujunzz at sjtu.edu.cn>
v2: fix build warnings
.../dnn/dnn_backend_native_layer_conv2d.c | 44 +++++++++----------
1 file changed, 20 insertions(+), 24 deletions(-)
diff --git a/libavfilter/dnn/dnn_backend_native_layer_conv2d.c b/libavfilter/dnn/dnn_backend_native_layer_conv2d.c
index 5ed1851512..134016be34 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_conv2d.c
+++ b/libavfilter/dnn/dnn_backend_native_layer_conv2d.c
@@ -33,12 +33,11 @@ typedef struct thread_common_param{
const void *parameters;
NativeContext *ctx;
float *output_data;
- int thread_num;
} thread_common_param;
typedef struct thread_param{
thread_common_param *thread_common_param;
- int thread_index;
+ int thread_start, thread_end;
} thread_param;
int dnn_load_layer_conv2d(Layer *layer, AVIOContext *model_file_context, int file_size, int operands_num)
@@ -113,7 +112,6 @@ static void * dnn_execute_layer_conv2d_thread(void *threadarg)
thread_common_param *thread_common_param = thread_param->thread_common_param;
DnnOperand *operands = thread_common_param->operands;
int32_t input_operand_index = thread_common_param->input_operand_indexes[0];
- int number = operands[input_operand_index].dims[0];
int height = operands[input_operand_index].dims[1];
int width = operands[input_operand_index].dims[2];
int channel = operands[input_operand_index].dims[3];
@@ -126,16 +124,12 @@ static void * dnn_execute_layer_conv2d_thread(void *threadarg)
int filter_size = conv_params->kernel_size * filter_linesize;
int pad_size = (conv_params->padding_method == VALID) ? (conv_params->kernel_size - 1) / 2 * conv_params->dilation : 0;
- int thread_stride = (height - pad_size * 2) / thread_common_param->thread_num;
- int thread_start = thread_stride * thread_param->thread_index + pad_size;
- int thread_end = (thread_param->thread_index == thread_common_param->thread_num - 1) ? (height - pad_size) : (thread_start + thread_stride);
float *output = thread_common_param->output_data;
- output += (conv_params->output_num) * (width - 2 * pad_size) * (thread_start - pad_size);
+ output += (conv_params->output_num) * (width - 2 * pad_size) * (thread_param->thread_start - pad_size);
av_assert0(channel == conv_params->input_num);
- for (int y = thread_start; y < thread_end; ++y) {
+ for (int y = thread_param->thread_start; y < thread_param->thread_end; ++y) {
for (int x = pad_size; x < width - pad_size; ++x) {
for (int n_filter = 0; n_filter < conv_params->output_num; ++n_filter) {
if (conv_params->has_bias)
@@ -194,24 +188,20 @@ int dnn_execute_layer_conv2d(DnnOperand *operands, const int32_t *input_operand_
? (av_cpu_count() + 1) : (ctx->options.conv2d_threads);
pthread_t *thread_id = av_malloc(thread_num * sizeof(pthread_t));
+ int thread_stride;
thread_param **thread_param = av_malloc(thread_num * sizeof(*thread_param));
- //struct used to pass parameters
thread_common_param thread_common_param;
- thread_common_param.operands = operands;
- thread_common_param.input_operand_indexes = input_operand_indexes;
- thread_common_param.output_operand_index = output_operand_index;
- thread_common_param.parameters = parameters;
- thread_common_param.ctx = ctx;
//alloc memory
const ConvolutionalParams *conv_params = (const ConvolutionalParams *)(parameters);
+ int height = operands[input_operand_indexes[0]].dims[1];
+ int width = operands[input_operand_indexes[0]].dims[2];
int pad_size = (conv_params->padding_method == VALID) ? (conv_params->kernel_size - 1) / 2 * conv_params->dilation : 0;
DnnOperand *output_operand = &operands[output_operand_index];
output_operand->dims[0] = operands[input_operand_indexes[0]].dims[0];
- output_operand->dims[1] = operands[input_operand_indexes[0]].dims[1] - pad_size * 2;
- output_operand->dims[2] = operands[input_operand_indexes[0]].dims[2] - pad_size * 2;
+ output_operand->dims[1] = height - pad_size * 2;
+ output_operand->dims[2] = width - pad_size * 2;
output_operand->dims[3] = conv_params->output_num;
output_operand->data_type = operands[input_operand_indexes[0]].data_type;
output_operand->length = calculate_operand_data_length(output_operand);
@@ -224,16 +214,22 @@ int dnn_execute_layer_conv2d(DnnOperand *operands, const int32_t *input_operand_
av_log(ctx, AV_LOG_ERROR, "Failed to reallocate memory for output\n");
return DNN_ERROR;
thread_common_param.output_data = output_operand->data;
+ thread_common_param.operands = operands;
+ thread_common_param.input_operand_indexes = input_operand_indexes;
+ thread_common_param.output_operand_index = output_operand_index;
+ thread_common_param.parameters = parameters;
+ thread_common_param.ctx = ctx;
- thread_common_param.thread_num = thread_num;
+ thread_stride = (height - pad_size * 2) / thread_num;
//create threads
for (int i = 0; i < thread_num; i++){
thread_param[i] = av_malloc(sizeof(**thread_param));
thread_param[i]->thread_common_param = &thread_common_param;
- thread_param[i]->thread_index = i;
+ thread_param[i]->thread_start = thread_stride * i + pad_size;
+ thread_param[i]->thread_end = (i == thread_num - 1) ? (height - pad_size) : (thread_param[i]->thread_start + thread_stride);
pthread_create(&thread_id[i], NULL, dnn_execute_layer_conv2d_thread, (void *)thread_param[i]);
@@ -249,10 +245,10 @@ int dnn_execute_layer_conv2d(DnnOperand *operands, const int32_t *input_operand_
- thread_common_param.thread_num = 1;
- thread_param[0] = av_malloc(sizeof(thread_param));
+ thread_param[0] = av_malloc(sizeof(**thread_param));
thread_param[0]->thread_common_param = &thread_common_param;
- thread_param[0]->thread_index = 0;
+ thread_param[0]->thread_start = 0;
+ thread_param[0]->thread_end = height - pad_size;
dnn_execute_layer_conv2d_thread((void *)thread_param[0]);
More information about the ffmpeg-devel
mailing list