[FFmpeg-devel] [PATCH V2 1/4] dnn: add tf.nn.conv2d support for native model

Wed Oct 30 16:48:42 EET 2019

Em seg, 21 de out de 2019 às 09:44, Guo, Yejun <yejun.guo at intel.com>
escreveu:

> Unlike other tf.*.conv2d layers, tf.nn.conv2d does not create many
> nodes (within a scope) in the graph; it behaves like the other
> single-node layers. tf.nn.conv2d creates only one node in the graph,
> and no internal nodes such as 'kernel' are created.
>
> The format of the native model file is also changed: a flag named
> has_bias is added, so the major version number is increased.
>
> Signed-off-by: Guo, Yejun <yejun.guo at intel.com>
> ---
>  libavfilter/dnn/dnn_backend_native.c              |  2 +-
>  libavfilter/dnn/dnn_backend_native_layer_conv2d.c | 37 +++++++++++-----
>  libavfilter/dnn/dnn_backend_native_layer_conv2d.h |  1 +
>  tests/dnn/dnn-layer-conv2d-test.c                 |  2 +
>  tools/python/convert_from_tensorflow.py           | 54
> ++++++++++++++++++++---
>  tools/python/convert_header.py                    |  4 +-
>  6 files changed, 82 insertions(+), 18 deletions(-)
>
> diff --git a/libavfilter/dnn/dnn_backend_native.c
> b/libavfilter/dnn/dnn_backend_native.c
> index 06b010d..ff280b5 100644
> --- a/libavfilter/dnn/dnn_backend_native.c
> +++ b/libavfilter/dnn/dnn_backend_native.c
> @@ -98,7 +98,7 @@ DNNModel *ff_dnn_load_model_native(const char
> *model_filename)
>      char header_expected[] = "FFMPEGDNNNATIVE";
>      char *buf;
>      size_t size;
> -    int version, header_size, major_version_expected = 0;
> +    int version, header_size, major_version_expected = 1;
>      ConvolutionalNetwork *network = NULL;
>      AVIOContext *model_file_context;
>      int file_size, dnn_size, parsed_size;
> diff --git a/libavfilter/dnn/dnn_backend_native_layer_conv2d.c
> b/libavfilter/dnn/dnn_backend_native_layer_conv2d.c
> index 0de8902..6ec0fa7 100644
> --- a/libavfilter/dnn/dnn_backend_native_layer_conv2d.c
> +++ b/libavfilter/dnn/dnn_backend_native_layer_conv2d.c
> @@ -38,27 +38,41 @@ int dnn_load_layer_conv2d(Layer *layer, AVIOContext
> *model_file_context, int fil
>      conv_params->input_num = (int32_t)avio_rl32(model_file_context);
>      conv_params->output_num = (int32_t)avio_rl32(model_file_context);
>      conv_params->kernel_size = (int32_t)avio_rl32(model_file_context);
> +    conv_params->has_bias = (int32_t)avio_rl32(model_file_context);
> +    dnn_size += 28;
> +
>      kernel_size = conv_params->input_num * conv_params->output_num *
> -                  conv_params->kernel_size * conv_params->kernel_size;
> -    dnn_size += 24 + (kernel_size + conv_params->output_num << 2);
> +                      conv_params->kernel_size * conv_params->kernel_size;
> +    dnn_size += kernel_size * 4;
> +    if (conv_params->has_bias)
> +        dnn_size += conv_params->output_num * 4;
> +
>      if (dnn_size > file_size || conv_params->input_num <= 0 ||
>          conv_params->output_num <= 0 || conv_params->kernel_size <= 0){
>          av_freep(&conv_params);
>          return 0;
>      }
> +
>      conv_params->kernel = av_malloc(kernel_size * sizeof(float));
> -    conv_params->biases = av_malloc(conv_params->output_num *
> sizeof(float));
> -    if (!conv_params->kernel || !conv_params->biases){
> -        av_freep(&conv_params->kernel);
> -        av_freep(&conv_params->biases);
> +    if (!conv_params->kernel) {
>          av_freep(&conv_params);
>          return 0;
>      }
> -    for (int i = 0; i < kernel_size; ++i){
> +    for (int i = 0; i < kernel_size; ++i) {
>          conv_params->kernel[i] =
> av_int2float(avio_rl32(model_file_context));
>      }
> -    for (int i = 0; i < conv_params->output_num; ++i){
> -        conv_params->biases[i] =
> av_int2float(avio_rl32(model_file_context));
> +
> +    conv_params->biases = NULL;
> +    if (conv_params->has_bias) {
> +        conv_params->biases = av_malloc(conv_params->output_num *
> sizeof(float));
> +        if (!conv_params->biases){
> +            av_freep(&conv_params->kernel);
> +            av_freep(&conv_params);
> +            return 0;
> +        }
> +        for (int i = 0; i < conv_params->output_num; ++i){
> +            conv_params->biases[i] =
> av_int2float(avio_rl32(model_file_context));
> +        }
>      }
>
>      layer->params = conv_params;
> @@ -103,7 +117,10 @@ int dnn_execute_layer_conv2d(DnnOperand *operands,
> const int32_t *input_operand_
>      for (int y = pad_size; y < height - pad_size; ++y) {
>          for (int x = pad_size; x < width - pad_size; ++x) {
>              for (int n_filter = 0; n_filter < conv_params->output_num;
> ++n_filter) {
> -                output[n_filter] = conv_params->biases[n_filter];
> +                if (conv_params->has_bias)
> +                    output[n_filter] = conv_params->biases[n_filter];
> +                else
> +                    output[n_filter] = 0.f;
>
>                  for (int ch = 0; ch < conv_params->input_num; ++ch) {
>                      for (int kernel_y = 0; kernel_y <
> conv_params->kernel_size; ++kernel_y) {
> diff --git a/libavfilter/dnn/dnn_backend_native_layer_conv2d.h
> b/libavfilter/dnn/dnn_backend_native_layer_conv2d.h
> index db90b2b..bf87264 100644
> --- a/libavfilter/dnn/dnn_backend_native_layer_conv2d.h
> +++ b/libavfilter/dnn/dnn_backend_native_layer_conv2d.h
> @@ -31,6 +31,7 @@ typedef struct ConvolutionalParams{
>      DNNActivationFunc activation;
>      DNNConvPaddingParam padding_method;
>      int32_t dilation;
> +    int32_t has_bias;
>      float *kernel;
>      float *biases;
>  } ConvolutionalParams;
> diff --git a/tests/dnn/dnn-layer-conv2d-test.c
> b/tests/dnn/dnn-layer-conv2d-test.c
> index 9d13da3..2da01e5 100644
> --- a/tests/dnn/dnn-layer-conv2d-test.c
> +++ b/tests/dnn/dnn-layer-conv2d-test.c
> @@ -97,6 +97,7 @@ static int test_with_same_dilate(void)
>      float bias[2] = { -1.6574852, -0.72915393 };
>
>      params.activation = TANH;
> +    params.has_bias = 1;
>      params.biases = bias;
>      params.dilation = 2;
>      params.input_num = 3;
> @@ -196,6 +197,7 @@ static int test_with_valid(void)
>      float bias[2] = { -0.4773722, -0.19620377 };
>
>      params.activation = TANH;
> +    params.has_bias = 1;
>      params.biases = bias;
>      params.dilation = 1;
>      params.input_num = 3;
> diff --git a/tools/python/convert_from_tensorflow.py
> b/tools/python/convert_from_tensorflow.py
> index a663b34..605158a 100644
> --- a/tools/python/convert_from_tensorflow.py
> +++ b/tools/python/convert_from_tensorflow.py
> @@ -118,7 +118,7 @@ class TFConverter:
>          return knode, bnode, dnode, anode
>
>
> -    def dump_conv2d_to_file(self, node, f):
> +    def dump_complex_conv2d_to_file(self, node, f):
>          assert(node.op == 'Conv2D')
>          self.layer_number = self.layer_number + 1
>          self.converted_nodes.add(node.name)
> @@ -153,7 +153,8 @@ class TFConverter:
>          kernel = kernel.reshape(filter_height, filter_width, in_channels,
> out_channels)
>          kernel = np.transpose(kernel, [3, 0, 1, 2])
>
> -        np.array([self.op2code[node.op], dilation, padding,
> self.conv_activations[activation], in_channels, out_channels,
> filter_height], dtype=np.uint32).tofile(f)
> +        has_bias = 1
> +        np.array([self.op2code[node.op], dilation, padding,
> self.conv_activations[activation], in_channels, out_channels,
> filter_height, has_bias], dtype=np.uint32).tofile(f)
>          kernel.tofile(f)
>
>          btensor = bnode.attr['value'].tensor
> @@ -173,6 +174,41 @@ class TFConverter:
>          np.array([input_operand_index, output_operand_index],
> dtype=np.uint32).tofile(f)
>
>
> +    def dump_simple_conv2d_to_file(self, node, f):
> +        assert(node.op == 'Conv2D')
> +        self.layer_number = self.layer_number + 1
> +        self.converted_nodes.add(node.name)
> +
> +        node0 = self.name_node_dict[node.input[0]]
> +        node1 = self.name_node_dict[node.input[1]]
> +        if node0.op == 'Const':
> +            knode = node0
> +            input_name = node.input[1]
> +        else:
> +            knode = node1
> +            input_name = node.input[0]
> +
> +        ktensor = knode.attr['value'].tensor
> +        filter_height = ktensor.tensor_shape.dim[0].size
> +        filter_width = ktensor.tensor_shape.dim[1].size
> +        in_channels = ktensor.tensor_shape.dim[2].size
> +        out_channels = ktensor.tensor_shape.dim[3].size
> +        kernel = np.frombuffer(ktensor.tensor_content, dtype=np.float32)
> +        kernel = kernel.reshape(filter_height, filter_width, in_channels,
> out_channels)
> +        kernel = np.transpose(kernel, [3, 0, 1, 2])
> +
> +        has_bias = 0
> +        dilation = 1
> +        padding = node.attr['padding'].s.decode("utf-8")
> +        np.array([self.op2code[node.op], dilation,
> self.conv_paddings[padding], self.conv_activations['None'],
> +                  in_channels, out_channels, filter_height, has_bias],
> dtype=np.uint32).tofile(f)
> +        kernel.tofile(f)
> +
> +        input_operand_index = self.add_operand(input_name,
> Operand.IOTYPE_INPUT)
> +        output_operand_index = self.add_operand(node.name,
> Operand.IOTYPE_OUTPUT)
> +        np.array([input_operand_index, output_operand_index],
> dtype=np.uint32).tofile(f)
> +
> +
>      def dump_depth2space_to_file(self, node, f):
>          assert(node.op == 'DepthToSpace')
>          self.layer_number = self.layer_number + 1
> @@ -222,10 +258,12 @@ class TFConverter:
>              scope_name = TFConverter.get_scope_name(node.name)
>              if scope_name in self.conv2d_scope_names:
>                  if node.op == 'Conv2D':
> -                    self.dump_conv2d_to_file(node, f)
> +                    self.dump_complex_conv2d_to_file(node, f)
>                  continue
>
> -            if node.op == 'DepthToSpace':
> +            if node.op == 'Conv2D':
> +                self.dump_simple_conv2d_to_file(node, f)
> +            elif node.op == 'DepthToSpace':
>                  self.dump_depth2space_to_file(node, f)
>              elif node.op == 'MirrorPad':
>                  self.dump_mirrorpad_to_file(node, f)
> @@ -312,10 +350,16 @@ class TFConverter:
>
>
>      def generate_conv2d_scope_info(self):
> -        # conv2d is a sub block in graph, get the scope name
> +        # mostly, conv2d is a sub block in graph, get the scope name
>          for node in self.nodes:
>              if node.op == 'Conv2D':
>                  scope = TFConverter.get_scope_name(node.name)
> +                # for the case tf.nn.conv2d is called directly
> +                if scope == '':
> +                    continue
> +                # for the case tf.nn.conv2d is called within a scope
> +                if scope + '/kernel' not in self.name_node_dict:
> +                    continue
>                  self.conv2d_scope_names.add(scope)
>
>          # get the input name to the conv2d sub block
> diff --git a/tools/python/convert_header.py
> b/tools/python/convert_header.py
> index 3c2acd5..67672b2 100644
> --- a/tools/python/convert_header.py
> +++ b/tools/python/convert_header.py
> @@ -20,7 +20,7 @@
>  str = 'FFMPEGDNNNATIVE'
>
>  # increase major and reset minor when we have to re-convert the model file
> -major = 0
> +major = 1
>
>  # increase minor when we don't have to re-convert the model file
> -minor = 2
> +minor = 0
> --
> 2.7.4
>

LGTM
Should push soon.

>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request at ffmpeg.org with subject "unsubscribe".