[FFmpeg-devel] [PATCH V2 1/2] dnn/native: add native support for avg_pool

Thu Jul 30 04:43:23 EEST 2020

> -----Original Message-----
> From: ffmpeg-devel <ffmpeg-devel-bounces at ffmpeg.org> On Behalf Of Ting Fu
> Sent: Wednesday, July 29, 2020 10:11 PM
> To: ffmpeg-devel at ffmpeg.org
> Subject: [FFmpeg-devel] [PATCH V2 1/2] dnn/native: add native support for
> avg_pool
> 
> Not support pooling strides in channel dimension now.
> It can be tested with the model generated with below python script:
> 
> import tensorflow as tf
> import numpy as np
> import imageio
> 
> in_img = imageio.imread('input_odd.jpg') in_img =
> in_img.astype(np.float32)/255.0 in_data = in_img[np.newaxis, :]
> 
> x = tf.placeholder(tf.float32, shape=[1, None, None, 3], name='dnn_in') x_pool
> = tf.nn.avg_pool(x, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME') #please
> alter the params as needed y = tf.identity(x_pool, name='dnn_out')
> 
> sess=tf.Session()
> sess.run(tf.global_variables_initializer())
> 
> graph_def = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def,
> ['dnn_out']) tf.train.write_graph(graph_def, '.', 'image_process.pb',
> as_text=False)
> 
> print("image_process.pb generated, please use \
> path_to_ffmpeg/tools/python/convert.py to generate image_process.model\n")
> 
> output = sess.run(y, feed_dict={x: in_data}) imageio.imsave("out.jpg",
> np.squeeze(output))
> 
> Signed-off-by: Ting Fu <ting.fu at intel.com>
> ---
>  libavfilter/dnn/Makefile                      |   1 +
>  libavfilter/dnn/dnn_backend_native.h          |   2 +
>  .../dnn/dnn_backend_native_layer_avgpool.c    | 147 ++++++++++++++++++
>  .../dnn/dnn_backend_native_layer_avgpool.h    |  35 +++++
>  .../dnn/dnn_backend_native_layer_conv2d.h     |   3 +-
>  libavfilter/dnn/dnn_backend_native_layers.c   |   2 +
>  tools/python/convert_from_tensorflow.py       |  35 ++++-
>  7 files changed, 222 insertions(+), 3 deletions(-)  create mode 100644
> libavfilter/dnn/dnn_backend_native_layer_avgpool.c
>  create mode 100644 libavfilter/dnn/dnn_backend_native_layer_avgpool.h
> 
> diff --git a/libavfilter/dnn/Makefile b/libavfilter/dnn/Makefile index
> d90137ec42..e0957073ee 100644
> --- a/libavfilter/dnn/Makefile
> +++ b/libavfilter/dnn/Makefile
> @@ -1,6 +1,7 @@
>  OBJS-$(CONFIG_DNN)                           += dnn/dnn_interface.o
>  OBJS-$(CONFIG_DNN)                           += dnn/dnn_backend_native.o
>  OBJS-$(CONFIG_DNN)                           += dnn/dnn_backend_native_layers.o
> +OBJS-$(CONFIG_DNN)                           +=
> dnn/dnn_backend_native_layer_avgpool.o
>  OBJS-$(CONFIG_DNN)                           += dnn/dnn_backend_native_layer_pad.o
>  OBJS-$(CONFIG_DNN)                           +=
> dnn/dnn_backend_native_layer_conv2d.o
>  OBJS-$(CONFIG_DNN)                           +=
> dnn/dnn_backend_native_layer_depth2space.o
[...]
> 
> 
> +    def dump_avg_pool_to_file(self, node, f):
> +        assert(node.op == 'AvgPool')
> +        self.layer_number = self.layer_number + 1
> +        self.converted_nodes.add(node.name)
> +        node0 = self.name_node_dict[node.input[0]]
> +        strides = node.attr['strides']
> +        assert(strides.list.i[1]==strides.list.i[2])
> +        assert(strides.list.i[0]==1)
> +        assert(strides.list.i[3]==1)

Since TensorFlow does not support pooling strides in the batch dimension, and the current implementation does not support pooling strides in the channel dimension,
two asserts were added here.

> +        strides = strides.list.i[1]
> +        filter_node = node.attr['ksize']
> +        input_name = node.input[0]
> +
> +        assert(filter_node.list.i[0]==1)
> +        assert(filter_node.list.i[3]==1)

Same as above: TensorFlow does not support pooling ksize in either the batch dimension or the channel dimension, so both are asserted to be 1.

> +        filter_height = filter_node.list.i[1]
> +        filter_width = filter_node.list.i[2]
> +
> +        in_channels = node0.attr['shape'].shape.dim[3].size
> +        out_channels = in_channels
> +        padding = node.attr['padding'].s.decode("utf-8")
> +        np.array([self.op2code[node.op], strides, self.pool_paddings[padding],
> in_channels, out_channels,
> +                  filter_height],dtype=np.uint32).tofile(f)
> +
> +        input_operand_index = self.add_operand(input_name,
> Operand.IOTYPE_INPUT)
> +        output_operand_index = self.add_operand(node.name,
> Operand.IOTYPE_OUTPUT)
> +        np.array([input_operand_index,
> + output_operand_index],dtype=np.uint32).tofile(f)
> +
> +
>      def dump_layers_to_file(self, f):
>          for node in self.nodes:
>              if node.name in self.converted_nodes:
> @@ -311,6 +342,8 @@ class TFConverter:
> 
>              if node.op == 'Conv2D':
>                  self.dump_simple_conv2d_to_file(node, f)
> +            if node.op == 'AvgPool':
> +                self.dump_avg_pool_to_file(node, f)
>              elif node.op == 'DepthToSpace':
>                  self.dump_depth2space_to_file(node, f)
>              elif node.op == 'MirrorPad':
> --
> 2.17.1
> 
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email ffmpeg-devel-request at ffmpeg.org
> with subject "unsubscribe".