[FFmpeg-devel] [PATCH 3/3] dnn: export operand info in python script and load in c code

Tue Aug 27 17:45:35 EEST 2019

hi,

Em ter, 20 de ago de 2019 às 05:54, Guo, Yejun <yejun.guo at intel.com> escreveu:
>
> Signed-off-by: Guo, Yejun <yejun.guo at intel.com>
> ---
>  libavfilter/dnn/dnn_backend_native.c    |  49 +++++++++++---
>  libavfilter/dnn/dnn_backend_native.h    |   2 +-
>  libavfilter/dnn_interface.h             |   2 +-
>  tools/python/convert_from_tensorflow.py | 111 +++++++++++++++++++++++++++++---
>  4 files changed, 142 insertions(+), 22 deletions(-)
>
> diff --git a/libavfilter/dnn/dnn_backend_native.c b/libavfilter/dnn/dnn_backend_native.c
> index 0ba4e44..eeae711 100644
> --- a/libavfilter/dnn/dnn_backend_native.c
> +++ b/libavfilter/dnn/dnn_backend_native.c
> @@ -72,7 +72,6 @@ DNNModel *ff_dnn_load_model_native(const char *model_filename)
>      ConvolutionalParams *conv_params;
>      DepthToSpaceParams *depth_to_space_params;
>      LayerPadParams *pad_params;
> -    int32_t operand_index = 0;
>
>      model = av_malloc(sizeof(DNNModel));
>      if (!model){
> @@ -93,9 +92,10 @@ DNNModel *ff_dnn_load_model_native(const char *model_filename)
>      }
>      model->model = (void *)network;
>
> -    avio_seek(model_file_context, file_size - 4, SEEK_SET);
> +    avio_seek(model_file_context, file_size - 8, SEEK_SET);
>      network->layers_num = (int32_t)avio_rl32(model_file_context);
> -    dnn_size = 4;
> +    network->operands_num = (int32_t)avio_rl32(model_file_context);
> +    dnn_size = 8;
>      avio_seek(model_file_context, 0, SEEK_SET);
>
I think it is worth adding some means of verifying that the input file is
indeed a DNN model file; as it stands, the code may allocate an arbitrary
amount of memory if the file passed in is malformed or corrupted, because
layers_num and operands_num are read straight from the file and used to
size the allocations.
Maybe add a magic number plus the file size (or something else) at the
beginning of the file, and bail out early without parsing it if they do
not match? However, that may require two passes to generate the file,
which goes against your previous patch.

Otherwise I can push it as is, since this behavior was already there
before the patch.

>      network->layers = av_mallocz(network->layers_num * sizeof(Layer));
> @@ -105,11 +105,6 @@ DNNModel *ff_dnn_load_model_native(const char *model_filename)
>          return NULL;
>      }
>
> -    /**
> -     * Operands should be read from model file, the whole change will be huge.
> -     * to make things step by step, we first mock the operands, instead of reading from model file.
> -     */
> -    network->operands_num = network->layers_num + 1;
>      network->operands = av_mallocz(network->operands_num * sizeof(DnnOperand));
>      if (!network->operands){
>          avio_closep(&model_file_context);
> @@ -120,8 +115,6 @@ DNNModel *ff_dnn_load_model_native(const char *model_filename)
>      for (layer = 0; layer < network->layers_num; ++layer){
>          layer_type = (int32_t)avio_rl32(model_file_context);
>          dnn_size += 4;
> -        network->layers[layer].input_operand_indexes[0] = operand_index++;
> -        network->layers[layer].output_operand_index = operand_index;
>          switch (layer_type){
>          case CONV:
>              conv_params = av_malloc(sizeof(ConvolutionalParams));
> @@ -162,6 +155,9 @@ DNNModel *ff_dnn_load_model_native(const char *model_filename)
>              for (i = 0; i < conv_params->output_num; ++i){
>                  conv_params->biases[i] = av_int2float(avio_rl32(model_file_context));
>              }
> +            network->layers[layer].input_operand_indexes[0] = (int32_t)avio_rl32(model_file_context);
> +            network->layers[layer].output_operand_index = (int32_t)avio_rl32(model_file_context);
> +            dnn_size += 8;
>              network->layers[layer].type = CONV;
>              network->layers[layer].params = conv_params;
>              break;
> @@ -174,6 +170,9 @@ DNNModel *ff_dnn_load_model_native(const char *model_filename)
>              }
>              depth_to_space_params->block_size = (int32_t)avio_rl32(model_file_context);
>              dnn_size += 4;
> +            network->layers[layer].input_operand_indexes[0] = (int32_t)avio_rl32(model_file_context);
> +            network->layers[layer].output_operand_index = (int32_t)avio_rl32(model_file_context);
> +            dnn_size += 8;
>              network->layers[layer].type = DEPTH_TO_SPACE;
>              network->layers[layer].params = depth_to_space_params;
>              break;
> @@ -191,6 +190,9 @@ DNNModel *ff_dnn_load_model_native(const char *model_filename)
>                  pad_params->paddings[i][1] = avio_rl32(model_file_context);
>                  dnn_size += 8;
>              }
> +            network->layers[layer].input_operand_indexes[0] = (int32_t)avio_rl32(model_file_context);
> +            network->layers[layer].output_operand_index = (int32_t)avio_rl32(model_file_context);
> +            dnn_size += 8;
>              network->layers[layer].type = MIRROR_PAD;
>              network->layers[layer].params = pad_params;
>              break;
> @@ -201,6 +203,33 @@ DNNModel *ff_dnn_load_model_native(const char *model_filename)
>          }
>      }
>
> +    for (int32_t i = 0; i < network->operands_num; ++i){
> +        DnnOperand *oprd;
> +        int32_t name_len;
> +        int32_t operand_index = (int32_t)avio_rl32(model_file_context);
> +        dnn_size += 4;
> +
> +        oprd = &network->operands[operand_index];
> +        name_len = (int32_t)avio_rl32(model_file_context);
> +        dnn_size += 4;
> +
> +        avio_get_str(model_file_context, name_len, oprd->name, sizeof(oprd->name));
> +        dnn_size += name_len;
> +
> +        oprd->type = (int32_t)avio_rl32(model_file_context);
> +        dnn_size += 4;
> +
> +        oprd->data_type = (int32_t)avio_rl32(model_file_context);
> +        dnn_size += 4;
> +
> +        for (int32_t dim = 0; dim < 4; ++dim) {
> +            oprd->dims[dim] = (int32_t)avio_rl32(model_file_context);
> +            dnn_size += 4;
> +        }
> +
> +        oprd->isNHWC = 1;
> +    }
> +
>      avio_closep(&model_file_context);
>
>      if (dnn_size != file_size){
> diff --git a/libavfilter/dnn/dnn_backend_native.h b/libavfilter/dnn/dnn_backend_native.h
> index d7737ac..172e1e7 100644
> --- a/libavfilter/dnn/dnn_backend_native.h
> +++ b/libavfilter/dnn/dnn_backend_native.h
> @@ -36,7 +36,7 @@ typedef enum {RELU, TANH, SIGMOID, NONE, LEAKY_RELU} DNNActivationFunc;
>
>  typedef enum {VALID, SAME, SAME_CLAMP_TO_EDGE} DNNConvPaddingParam;
>
> -typedef enum {DOT_INPUT, DOT_INTERMEDIATE, DOT_OUTPUT} DNNOperandType;
> +typedef enum {DOT_INPUT = 1, DOT_OUTPUT = 2, DOT_INTERMEDIATE = DOT_INPUT | DOT_INPUT} DNNOperandType;
>
>  typedef struct Layer{
>      DNNLayerType type;
> diff --git a/libavfilter/dnn_interface.h b/libavfilter/dnn_interface.h
> index c24df0e..057005f 100644
> --- a/libavfilter/dnn_interface.h
> +++ b/libavfilter/dnn_interface.h
> @@ -32,7 +32,7 @@ typedef enum {DNN_SUCCESS, DNN_ERROR} DNNReturnType;
>
>  typedef enum {DNN_NATIVE, DNN_TF} DNNBackendType;
>
> -typedef enum {DNN_FLOAT, DNN_UINT8} DNNDataType;
> +typedef enum {DNN_FLOAT = 1, DNN_UINT8 = 4} DNNDataType;
>
>  typedef struct DNNInputData{
>      void *data;
> diff --git a/tools/python/convert_from_tensorflow.py b/tools/python/convert_from_tensorflow.py
> index cbc76a9..bab11a5 100644
> --- a/tools/python/convert_from_tensorflow.py
> +++ b/tools/python/convert_from_tensorflow.py
> @@ -23,6 +23,37 @@ import sys, struct
>
>  __all__ = ['convert_from_tensorflow']
>
> +class Operand(object):
> +    IOTYPE_INPUT = 1
> +    IOTYPE_OUTPUT = 2
> +    IOTYPE_INTERMEDIATE = IOTYPE_INPUT | IOTYPE_OUTPUT
> +    DTYPE_FLOAT = 1
> +    DTYPE_UINT8 = 4
> +    index = 0
> +    def __init__(self, name, dtype, dims):
> +        self.name = name
> +        self.dtype = dtype
> +        self.dims = dims
> +        self.iotype = 0
> +        self.used_count = 0
> +        self.index = Operand.index
> +        Operand.index = Operand.index + 1
> +        self.iotype2str = {Operand.IOTYPE_INPUT: 'in', Operand.IOTYPE_OUTPUT: 'out', Operand.IOTYPE_INTERMEDIATE: 'inout'}
> +        self.dtype2str = {Operand.DTYPE_FLOAT: 'DT_FLOAT', Operand.DTYPE_UINT8: 'DT_UINT8'}
> +
> +    def add_iotype(self, iotype):
> +        self.iotype = self.iotype | iotype
> +        if iotype == Operand.IOTYPE_INPUT:
> +            self.used_count = self.used_count + 1
> +
> +    def __str__(self):
> +        return "{}: (name: {}, iotype: {}, dtype: {}, dims: ({},{},{},{}) used_count: {})".format(self.index,
> +                            self.name, self.iotype2str[self.iotype], self.dtype2str[self.dtype],
> +                            self.dims[0], self.dims[1], self.dims[2], self.dims[3], self.used_count)
> +
> +    def __lt__(self, other):
> +        return self.index < other.index
> +
>  class TFConverter:
>      def __init__(self, graph_def, nodes, outfile, dump4tb):
>          self.graph_def = graph_def
> @@ -37,8 +68,28 @@ class TFConverter:
>          self.conv_paddings = {'VALID':0, 'SAME':1}
>          self.converted_nodes = set()
>          self.conv2d_scope_names = set()
> +        self.conv2d_scopename_inputname_dict = {}
>          self.op2code = {'Conv2D':1, 'DepthToSpace':2, 'MirrorPad':3}
>          self.mirrorpad_mode = {'CONSTANT':0, 'REFLECT':1, 'SYMMETRIC':2}
> +        self.name_operand_dict = {}
> +
> +
> +    def add_operand(self, name, type):
> +        node = self.name_node_dict[name]
> +        if name not in self.name_operand_dict:
> +            dtype = node.attr['dtype'].type
> +            if dtype == 0:
> +                dtype = node.attr['T'].type
> +            dims = [-1,-1,-1,-1]
> +            if 'shape' in node.attr:
> +                dims[0] = node.attr['shape'].shape.dim[0].size
> +                dims[1] = node.attr['shape'].shape.dim[1].size
> +                dims[2] = node.attr['shape'].shape.dim[2].size
> +                dims[3] = node.attr['shape'].shape.dim[3].size
> +            operand = Operand(name, dtype, dims)
> +            self.name_operand_dict[name] = operand;
> +        self.name_operand_dict[name].add_iotype(type)
> +        return self.name_operand_dict[name].index
>
>
>      def dump_for_tensorboard(self):
> @@ -60,11 +111,10 @@ class TFConverter:
>          # the BiasAdd name is possible be changed into the output name,
>          # if activation is None, and BiasAdd.next is the last op which is Identity
>          if conv2d_scope_name + '/BiasAdd' in self.edges:
> -            activation = self.edges[conv2d_scope_name + '/BiasAdd'][0]
> -            activation = activation.op
> +            anode = self.edges[conv2d_scope_name + '/BiasAdd'][0]
>          else:
> -            activation = 'None'
> -        return knode, bnode, dnode, activation
> +            anode = None
> +        return knode, bnode, dnode, anode
>
>
>      def dump_conv2d_to_file(self, node, f):
> @@ -73,16 +123,21 @@ class TFConverter:
>          self.converted_nodes.add(node.name)
>
>          scope_name = TFConverter.get_scope_name(node.name)
> -        #knode for kernel, bnode for bias, dnode for dilation
> -        knode, bnode, dnode, activation = self.get_conv2d_params(scope_name)
> +        #knode for kernel, bnode for bias, dnode for dilation, anode for activation
> +        knode, bnode, dnode, anode = self.get_conv2d_params(scope_name)
>
>          if dnode is not None:
>              dilation = struct.unpack('i', dnode.attr['value'].tensor.tensor_content[0:4])[0]
>          else:
>              dilation = 1
>
> +        if anode is not None:
> +            activation = anode.op
> +        else:
> +            activation = 'None'
> +
>          padding = node.attr['padding'].s.decode("utf-8")
> -        # conv2d with dilation > 1 generates tens of nodes, not easy to parse them, so use tricky.
> +        # conv2d with dilation > 1 generates tens of nodes, not easy to parse them, so use this tricky method.
>          if dilation > 1 and scope_name + '/stack' in self.name_node_dict:
>              if self.name_node_dict[scope_name + '/stack'].op == "Const":
>                  padding = 'SAME'
> @@ -107,6 +162,15 @@ class TFConverter:
>              bias = btensor.tensor_content
>          f.write(bias)
>
> +        input_name = self.conv2d_scopename_inputname_dict[scope_name]
> +        input_operand_index = self.add_operand(input_name, Operand.IOTYPE_INPUT)
> +
> +        if anode is not None:
> +            output_operand_index = self.add_operand(anode.name, Operand.IOTYPE_OUTPUT)
> +        else:
> +            output_operand_index = self.add_operand(self.edges[bnode.name][0].name, Operand.IOTYPE_OUTPUT)
> +        np.array([input_operand_index, output_operand_index], dtype=np.uint32).tofile(f)
> +
>
>      def dump_depth2space_to_file(self, node, f):
>          assert(node.op == 'DepthToSpace')
> @@ -114,6 +178,9 @@ class TFConverter:
>          block_size = node.attr['block_size'].i
>          np.array([self.op2code[node.op], block_size], dtype=np.uint32).tofile(f)
>          self.converted_nodes.add(node.name)
> +        input_operand_index = self.add_operand(node.input[0], Operand.IOTYPE_INPUT)
> +        output_operand_index = self.add_operand(node.name, Operand.IOTYPE_OUTPUT)
> +        np.array([input_operand_index, output_operand_index], dtype=np.uint32).tofile(f)
>
>
>      def dump_mirrorpad_to_file(self, node, f):
> @@ -127,6 +194,9 @@ class TFConverter:
>          paddings = pnode.attr['value'].tensor.tensor_content
>          f.write(paddings)
>          self.converted_nodes.add(node.name)
> +        input_operand_index = self.add_operand(node.input[0], Operand.IOTYPE_INPUT)
> +        output_operand_index = self.add_operand(node.name, Operand.IOTYPE_OUTPUT)
> +        np.array([input_operand_index, output_operand_index], dtype=np.uint32).tofile(f)
>
>
>      def dump_layers_to_file(self, f):
> @@ -147,10 +217,21 @@ class TFConverter:
>                  self.dump_mirrorpad_to_file(node, f)
>
>
> +    def dump_operands_to_file(self, f):
> +            operands = sorted(self.name_operand_dict.values())
> +            for operand in operands:
> +                #print('{}'.format(operand))
> +                np.array([operand.index, len(operand.name)], dtype=np.uint32).tofile(f)
> +                f.write(operand.name.encode('utf-8'))
> +                np.array([operand.iotype, operand.dtype], dtype=np.uint32).tofile(f)
> +                np.array([operand.dims[0], operand.dims[1], operand.dims[2], operand.dims[3]], dtype=np.uint32).tofile(f)
> +
> +
>      def dump_to_file(self):
>          with open(self.outfile, 'wb') as f:
>              self.dump_layers_to_file(f)
> -            np.array([self.layer_number], dtype=np.uint32).tofile(f)
> +            self.dump_operands_to_file(f)
> +            np.array([self.layer_number, len(self.name_operand_dict)], dtype=np.uint32).tofile(f)
>
>
>      def generate_name_node_dict(self):
> @@ -212,19 +293,29 @@ class TFConverter:
>          return name[0:index]
>
>
> -    def generate_conv2d_scope_names(self):
> +    def generate_conv2d_scope_info(self):
> +        # conv2d is a sub block in graph, get the scope name
>          for node in self.nodes:
>              if node.op == 'Conv2D':
>                  scope = TFConverter.get_scope_name(node.name)
>                  self.conv2d_scope_names.add(scope)
>
> +        # get the input name to the conv2d sub block
> +        for node in self.nodes:
> +            scope = TFConverter.get_scope_name(node.name)
> +            if scope in self.conv2d_scope_names:
> +                if node.op == 'Conv2D' or node.op == 'Shape':
> +                    for inp in node.input:
> +                        if TFConverter.get_scope_name(inp) != scope:
> +                            self.conv2d_scopename_inputname_dict[scope] = inp
> +
>
>      def run(self):
>          self.generate_name_node_dict()
>          self.generate_output_names()
>          self.remove_identity()
>          self.generate_edges()
> -        self.generate_conv2d_scope_names()
> +        self.generate_conv2d_scope_info()
>
>          if self.dump4tb:
>              self.dump_for_tensorboard()
> --
> 2.7.4
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request at ffmpeg.org with subject "unsubscribe".