[FFmpeg-devel] [PATCH 2/2] avfilter/dnn_processing: Add TensorRT backend

Xiaowei Wang xiaoweiw at nvidia.com
Sun Jul 25 14:58:43 EEST 2021


The backend can be called as:
-vf dnn_processing=dnn_backend=tensorrt:model="model":input=:output=

As TensorRT provides C++ API rather than C, the TensorRT implementation is
separated into a wrapper.
The wrapper is placed in https://github.com/DutchPiPi/nv-tensorrt-wrapper
Please build & install the wrapper before compiling ffmpeg.
Please see https://github.com/DutchPiPi/FFmpeg-trt-backend-test for how to
configure ffmpeg and generate a TensorRT engine for tests.

Signed-off-by: Xiaowei Wang <xiaoweiw at nvidia.com>
---
 libavfilter/dnn/Makefile               |   2 +-
 libavfilter/dnn/dnn_backend_tensorrt.c |  97 +++-
 libavfilter/dnn/dnn_backend_tensorrt.h |  40 +-
 libavfilter/dnn/dnn_io_proc_trt.cu     |  55 --
 libavfilter/dnn/trt_class_wrapper.cpp  | 731 -------------------------
 libavfilter/dnn/trt_class_wrapper.h    |  49 --
 6 files changed, 109 insertions(+), 865 deletions(-)
 delete mode 100644 libavfilter/dnn/dnn_io_proc_trt.cu
 delete mode 100644 libavfilter/dnn/trt_class_wrapper.cpp
 delete mode 100644 libavfilter/dnn/trt_class_wrapper.h

diff --git a/libavfilter/dnn/Makefile b/libavfilter/dnn/Makefile
index f9ea7ca386..4661d3b2cb 100644
--- a/libavfilter/dnn/Makefile
+++ b/libavfilter/dnn/Makefile
@@ -16,6 +16,6 @@ OBJS-$(CONFIG_DNN)                           += dnn/dnn_backend_native_layer_mat
 
 DNN-OBJS-$(CONFIG_LIBTENSORFLOW)             += dnn/dnn_backend_tf.o
 DNN-OBJS-$(CONFIG_LIBOPENVINO)               += dnn/dnn_backend_openvino.o
-DNN-OBJS-$(CONFIG_LIBTENSORRT)               += dnn/dnn_backend_tensorrt.o dnn/trt_class_wrapper.o dnn/dnn_io_proc_trt.ptx.o
+DNN-OBJS-$(CONFIG_LIBTENSORRT)               += dnn/dnn_backend_tensorrt.o
 
 OBJS-$(CONFIG_DNN)                           += $(DNN-OBJS-yes)
diff --git a/libavfilter/dnn/dnn_backend_tensorrt.c b/libavfilter/dnn/dnn_backend_tensorrt.c
index b45b770a77..e50ebc6c99 100644
--- a/libavfilter/dnn/dnn_backend_tensorrt.c
+++ b/libavfilter/dnn/dnn_backend_tensorrt.c
@@ -25,45 +25,119 @@
  * DNN TensorRT backend implementation.
  */
 
-#include "trt_class_wrapper.h"
 #include "dnn_backend_tensorrt.h"
 
-#include "libavutil/mem.h"
 #include "libavformat/avio.h"
+#include "libavutil/mem.h"
 #include "libavutil/avassert.h"
 #include "libavutil/opt.h"
 #include "libavutil/avstring.h"
+#include "libavutil/buffer.h"
+#include "libavutil/pixfmt.h"
+#include "libavutil/pixdesc.h"
+
 #include "dnn_io_proc.h"
 #include "../internal.h"
-#include "libavutil/buffer.h"
+#include "trt_class_wrapper.h"
+
+#include <stdio.h>
+#include <dlfcn.h>
+#include <libavutil/log.h>
 #include <stdint.h>
 
 #define OFFSET(x) offsetof(TRTContext, x)
 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM
 static const AVOption dnn_tensorrt_options[] = {
-    { "device", "index of the GPU to run model", OFFSET(options.device), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, FLAGS },
+    { "device", "index of the GPU to run model", OFFSET(options.device),    AV_OPT_TYPE_INT,    { .i64 = 0 }, 0, INT_MAX, FLAGS },
+    { "plugin", "path to the plugin so",         OFFSET(options.plugin_so), AV_OPT_TYPE_STRING, { .str = NULL}, 0, 0,     FLAGS },
     { NULL }
 };
 AVFILTER_DEFINE_CLASS(dnn_tensorrt);
 
-DNNModel *ff_dnn_load_model_trt(const char *model_filename,DNNFunctionType func_type, 
+static TRTWrapper *wrapper = NULL;
+
+static int load_trt_backend_lib(TRTWrapper *w, const char *so_path, int mode)
+{
+    w->so_handle = dlopen("libnvtensorrt.so", mode);
+    if (!w->so_handle)
+    {
+        return AVERROR(EIO);
+    }
+
+    w->load_model_func = (tloadModelTrt*)dlsym(w->so_handle, "load_model_trt");
+    w->execute_model_func = (texecuteModelTrt*)dlsym(w->so_handle, "execute_model_trt");
+    w->free_model_func = (tfreeModelTrt*)dlsym(w->so_handle, "free_model_trt");
+    if (!w->load_model_func || !w->execute_model_func || !w->free_model_func)
+    {
+        return AVERROR(EIO);
+    }
+
+    return 0;
+}
+
+DNNModel *ff_dnn_load_model_trt(const char *model_filename,DNNFunctionType func_type,
                                 const char *options, AVFilterContext *filter_ctx)
 {
+    char id_buf[64];
+    AVBufferRef *device_ref = NULL;
+    TRTContext *ctx = (TRTContext*)av_mallocz(sizeof(TRTContext));
+
+    int ret = 0;
+
     DNNModel *model = NULL;
     model = (DNNModel*)av_mallocz(sizeof(DNNModel));
     if (!model){
         return NULL;
     }
+    wrapper = av_mallocz(sizeof(TRTWrapper));
+    wrapper->ctx = ctx;
+    if (load_trt_backend_lib(wrapper, "libnvtensorrt.so", RTLD_LAZY) != 0)
+    {
+        av_log(ctx, AV_LOG_ERROR, "Cannot load wrapper functions. Please check if libnvtensorrt.so is installed\n");
+        return NULL;
+    }
+    ctx->av_class = &dnn_tensorrt_class;
+    av_opt_set_defaults(ctx);
+    if (av_opt_set_from_string(ctx, options, NULL, "=", "&") < 0)
+    {
+        av_log(ctx, AV_LOG_ERROR, "Failed to parse options \"%s\"\n", options);
+        return NULL;
+    }
+    snprintf(id_buf, sizeof(id_buf), "%d", ctx->options.device);
+
+    if (ctx->options.plugin_so)
+    {
+        if (dlopen(ctx->options.plugin_so, RTLD_LAZY))
+        {
+            av_log(ctx, AV_LOG_INFO, "Loaded plugin library\n");
+        }
+        else
+        {
+            av_log(ctx, AV_LOG_ERROR, "Error loading plugin library\n");
+            return NULL;
+        }
+    }
 
-    trt_load_model(model, model_filename, &dnn_tensorrt_class, options);
+    av_log(ctx, AV_LOG_INFO, "Load trt engine\n");
+
+    ret = wrapper->load_model_func(model, ctx, model_filename);
+
+    ctx->hwdevice = device_ref;
+    model->options = options;
 
     return model;
 }
 
 DNNReturnType ff_dnn_execute_model_trt(const DNNModel *model, DNNExecBaseParams *exec_params)
 {
-    execute_model_trt(model, exec_params->input_name, exec_params->in_frame, 
-                    exec_params->output_names, exec_params->nb_output, exec_params->out_frame);
+    AVFrame *in_frame = exec_params->in_frame;
+    AVFrame *out_frame = exec_params->out_frame;
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get((enum AVPixelFormat)in_frame->format);
+    int packed = (desc->flags & AV_PIX_FMT_FLAG_PLANAR) ? 0 : 1;
+
+    wrapper->execute_model_func(model, in_frame->data, in_frame->linesize, in_frame->width, in_frame->height,
+                                out_frame->data, out_frame->linesize, out_frame->width, out_frame->height,
+                                packed, 0);
     return DNN_SUCCESS;
 }
 
@@ -71,7 +145,10 @@ void ff_dnn_free_model_trt(DNNModel **model)
 {
     if (*model)
     {
-        free_model_trt(*model);
-        av_freep(model);
+        wrapper->free_model_func(*model);
+        dlclose(wrapper->so_handle);
+
+        av_freep(&wrapper->ctx);
+        av_freep(&wrapper);
     }
 }
\ No newline at end of file
diff --git a/libavfilter/dnn/dnn_backend_tensorrt.h b/libavfilter/dnn/dnn_backend_tensorrt.h
index d700cb247f..02e26ce032 100644
--- a/libavfilter/dnn/dnn_backend_tensorrt.h
+++ b/libavfilter/dnn/dnn_backend_tensorrt.h
@@ -37,28 +37,30 @@ extern "C"
     #include "libavutil/hwcontext.h"
     #include "libavutil/hwcontext_cuda_internal.h"
 
-    typedef struct TRTOptions{
-        int device;
-    } TRTOptions;
+    // typedef struct TRTOptions{
+    //     int device;
+    // } TRTOptions;
 
-    typedef struct TRTContext{
-        const AVClass *av_class;
-        TRTOptions options;
-        AVBufferRef *hwdevice;
-        // Host memory pointer to input/output image data
-        void *host_in, *host_out;
-        // Device memory pointer to the fp32 CHW input/output of the model
-        // The device memory is only allocated once and reused during inference
-        // Multiple input/output is not supported
-        CUdeviceptr trt_in, trt_out;
-        // Device memory pointer to 8-bit image data
-        CUdeviceptr frame_in, frame_out;
+    // typedef struct TRTContext{
+    //     const AVClass *av_class;
+    //     TRTOptions options;
+    //     AVBufferRef *hwdevice;
+    //     // Host memory pointer to input/output image data
+    //     void *host_in, *host_out;
+    //     // Device memory pointer to the fp32 CHW input/output of the model
+    //     // The device memory is only allocated once and reused during inference
+    //     // Multiple input/output is not supported
+    //     CUdeviceptr trt_in, trt_out;
+    //     // Device memory pointer to 8-bit image data
+    //     CUdeviceptr frame_in, frame_out;
 
-        CUmodule cu_module;
-        CUfunction cu_func_frame_to_dnn, cu_func_dnn_to_frame;
+    //     CUmodule cu_module;
+    //     CUfunction cu_func_frame_to_dnn, cu_func_dnn_to_frame;
 
-        int channels;
-    } TRTContext;
+    //     CUcontext cuda_ctx;
+
+    //     int channels, packed;
+    // } TRTContext;
     
     DNNModel *ff_dnn_load_model_trt(const char *model_filename,DNNFunctionType func_type, 
                                     const char *options, AVFilterContext *filter_ctx);
diff --git a/libavfilter/dnn/dnn_io_proc_trt.cu b/libavfilter/dnn/dnn_io_proc_trt.cu
deleted file mode 100644
index 030cfd2f60..0000000000
--- a/libavfilter/dnn/dnn_io_proc_trt.cu
+++ /dev/null
@@ -1,55 +0,0 @@
-#include <bits/stdint-uintn.h>
-extern "C" {
-
-__global__ void frame_to_dnn(uint8_t *src, int src_linesize, float *dst, int dst_linesize, 
-                             int width, int height, int unpack_rgb)
-{
-    int x = blockIdx.x * blockDim.x + threadIdx.x;
-    int y = blockIdx.y * blockDim.y + threadIdx.y;
-
-    if (x >= width || y >= height)
-        return;
-    
-    if (unpack_rgb)
-    {
-        uchar3 rgb = *((uchar3 *)(src + y * src_linesize) + x);
-        dst[y * dst_linesize + x] = (float)rgb.x;
-        dst[y * dst_linesize + x + dst_linesize * height] = (float)rgb.y;
-        dst[y * dst_linesize + x + 2 * dst_linesize * height] = (float)rgb.z;
-    }
-    else
-    {
-        dst[y * dst_linesize + x] = (float)src[y * src_linesize + x];
-    }
-}
-
-__device__ static float clamp(float x, float lower, float upper) {
-    return x < lower ? lower : (x > upper ? upper : x);
-}
-
-__global__ void dnn_to_frame(float *src, int src_linesize, uint8_t *dst, int dst_linesize, 
-                            int width, int height, int pack_rgb)
-{
-    int x = blockIdx.x * blockDim.x + threadIdx.x;
-    int y = blockIdx.y * blockDim.y + threadIdx.y;
-
-    if (x >= width || y >= height)
-        return;
-
-    if (pack_rgb)
-    {
-        uint8_t r = (uint8_t)clamp(src[y * src_linesize + x], .0f, 255.0f);
-        uint8_t g = (uint8_t)clamp(src[y * src_linesize + x + src_linesize * height], .0f, 255.0f);
-        uint8_t b = (uint8_t)clamp(src[y * src_linesize + x + 2 * src_linesize * height], .0f, 255.0f);
-
-        uchar3 rgb = make_uchar3(r, g, b);
-
-        *((uchar3*)(dst + y * dst_linesize) + x) = rgb;
-    }
-    else
-    {
-        dst[y * dst_linesize + x] = (uint8_t)clamp(src[y * src_linesize + x], .0f, 255.0f);
-    }
-}
-
-}
\ No newline at end of file
diff --git a/libavfilter/dnn/trt_class_wrapper.cpp b/libavfilter/dnn/trt_class_wrapper.cpp
deleted file mode 100644
index dac433b690..0000000000
--- a/libavfilter/dnn/trt_class_wrapper.cpp
+++ /dev/null
@@ -1,731 +0,0 @@
-/*
-* Copyright (c) 2021 NVIDIA CORPORATION. All rights reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
- */
- 
-/**
- * @file
- * DNN TensorRT backend C++ wrapper.
- */
-
-#include "trt_class_wrapper.h"
-#include "dnn_backend_tensorrt.h"
-
-#include <vector>
-#include <map>
-#include <iostream>
-#include <fstream>
-#include <iomanip>
-#include <string>
-#include <chrono>
-#include <sstream>
-#include <mutex>
-
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-    
-    #include "libavutil/buffer.h"
-    #include "libavutil/hwcontext.h"
-    #include "libavutil/cuda_check.h"
-    #include "libavutil/log.h"
-    #include "libavutil/opt.h"
-    #include "libavformat/avio.h"
-    #include "dnn_io_proc.h"
-    #include "libavutil/frame.h"
-    #include "libavutil/pixdesc.h"
-    #include "libavutil/pixfmt.h"
-    #include "libavutil/mem.h"
-
-#ifdef __cplusplus
-}
-#endif
-
-#include <sys/stat.h>
-#include <time.h>
-#include <unistd.h>
-#include <sys/socket.h>
-#include <netinet/in.h>
-#include <arpa/inet.h>
-#define SOCKET int
-#define INVALID_SOCKET -1
-
-#include <cuda_runtime.h>
-#include <NvInfer.h>
-
-using namespace nvinfer1;
-using namespace std;
-
-#define DIV_UP(a, b) ( ((a) + (b) - 1) / (b) )
-#define BLOCKX 32
-#define BLOCKY 16
-
-// Self-defined CUDA check functions as cuda_check.h is not available for cpp due to void* function pointers
-inline bool check(CUresult e, TRTContext *ctx, CudaFunctions* cu, int iLine, const char *szFile) {
-    if (e != CUDA_SUCCESS) {
-        const char* pStr;
-        cu->cuGetErrorName(e, &pStr);
-        av_log(ctx, AV_LOG_ERROR, "CUDA driver API error: %s, at line %d in file %s\n",
-        pStr, iLine, szFile);
-        return false;
-    }
-    return true;
-}
-
-inline bool check(cudaError_t e, TRTContext *ctx, int iLine, const char *szFile) {
-    if (e != cudaSuccess) {
-        av_log(ctx, AV_LOG_ERROR, "CUDA runtime API error: %s, at line %d in file %s\n",
-            cudaGetErrorName(e), iLine, szFile);
-        return false;
-    }
-    return true;
-}
-
-inline bool check(bool bSuccess, TRTContext *ctx, int iLine, const char *szFile) {
-    if (!bSuccess) {
-        av_log(ctx, AV_LOG_ERROR, "Error at line %d in file %s\n", iLine, szFile);
-        return false;
-    }
-    return true;
-}
-
-#define ck(call, ctx) check(call, ctx, __LINE__, __FILE__)
-#define ck_cu(call) check(call, ctx, cu, __LINE__, __FILE__)
-
-inline std::string to_string(nvinfer1::Dims const &dim) {
-    std::ostringstream oss;
-    oss << "(";
-    for (int i = 0; i < dim.nbDims; i++) {
-        oss << dim.d[i] << ", ";
-    }
-    oss << ")";
-    return oss.str();
-}
-
-typedef ICudaEngine *(*BuildEngineProcType)(IBuilder *builder, void *pData);
-
-struct IOInfo {
-    string name;
-    bool bInput;
-    nvinfer1::Dims dim;
-    nvinfer1::DataType dataType;
-
-    string GetDimString() {
-        return ::to_string(dim);
-    }
-    string GetDataTypeString() {
-        static string aTypeName[] = {"float", "half", "int8", "int32", "bool"};
-        return aTypeName[(int)dataType];
-    }
-    size_t GetNumBytes() {
-        static int aSize[] = {4, 2, 1, 4, 1};
-        size_t nSize = aSize[(int)dataType];
-        for (int i = 0; i < dim.nbDims; i++) {
-            nSize *= dim.d[i];
-        }
-        return nSize;
-    }
-    string to_string() {
-        ostringstream oss;
-        oss << setw(6) << (bInput ? "input" : "output") 
-            << " | " << setw(5) << GetDataTypeString() 
-            << " | " << GetDimString() 
-            << " | " << "size=" << GetNumBytes()
-            << " | " << name;
-        return oss.str();
-    }
-};
-
-class TrtLogger : public nvinfer1::ILogger {
-public:
-    TrtLogger(TRTContext *ctx) : ctx(ctx) {}
-    void log(Severity severity, const char* msg) override {
-        int log_level = AV_LOG_INFO;
-        switch (severity){
-            case nvinfer1::ILogger::Severity::kERROR:
-            log_level = AV_LOG_ERROR;
-            break;
-            case nvinfer1::ILogger::Severity::kWARNING:
-            log_level = AV_LOG_WARNING;
-            break;
-            case nvinfer1::ILogger::Severity::kINFO:
-            log_level = AV_LOG_INFO;
-            break;
-            case nvinfer1::ILogger::Severity::kVERBOSE:
-            log_level = AV_LOG_DEBUG;
-            break;
-            case nvinfer1::ILogger::Severity::kINTERNAL_ERROR:
-            log_level = AV_LOG_FATAL;
-            break;
-        }
-        av_log(ctx, log_level, "%s\n", msg);
-    }
-private:
-    TRTContext *ctx = nullptr;
-};
-    
-class TrtLite {
-public:
-    TrtLite(const char *szEnginePath, TRTContext *trt_ctx) : ctx(trt_ctx) {
-        uint8_t *pBuf = nullptr;
-        uint32_t nSize = 0;
-
-        trt_logger = new TrtLogger(trt_ctx);
-        
-        read_engine(&pBuf, &nSize, szEnginePath);
-        IRuntime *runtime = createInferRuntime(*trt_logger);
-        engine = runtime->deserializeCudaEngine(pBuf, nSize);
-        runtime->destroy();
-        if (!engine) {
-            av_log(ctx, AV_LOG_ERROR, "No engine generated\n");
-            return;
-        }
-        av_freep(&pBuf);
-    }
-    virtual ~TrtLite() {
-        if (context) {
-            context->destroy();
-        }
-        if (engine) {
-            engine->destroy();
-        }
-    }
-    ICudaEngine *GetEngine() {
-        return engine;
-    }
-    void Execute(int nBatch, vector<void *> &vdpBuf, cudaStream_t stm = 0, cudaEvent_t* evtInputConsumed = nullptr) {
-        if (!engine) {
-            av_log(ctx, AV_LOG_ERROR, "No engine\n");
-            return;
-        }
-        if (!engine->hasImplicitBatchDimension() && nBatch > 1) {
-            av_log(ctx, AV_LOG_ERROR, 
-                "Engine was built with explicit batch but is executed with batch size != 1. Results may be incorrect.\n");
-            return;
-        }
-        if (engine->getNbBindings() != vdpBuf.size()) {
-            av_log(ctx, AV_LOG_ERROR, "Number of bindings conflicts with input and output\n");
-            return;
-        }
-        if (!context) {
-            context = engine->createExecutionContext();
-            if (!context) {
-                av_log(ctx, AV_LOG_ERROR, "createExecutionContext() failed\n");
-                return;
-            }
-        }
-        ck(context->enqueue(nBatch, vdpBuf.data(), stm, evtInputConsumed), ctx);
-    }
-    void Execute(map<int, Dims> i2shape, vector<void *> &vdpBuf, cudaStream_t stm = 0, cudaEvent_t* evtInputConsumed = nullptr) {
-        if (!engine) {
-            av_log(ctx, AV_LOG_ERROR, "No engine\n");
-            return;
-        }
-        if (engine->hasImplicitBatchDimension()) {
-            av_log(ctx, AV_LOG_ERROR, "Engine was built with static-shaped input\n");
-            return;
-        }
-        if (engine->getNbBindings() != vdpBuf.size()) {
-            av_log(ctx, AV_LOG_ERROR, "Number of bindings conflicts with input and output\n");
-            return;
-        }
-        if (!context) {
-            context = engine->createExecutionContext();
-            if (!context) {
-                av_log(ctx, AV_LOG_ERROR, "createExecutionContext() failed\n");
-                return;
-            }
-        }
-        for (auto &it : i2shape) {
-            context->setBindingDimensions(it.first, it.second);
-        }
-        ck(context->enqueueV2(vdpBuf.data(), stm, evtInputConsumed), ctx);
-    }
-
-    vector<IOInfo> ConfigIO(int nBatchSize) {
-        vector<IOInfo> vInfo;
-        if (!engine) {
-            av_log(ctx, AV_LOG_ERROR, "No engine\n");
-            return vInfo;
-        }
-        if (!engine->hasImplicitBatchDimension()) {
-            av_log(ctx, AV_LOG_ERROR, "Engine must be built with implicit batch size (and static shape)\n");
-            return vInfo;
-        }
-        for (int i = 0; i < engine->getNbBindings(); i++) {
-            vInfo.push_back({string(engine->getBindingName(i)), engine->bindingIsInput(i), 
-                MakeDim(nBatchSize, engine->getBindingDimensions(i)), engine->getBindingDataType(i)});
-        }
-        return vInfo;
-    }
-    vector<IOInfo> ConfigIO(map<int, Dims> i2shape) {
-        vector<IOInfo> vInfo;
-        if (!engine) {
-            av_log(ctx, AV_LOG_ERROR, "No engine\n");
-            return vInfo;
-        }
-        if (engine->hasImplicitBatchDimension()) {
-            av_log(ctx, AV_LOG_ERROR, "Engine must be built with explicit batch size (to enable dynamic shape)\n");
-            return vInfo;
-        }
-        if (!context) {
-            context = engine->createExecutionContext();
-            if (!context) {
-                av_log(ctx, AV_LOG_ERROR, "createExecutionContext() failed\n");
-                return vInfo;
-            }
-        }
-        for (auto &it : i2shape) {
-            context->setBindingDimensions(it.first, it.second);
-        }
-        if (!context->allInputDimensionsSpecified()) {
-            av_log(ctx, AV_LOG_ERROR, "Not all binding shape are specified\n");
-            return vInfo;
-        }
-        for (int i = 0; i < engine->getNbBindings(); i++) {
-            vInfo.push_back({string(engine->getBindingName(i)), engine->bindingIsInput(i), 
-                context->getBindingDimensions(i), engine->getBindingDataType(i)});
-        }
-        return vInfo;
-    }
-
-    void PrintInfo() {
-        if (!engine) {
-            av_log(ctx, AV_LOG_ERROR, "No engine\n");
-            return;
-        }
-        av_log(ctx, AV_LOG_INFO, "nbBindings: %d\n", engine->getNbBindings());
-        // Only contains engine-level IO information: if dynamic shape is used,
-        // dimension -1 will be printed
-        for (int i = 0; i < engine->getNbBindings(); i++) {
-            av_log(ctx, AV_LOG_INFO, "#%d: %s\n", i, IOInfo{string(engine->getBindingName(i)), engine->bindingIsInput(i),
-                engine->getBindingDimensions(i), engine->getBindingDataType(i)}.to_string().c_str());
-        }
-    }
-
-    TRTContext *ctx = nullptr;
-    
-private:
-    void read_engine(uint8_t **engine_buf, uint32_t *engine_size, const char *engine_filename) {
-        AVIOContext *engine_file_ctx;
-        *engine_buf = nullptr;
-
-        if (avio_open(&engine_file_ctx, engine_filename, AVIO_FLAG_READ) < 0){
-            av_log(ctx, AV_LOG_ERROR, "Error reading engine file from disk!\n");
-            return;
-        }
-
-        uint32_t size = avio_size(engine_file_ctx);
-        uint8_t *buffer = (uint8_t*)av_malloc(size);
-        if (!buffer){
-            avio_closep(&engine_file_ctx);
-            av_log(ctx, AV_LOG_ERROR, "Error allocating memory for TRT engine.\n");
-            return;
-        }
-        uint32_t bytes_read = avio_read(engine_file_ctx, buffer, size);
-        avio_closep(&engine_file_ctx);
-        if (bytes_read != size){
-            av_freep(&buffer);
-            av_log(ctx, AV_LOG_ERROR, "Engine file size (%d) does not equal to read size (%d)\n", size, bytes_read);
-            return;
-        }
-
-        *engine_buf = buffer;
-        *engine_size = size;
-
-        return;
-    }
-    static size_t GetBytesOfBinding(int iBinding, ICudaEngine *engine, IExecutionContext *context = nullptr) {
-        size_t aValueSize[] = {4, 2, 1, 4, 1};
-        size_t nSize = aValueSize[(int)engine->getBindingDataType(iBinding)];
-        const Dims &dims = context ? context->getBindingDimensions(iBinding) : engine->getBindingDimensions(iBinding);
-        for (int i = 0; i < dims.nbDims; i++) {
-            nSize *= dims.d[i];
-        }
-        return nSize;
-    }
-    static nvinfer1::Dims MakeDim(int nBatchSize, nvinfer1::Dims dim) {
-        nvinfer1::Dims ret(dim);
-        for (int i = ret.nbDims; i > 0; i--) {
-            ret.d[i] = ret.d[i - 1];
-        }
-        ret.d[0] = nBatchSize;
-        ret.nbDims++;
-        return ret;
-    }
-
-    ICudaEngine *engine = nullptr;
-    IExecutionContext *context = nullptr;
-    TrtLogger *trt_logger = nullptr;
-};
-
-#define BATCH 1
-
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-
-static DNNReturnType frame_to_dnn(AVFrame *inframe, TRTContext *ctx, int num_bytes)
-{
-    AVHWDeviceContext *hw_device = (AVHWDeviceContext*)ctx->hwdevice->data;
-    AVCUDADeviceContext *hw_ctx = (AVCUDADeviceContext*)hw_device->hwctx;
-    CudaFunctions *cu = hw_ctx->internal->cuda_dl;
-
-    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get((enum AVPixelFormat)inframe->format);
-    int unpack = (desc->flags & AV_PIX_FMT_FLAG_PLANAR) ? 0 : 1;
-    void *frame_to_dnn_args[] = {&ctx->frame_in, inframe->linesize, &ctx->trt_in, &inframe->width,
-                                &inframe->width, &inframe->height, &unpack};
-
-    CUDA_MEMCPY2D copy_param;
-    memset(&copy_param, 0, sizeof(copy_param));
-    copy_param.dstMemoryType = CU_MEMORYTYPE_DEVICE;
-    copy_param.dstDevice = ctx->frame_in;
-    copy_param.dstPitch = inframe->linesize[0];
-    copy_param.srcMemoryType = CU_MEMORYTYPE_HOST;
-    copy_param.srcHost = inframe->data[0];
-    copy_param.srcPitch = inframe->linesize[0];
-    copy_param.WidthInBytes = inframe->linesize[0];
-    copy_param.Height = inframe->height;
-
-    ck_cu(cu->cuMemcpy2DAsync(&copy_param, hw_ctx->stream));
-    ck_cu(cu->cuLaunchKernel(ctx->cu_func_frame_to_dnn, 
-                        DIV_UP(inframe->width, BLOCKX), DIV_UP(inframe->height, BLOCKY), 
-                        1, BLOCKX, BLOCKY, 1, 0, hw_ctx->stream, frame_to_dnn_args, NULL));
-
-    return DNN_SUCCESS;
-}
-
-static DNNReturnType dnn_to_frame(AVFrame *outframe, TRTContext *ctx, int num_bytes)
-{
-    AVHWDeviceContext *hw_device = (AVHWDeviceContext*)ctx->hwdevice->data;
-    AVCUDADeviceContext *hw_ctx = (AVCUDADeviceContext*)hw_device->hwctx;
-    CudaFunctions *cu = hw_ctx->internal->cuda_dl;
-
-    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get((enum AVPixelFormat)outframe->format);
-    int pack = (desc->flags & AV_PIX_FMT_FLAG_PLANAR) ? 0 : 1;
-    void *dnn_to_frame_args[] = {&ctx->trt_out, &outframe->width, &ctx->frame_out, &outframe->linesize[0],
-                                &outframe->width, &outframe->height, &pack};
-
-    CUDA_MEMCPY2D copy_param;
-    memset(&copy_param, 0, sizeof(copy_param));
-    copy_param.dstMemoryType = CU_MEMORYTYPE_HOST;
-    copy_param.dstHost = outframe->data[0];
-    copy_param.dstPitch = outframe->linesize[0];
-    copy_param.srcMemoryType = CU_MEMORYTYPE_DEVICE;
-    copy_param.srcDevice = ctx->frame_out;
-    copy_param.srcPitch = outframe->linesize[0];
-    copy_param.WidthInBytes = outframe->linesize[0];
-    copy_param.Height = outframe->height;
-
-    ck_cu(cu->cuLaunchKernel(ctx->cu_func_dnn_to_frame, 
-                        DIV_UP(outframe->width, BLOCKX), DIV_UP(outframe->height, BLOCKY), 
-                        1, BLOCKX, BLOCKY, 1, 0, hw_ctx->stream, dnn_to_frame_args, NULL));
-    ck_cu(cu->cuMemcpy2DAsync(&copy_param, hw_ctx->stream));
-
-    ck_cu(cu->cuStreamSynchronize(hw_ctx->stream));
-
-    return DNN_SUCCESS;
-}
-
-DNNReturnType trt_load_model(DNNModel *model, const char *model_filename, const AVClass *av_class, const char *options)
-{    
-    int ret = 0;
-    char id_buf[64] = { 0 };
-    AVBufferRef *device_ref = NULL;
-    TRTContext *ctx = (TRTContext*)av_mallocz(sizeof(TRTContext));
-    AVHWDeviceContext *hw_device;
-    AVCUDADeviceContext *hw_ctx;
-    CudaFunctions *cu;
-    CUcontext dummy, cuda_ctx;
-
-    ctx->av_class = av_class;
-    av_opt_set_defaults(ctx);
-    if (av_opt_set_from_string(ctx, options, NULL, "=", "&") < 0)
-    {
-        av_log(ctx, AV_LOG_ERROR, "Failed to parse options \"%s\"\n", options);
-        return DNN_ERROR;
-    }
-    snprintf(id_buf, sizeof(id_buf), "%d", ctx->options.device);
-    
-    // TODO: Add device index option
-    ret = av_hwdevice_ctx_create(&device_ref, AV_HWDEVICE_TYPE_CUDA, id_buf, NULL, 1);
-    if (ret < 0)
-    {
-        av_log(ctx, AV_LOG_ERROR, "Error creating device context\n");
-        return DNN_ERROR;
-    }
-
-    hw_device = (AVHWDeviceContext*)device_ref->data;
-    hw_ctx = (AVCUDADeviceContext*)hw_device->hwctx;
-    cu = hw_ctx->internal->cuda_dl;
-    cuda_ctx = hw_ctx->cuda_ctx;
-
-    ck_cu(cu->cuCtxPushCurrent(cuda_ctx));
-
-    TrtLite *trt_model= new TrtLite{model_filename, ctx};
-    if (trt_model == nullptr)
-    {
-        return DNN_ERROR;
-    }
-
-    ctx->hwdevice = device_ref;
-
-    ck_cu(cu->cuCtxPopCurrent(&dummy));
-
-    trt_model->PrintInfo();
-
-
-    model->model = trt_model;
-    model->get_input = &get_input_trt;
-    model->get_output = &get_output_trt;
-    model->options = options;
-    av_log(ctx, AV_LOG_INFO, "Load trt engine\n");
-
-    return DNN_SUCCESS;
-}
-
-DNNReturnType get_input_trt(void *model, DNNData *input, const char *input_name)
-{
-    TrtLite* trt_model = (TrtLite*)model;
-    TRTContext *ctx = trt_model->ctx;
-    AVHWDeviceContext *hw_device = (AVHWDeviceContext*)ctx->hwdevice->data;
-    AVCUDADeviceContext *hw_ctx = (AVCUDADeviceContext*)hw_device->hwctx;
-    CudaFunctions *cu = hw_ctx->internal->cuda_dl;
-
-    CUcontext dummy, cuda_ctx = hw_ctx->cuda_ctx;
-
-    av_log(ctx, AV_LOG_INFO, "Get TRT input\n");
-
-    // For dynamic shape, input dimensions are set to -1,
-    // trt input is initialized in get_output_trt() along with trt output
-    if (!trt_model->GetEngine()->hasImplicitBatchDimension())
-    {
-        av_log(ctx, AV_LOG_INFO, "Model supports dynamic shape\n");
-        for (int i = 0; i < trt_model->GetEngine()->getNbBindings(); i++) {
-            if (trt_model->GetEngine()->bindingIsInput(i))
-            {
-                ctx->channels = trt_model->GetEngine()->getBindingDimensions(i).d[1];
-                if (ctx->channels == -1)
-                {
-                    av_log(ctx, AV_LOG_ERROR, "Do not support dynamic channel size\n");
-                    return DNN_ERROR;
-                }
-                input->channels = ctx->channels;
-            }
-        }
-        input->height = -1;
-        input->width = -1;
-        input->dt = DNN_FLOAT;
-
-        return DNN_SUCCESS;
-    }
-
-    vector<IOInfo> v_info = trt_model->ConfigIO(BATCH);
-    for (auto info: v_info)
-    {
-        if (info.bInput)
-        {
-            input->channels = info.dim.d[1];
-            input->height = info.dim.d[2];
-            input->width = info.dim.d[3];
-            input->dt = DNN_FLOAT;
-
-            ctx->host_in = new uint8_t[info.GetNumBytes()];
-
-            ck_cu(cu->cuCtxPushCurrent(cuda_ctx));
-
-            ck_cu(cu->cuMemAlloc(&ctx->trt_in, info.GetNumBytes()));
-            ck_cu(cu->cuMemAlloc(&ctx->frame_in, info.GetNumBytes() / sizeof(float)));
-
-            ck_cu(cu->cuCtxPopCurrent(&dummy));
-            
-            return DNN_SUCCESS;
-        }
-    }
-    av_log(ctx, AV_LOG_ERROR, "No input found in the model\n");
-    return DNN_ERROR;
-}
-
-DNNReturnType get_output_trt(void *model, const char *input_name, int input_width, int input_height,
-                                const char *output_name, int *output_width, int *output_height)
-{
-    TrtLite* trt_model = (TrtLite*)model;
-    TRTContext *ctx = trt_model->ctx;
-    AVHWDeviceContext *hw_device = (AVHWDeviceContext*)ctx->hwdevice->data;
-    AVCUDADeviceContext *hw_ctx = (AVCUDADeviceContext*)hw_device->hwctx;
-    CudaFunctions *cu = hw_ctx->internal->cuda_dl;
-
-    CUcontext dummy, cuda_ctx = hw_ctx->cuda_ctx;
-    extern char dnn_io_proc_trt_ptx[];
-
-    av_log(ctx, AV_LOG_INFO, "Get TRT output\n");
-
-    vector<IOInfo> v_info;
-    if (!trt_model->GetEngine()->hasImplicitBatchDimension())
-    {
-        map<int, Dims> i2shape;
-        i2shape.insert(make_pair(0, Dims{4, {BATCH, ctx->channels, input_height, input_width}}));
-        v_info = trt_model->ConfigIO(i2shape);
-    }
-    else
-    {
-        v_info = trt_model->ConfigIO(BATCH);
-    }
-    
-    ck_cu(cu->cuCtxPushCurrent(cuda_ctx));
-
-    for (auto info: v_info)
-    {
-        // For dynamic shape, inputs are initialized here
-        if (info.bInput && (!trt_model->GetEngine()->hasImplicitBatchDimension()))
-        {
-            ctx->host_in = new uint8_t[info.GetNumBytes()];
-            ck_cu(cu->cuMemAlloc(&ctx->trt_in, info.GetNumBytes()));
-            ck_cu(cu->cuMemAlloc(&ctx->frame_in, info.GetNumBytes() / sizeof(float)));
-        }
-        if (!info.bInput)
-        {
-            *output_height = info.dim.d[2];
-            *output_width = info.dim.d[3];
-
-            ctx->host_out = new uint8_t[info.GetNumBytes()];
-            ck_cu(cu->cuMemAlloc(&ctx->trt_out, info.GetNumBytes()));
-            ck_cu(cu->cuMemAlloc(&ctx->frame_out, info.GetNumBytes() / sizeof(float)));
-        }
-    }
-
-    ck_cu(cu->cuModuleLoadData(&ctx->cu_module, dnn_io_proc_trt_ptx));
-    ck_cu(cu->cuModuleGetFunction(&ctx->cu_func_frame_to_dnn, ctx->cu_module, "frame_to_dnn"));
-    ck_cu(cu->cuModuleGetFunction(&ctx->cu_func_dnn_to_frame, ctx->cu_module, "dnn_to_frame"));
-
-    ck_cu(cu->cuCtxPopCurrent(&dummy));
-
-    return DNN_SUCCESS;
-}
-
-DNNReturnType execute_model_trt(const DNNModel *model, const char *input_name, AVFrame *in_frame,
-                                      const char **output_names, uint32_t nb_output, AVFrame *out_frame)
-{
-    TrtLite* trt_model = reinterpret_cast<TrtLite*>(model->model);
-    TRTContext *ctx = trt_model->ctx;
-    AVHWDeviceContext *hw_device = (AVHWDeviceContext*)ctx->hwdevice->data;
-    AVCUDADeviceContext *hw_ctx = (AVCUDADeviceContext*)hw_device->hwctx;
-    CudaFunctions *cu = hw_ctx->internal->cuda_dl;
-
-    CUcontext dummy, cuda_ctx = hw_ctx->cuda_ctx;
-
-    DNNData input, output;
-    vector<void*> buf_vec, device_buf_vec;
-    int ret = 0;
-
-    int input_height = in_frame->height;
-    int input_width = in_frame->width;
-    int input_channels = ctx->channels;
-    vector<IOInfo> IO_info_vec;
-    map<int, Dims> i2shape;
-    if (!trt_model->GetEngine()->hasImplicitBatchDimension())
-    {
-        i2shape.insert(make_pair(0, Dims{4, {BATCH, input_channels, input_height, input_width}}));
-        IO_info_vec = trt_model->ConfigIO(i2shape);
-    }
-    else
-    {
-        IO_info_vec = trt_model->ConfigIO(BATCH);
-    }
-
-    ck_cu(cu->cuCtxPushCurrent(cuda_ctx));
-
-    for (auto info : IO_info_vec)
-    {
-
-        if (info.bInput)
-        {   
-            input.height = info.dim.d[2];
-            input.width = info.dim.d[3];
-            input.channels = info.dim.d[1];
-            input.data = ctx->host_in;
-            input.dt = DNN_FLOAT;
-            ret = frame_to_dnn(in_frame, ctx, info.GetNumBytes() / sizeof(float));
-            
-            if (ret < 0)
-                return DNN_ERROR;
-    
-            device_buf_vec.push_back((void*)ctx->trt_in);
-            continue;
-        }
-        else
-        {
-            device_buf_vec.push_back((void*)ctx->trt_out);
-        }
-    }
-
-    if (!trt_model->GetEngine()->hasImplicitBatchDimension())
-    {
-        trt_model->Execute(i2shape, device_buf_vec, hw_ctx->stream);
-    }
-    else
-    {
-        trt_model->Execute(BATCH, device_buf_vec, hw_ctx->stream);
-    }
-
-    for (uint32_t i = 0; i < IO_info_vec.size(); i++)
-    {
-        if (!IO_info_vec[i].bInput)
-        {
-            output.height = IO_info_vec[i].dim.d[2];
-            output.width = IO_info_vec[i].dim.d[3];
-            output.channels = IO_info_vec[i].dim.d[1];
-            output.data = ctx->host_out;
-            output.dt = DNN_FLOAT;
-            ret = dnn_to_frame(out_frame, ctx, IO_info_vec[i].GetNumBytes() / sizeof(float));
-        }
-    }
-
-    ck_cu(cu->cuCtxPopCurrent(&dummy));
-
-    return DNN_SUCCESS;
-}
-
-DNNReturnType free_model_trt(DNNModel *model)
-{
-    TrtLite* trt_model = reinterpret_cast<TrtLite*>(model->model);
-    TRTContext *ctx = trt_model->ctx;
-    AVHWDeviceContext *hw_device = (AVHWDeviceContext*)ctx->hwdevice->data;
-    AVCUDADeviceContext *hw_ctx = (AVCUDADeviceContext*)hw_device->hwctx;
-    CudaFunctions *cu = hw_ctx->internal->cuda_dl;
-
-    delete[]((uint8_t*)ctx->host_in);
-    delete[]((uint8_t*)ctx->host_out);
-    ck_cu(cu->cuMemFree(ctx->trt_in));
-    ck_cu(cu->cuMemFree(ctx->trt_out));
-    
-    delete(trt_model);
-    
-    av_buffer_unref(&ctx->hwdevice);
-    av_free(ctx);
-    model->model = NULL;
-    
-    return DNN_SUCCESS;
-}
-#ifdef __cplusplus
-}
-#endif
diff --git a/libavfilter/dnn/trt_class_wrapper.h b/libavfilter/dnn/trt_class_wrapper.h
deleted file mode 100644
index 18815fadae..0000000000
--- a/libavfilter/dnn/trt_class_wrapper.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
-* Copyright (c) 2021 NVIDIA CORPORATION. All rights reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
- */
- 
-/**
- * @file
- * TensorRT wrapper header for dnn_backend in ffmpeg.
- */
-
-#ifndef TRT_CLASS_WRAPPER_H
-#define TRT_CLASS_WRAPPER_H
-
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-
-    #include "../dnn_interface.h"
-
-    DNNReturnType free_model_trt(DNNModel *model);
-    DNNReturnType execute_model_trt(const DNNModel *model, const char *input_name, AVFrame *in_frame,
-                                           const char **output_names, uint32_t nb_output, AVFrame *out_frame);
-    DNNReturnType get_output_trt(void *model, const char *input_name, int input_width, int input_height,
-                                        const char *output_name, int *output_width, int *output_height);
-    DNNReturnType get_input_trt(void *model, DNNData *input, const char *input_name);
-    DNNReturnType trt_load_model(DNNModel *model, const char *model_filename, const AVClass *av_class, const char *options);
-
-#ifdef __cplusplus
-}
-#endif
-#endif
-- 
2.17.1



More information about the ffmpeg-devel mailing list