[FFmpeg-cvslog] avcodec/nvenc: surface allocation reduction

Ben Chang git at videolan.org
Wed Apr 26 23:37:30 EEST 2017


ffmpeg | branch: master | Ben Chang <benc at nvidia.com> | Tue Apr 25 14:57:56 2017 -0700| [8de3458a07376b0a96772e586b6dba5e93432f52] | committer: Timo Rothenpieler

avcodec/nvenc: surface allocation reduction

This patch aims to reduce the number of input/output surfaces
NVENC allocates per session. Previous default sets allocated surfaces to 32
(unless there is user specified param or lookahead involved). Having large
number of surfaces consumes extra video memory (esp for higher resolution
encoding). The patch changes the surfaces calculation for default, B-frames,
lookahead scenario respectively.

The other change involves surface selection. Previously, if a session
allocates x surfaces, only x-1 surfaces are used (due to combination
of output delay and lock toggle logic). To prevent unused surfaces,
changing surface rotation to using predefined fifo.

Signed-off-by: Timo Rothenpieler <timo at rothenpieler.org>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=8de3458a07376b0a96772e586b6dba5e93432f52
---

 libavcodec/nvenc.c      | 55 ++++++++++++++++++++++++++++++++++---------------
 libavcodec/nvenc.h      |  2 +-
 libavcodec/nvenc_h264.c |  4 ++--
 libavcodec/nvenc_hevc.c |  4 ++--
 4 files changed, 43 insertions(+), 22 deletions(-)

diff --git a/libavcodec/nvenc.c b/libavcodec/nvenc.c
index cf054550c1..00766c25d0 100644
--- a/libavcodec/nvenc.c
+++ b/libavcodec/nvenc.c
@@ -644,16 +644,34 @@ static void nvenc_override_rate_control(AVCodecContext *avctx)
 static av_cold int nvenc_recalc_surfaces(AVCodecContext *avctx)
 {
     NvencContext *ctx = avctx->priv_data;
-    int nb_surfaces = 0;
+    // default minimum of 4 surfaces
+    // multiply by 2 for number of NVENCs on gpu (hardcode to 2)
+    // another multiply by 2 to avoid blocking next PBB group
+    int nb_surfaces = FFMAX(4, ctx->encode_config.frameIntervalP * 2 * 2);
 
+    // lookahead enabled
     if (ctx->rc_lookahead > 0) {
-        nb_surfaces = ctx->rc_lookahead + ((ctx->encode_config.frameIntervalP > 0) ? ctx->encode_config.frameIntervalP : 0) + 1 + 4;
-        if (ctx->nb_surfaces < nb_surfaces) {
+        // +1 is to account for lkd_bound calculation later
+        // +4 is to allow sufficient pipelining with lookahead
+        nb_surfaces = FFMAX(1, FFMAX(nb_surfaces, ctx->rc_lookahead + ctx->encode_config.frameIntervalP + 1 + 4));
+        if (nb_surfaces > ctx->nb_surfaces && ctx->nb_surfaces > 0)
+        {
             av_log(avctx, AV_LOG_WARNING,
                    "Defined rc_lookahead requires more surfaces, "
                    "increasing used surfaces %d -> %d\n", ctx->nb_surfaces, nb_surfaces);
-            ctx->nb_surfaces = nb_surfaces;
         }
+        ctx->nb_surfaces = FFMAX(nb_surfaces, ctx->nb_surfaces);
+    } else {
+        if (ctx->encode_config.frameIntervalP > 1 && ctx->nb_surfaces < nb_surfaces && ctx->nb_surfaces > 0)
+        {
+            av_log(avctx, AV_LOG_WARNING,
+                   "Defined b-frame requires more surfaces, "
+                   "increasing used surfaces %d -> %d\n", ctx->nb_surfaces, nb_surfaces);
+            ctx->nb_surfaces = FFMAX(ctx->nb_surfaces, nb_surfaces);
+        }
+        else if (ctx->nb_surfaces <= 0)
+            ctx->nb_surfaces = nb_surfaces;
+        // otherwise use user specified value
     }
 
     ctx->nb_surfaces = FFMAX(1, FFMIN(MAX_REGISTERED_FRAMES, ctx->nb_surfaces));
@@ -1086,6 +1104,7 @@ static av_cold int nvenc_alloc_surface(AVCodecContext *avctx, int idx)
     NvencContext *ctx = avctx->priv_data;
     NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
     NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
+    NvencSurface* tmp_surface = &ctx->surfaces[idx];
 
     NVENCSTATUS nv_status;
     NV_ENC_CREATE_BITSTREAM_BUFFER allocOut = { 0 };
@@ -1121,8 +1140,6 @@ static av_cold int nvenc_alloc_surface(AVCodecContext *avctx, int idx)
         ctx->surfaces[idx].height = allocSurf.height;
     }
 
-    ctx->surfaces[idx].lockCount = 0;
-
     /* 1MB is large enough to hold most output frames.
      * NVENC increases this automaticaly if it is not enough. */
     allocOut.size = 1024 * 1024;
@@ -1141,6 +1158,8 @@ static av_cold int nvenc_alloc_surface(AVCodecContext *avctx, int idx)
     ctx->surfaces[idx].output_surface = allocOut.bitstreamBuffer;
     ctx->surfaces[idx].size = allocOut.size;
 
+    av_fifo_generic_write(ctx->unused_surface_queue, &tmp_surface, sizeof(tmp_surface), NULL);
+
     return 0;
 }
 
@@ -1156,6 +1175,11 @@ static av_cold int nvenc_setup_surfaces(AVCodecContext *avctx)
     ctx->timestamp_list = av_fifo_alloc(ctx->nb_surfaces * sizeof(int64_t));
     if (!ctx->timestamp_list)
         return AVERROR(ENOMEM);
+
+    ctx->unused_surface_queue = av_fifo_alloc(ctx->nb_surfaces * sizeof(NvencSurface*));
+    if (!ctx->unused_surface_queue)
+        return AVERROR(ENOMEM);
+
     ctx->output_surface_queue = av_fifo_alloc(ctx->nb_surfaces * sizeof(NvencSurface*));
     if (!ctx->output_surface_queue)
         return AVERROR(ENOMEM);
@@ -1222,6 +1246,7 @@ av_cold int ff_nvenc_encode_close(AVCodecContext *avctx)
     av_fifo_freep(&ctx->timestamp_list);
     av_fifo_freep(&ctx->output_surface_ready_queue);
     av_fifo_freep(&ctx->output_surface_queue);
+    av_fifo_freep(&ctx->unused_surface_queue);
 
     if (ctx->surfaces && avctx->pix_fmt == AV_PIX_FMT_CUDA) {
         for (i = 0; i < ctx->nb_surfaces; ++i) {
@@ -1305,16 +1330,14 @@ av_cold int ff_nvenc_encode_init(AVCodecContext *avctx)
 
 static NvencSurface *get_free_frame(NvencContext *ctx)
 {
-    int i;
+    NvencSurface *tmp_surf;
 
-    for (i = 0; i < ctx->nb_surfaces; i++) {
-        if (!ctx->surfaces[i].lockCount) {
-            ctx->surfaces[i].lockCount = 1;
-            return &ctx->surfaces[i];
-        }
-    }
+    if (!(av_fifo_size(ctx->unused_surface_queue) > 0))
+        // queue empty
+        return NULL;
 
-    return NULL;
+    av_fifo_generic_read(ctx->unused_surface_queue, &tmp_surf, sizeof(tmp_surf), NULL);
+    return tmp_surf;
 }
 
 static int nvenc_copy_frame(AVCodecContext *avctx, NvencSurface *nv_surface,
@@ -1712,7 +1735,6 @@ int ff_nvenc_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
         }
 
         if (res) {
-            inSurf->lockCount = 0;
             return res;
         }
 
@@ -1790,8 +1812,7 @@ int ff_nvenc_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
         if (res)
             return res;
 
-        av_assert0(tmpoutsurf->lockCount);
-        tmpoutsurf->lockCount--;
+        av_fifo_generic_write(ctx->unused_surface_queue, &tmpoutsurf, sizeof(tmpoutsurf), NULL);
 
         *got_packet = 1;
     } else {
diff --git a/libavcodec/nvenc.h b/libavcodec/nvenc.h
index 7dec5cc685..763647b283 100644
--- a/libavcodec/nvenc.h
+++ b/libavcodec/nvenc.h
@@ -44,7 +44,6 @@ typedef struct NvencSurface
     NV_ENC_OUTPUT_PTR output_surface;
     NV_ENC_BUFFER_FORMAT format;
     int size;
-    int lockCount;
 } NvencSurface;
 
 typedef struct NvencDynLoadFunctions
@@ -110,6 +109,7 @@ typedef struct NvencContext
     int nb_surfaces;
     NvencSurface *surfaces;
 
+    AVFifoBuffer *unused_surface_queue;
     AVFifoBuffer *output_surface_queue;
     AVFifoBuffer *output_surface_ready_queue;
     AVFifoBuffer *timestamp_list;
diff --git a/libavcodec/nvenc_h264.c b/libavcodec/nvenc_h264.c
index 2c55b60789..8d44b1f350 100644
--- a/libavcodec/nvenc_h264.c
+++ b/libavcodec/nvenc_h264.c
@@ -79,8 +79,8 @@ static const AVOption options[] = {
                                                             0,                    AV_OPT_TYPE_CONST, { .i64 = NV_ENC_PARAMS_RC_2_PASS_FRAMESIZE_CAP }, 0, 0, VE, "rc" },
     { "vbr_2pass",    "Multi-pass variable bitrate mode",   0,                    AV_OPT_TYPE_CONST, { .i64 = NV_ENC_PARAMS_RC_2_PASS_VBR },           0, 0, VE, "rc" },
     { "rc-lookahead", "Number of frames to look ahead for rate-control",
-                                                            OFFSET(rc_lookahead), AV_OPT_TYPE_INT,   { .i64 = -1 }, -1, INT_MAX, VE },
-    { "surfaces",     "Number of concurrent surfaces",      OFFSET(nb_surfaces),  AV_OPT_TYPE_INT,   { .i64 = 32 },  0, MAX_REGISTERED_FRAMES, VE },
+                                                            OFFSET(rc_lookahead), AV_OPT_TYPE_INT,   { .i64 = 0 }, 0, INT_MAX, VE },
+    { "surfaces",     "Number of concurrent surfaces",      OFFSET(nb_surfaces),  AV_OPT_TYPE_INT,   { .i64 = 0 }, 0, MAX_REGISTERED_FRAMES, VE },
     { "cbr",          "Use cbr encoding mode",              OFFSET(cbr),          AV_OPT_TYPE_BOOL,  { .i64 = 0 },   0, 1, VE },
     { "2pass",        "Use 2pass encoding mode",            OFFSET(twopass),      AV_OPT_TYPE_BOOL,  { .i64 = -1 }, -1, 1, VE },
     { "gpu",          "Selects which NVENC capable GPU to use. First GPU is 0, second is 1, and so on.",
diff --git a/libavcodec/nvenc_hevc.c b/libavcodec/nvenc_hevc.c
index c32ba4220b..6d6750a3d4 100644
--- a/libavcodec/nvenc_hevc.c
+++ b/libavcodec/nvenc_hevc.c
@@ -78,8 +78,8 @@ static const AVOption options[] = {
                                                             0,                    AV_OPT_TYPE_CONST, { .i64 = NV_ENC_PARAMS_RC_2_PASS_FRAMESIZE_CAP }, 0, 0, VE, "rc" },
     { "vbr_2pass",    "Multi-pass variable bitrate mode",   0,                    AV_OPT_TYPE_CONST, { .i64 = NV_ENC_PARAMS_RC_2_PASS_VBR },           0, 0, VE, "rc" },
     { "rc-lookahead", "Number of frames to look ahead for rate-control",
-                                                            OFFSET(rc_lookahead), AV_OPT_TYPE_INT,   { .i64 = -1 }, -1, INT_MAX, VE },
-    { "surfaces",     "Number of concurrent surfaces",      OFFSET(nb_surfaces),  AV_OPT_TYPE_INT,   { .i64 = 32 },  0, MAX_REGISTERED_FRAMES, VE },
+                                                            OFFSET(rc_lookahead), AV_OPT_TYPE_INT,   { .i64 = 0 }, 0, INT_MAX, VE },
+    { "surfaces",     "Number of concurrent surfaces",      OFFSET(nb_surfaces),  AV_OPT_TYPE_INT,   { .i64 = 0 }, 0, MAX_REGISTERED_FRAMES, VE },
     { "cbr",          "Use cbr encoding mode",              OFFSET(cbr),          AV_OPT_TYPE_BOOL,  { .i64 = 0 },   0, 1, VE },
     { "2pass",        "Use 2pass encoding mode",            OFFSET(twopass),      AV_OPT_TYPE_BOOL,  { .i64 = -1 }, -1, 1, VE },
     { "gpu",          "Selects which NVENC capable GPU to use. First GPU is 0, second is 1, and so on.",



More information about the ffmpeg-cvslog mailing list