[FFmpeg-cvslog] avcodec/nvenc: allow configuring number of surfaces

Andrey Turkin git at videolan.org
Tue May 31 15:52:47 CEST 2016


ffmpeg | branch: master | Andrey Turkin <andrey.turkin at gmail.com> | Sun May 29 13:07:34 2016 +0300| [f052ef30ef6534c811b9f349e8f17e32afbc9148] | committer: Timo Rothenpieler

avcodec/nvenc: allow configuring number of surfaces

Signed-off-by: Timo Rothenpieler <timo at rothenpieler.org>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=f052ef30ef6534c811b9f349e8f17e32afbc9148
---

 libavcodec/nvenc.c      |   34 +++++++++++++++-------------------
 libavcodec/nvenc.h      |    4 ++--
 libavcodec/nvenc_h264.c |    3 ++-
 libavcodec/nvenc_hevc.c |    3 ++-
 4 files changed, 21 insertions(+), 23 deletions(-)

diff --git a/libavcodec/nvenc.c b/libavcodec/nvenc.c
index 600054f..76a54a1 100644
--- a/libavcodec/nvenc.c
+++ b/libavcodec/nvenc.c
@@ -784,7 +784,6 @@ static av_cold int nvenc_setup_encoder(AVCodecContext *avctx)
     NV_ENC_PRESET_CONFIG preset_config = { 0 };
     NVENCSTATUS nv_status = NV_ENC_SUCCESS;
     AVCPBProperties *cpb_props;
-    int num_mbs;
     int res = 0;
     int dw, dh;
 
@@ -842,12 +841,6 @@ static av_cold int nvenc_setup_encoder(AVCodecContext *avctx)
     ctx->init_encode_params.frameRateNum = avctx->time_base.den;
     ctx->init_encode_params.frameRateDen = avctx->time_base.num * avctx->ticks_per_frame;
 
-    num_mbs = ((avctx->width + 15) >> 4) * ((avctx->height + 15) >> 4);
-    ctx->max_surface_count = (num_mbs >= 8160) ? 32 : 48;
-
-    if (ctx->buffer_delay >= ctx->max_surface_count)
-        ctx->buffer_delay = ctx->max_surface_count - 1;
-
     ctx->init_encode_params.enableEncodeAsync = 0;
     ctx->init_encode_params.enablePTD = 1;
 
@@ -976,24 +969,27 @@ static av_cold int nvenc_setup_surfaces(AVCodecContext *avctx)
 {
     NvencContext *ctx = avctx->priv_data;
     int i, res;
+    int num_mbs = ((avctx->width + 15) >> 4) * ((avctx->height + 15) >> 4);
+    ctx->nb_surfaces = FFMAX((num_mbs >= 8160) ? 32 : 48,
+                             ctx->nb_surfaces);
+    ctx->async_depth = FFMIN(ctx->async_depth, ctx->nb_surfaces - 1);
 
-    ctx->surfaces = av_malloc(ctx->max_surface_count * sizeof(*ctx->surfaces));
 
-    if (!ctx->surfaces) {
+    ctx->surfaces = av_mallocz_array(ctx->nb_surfaces, sizeof(*ctx->surfaces));
+    if (!ctx->surfaces)
         return AVERROR(ENOMEM);
-    }
 
-    ctx->timestamp_list = av_fifo_alloc(ctx->max_surface_count * sizeof(int64_t));
+    ctx->timestamp_list = av_fifo_alloc(ctx->nb_surfaces * sizeof(int64_t));
     if (!ctx->timestamp_list)
         return AVERROR(ENOMEM);
-    ctx->output_surface_queue = av_fifo_alloc(ctx->max_surface_count * sizeof(NvencSurface*));
+    ctx->output_surface_queue = av_fifo_alloc(ctx->nb_surfaces * sizeof(NvencSurface*));
     if (!ctx->output_surface_queue)
         return AVERROR(ENOMEM);
-    ctx->output_surface_ready_queue = av_fifo_alloc(ctx->max_surface_count * sizeof(NvencSurface*));
+    ctx->output_surface_ready_queue = av_fifo_alloc(ctx->nb_surfaces * sizeof(NvencSurface*));
     if (!ctx->output_surface_ready_queue)
         return AVERROR(ENOMEM);
 
-    for (i = 0; i < ctx->max_surface_count; i++) {
+    for (i = 0; i < ctx->nb_surfaces; i++) {
         if ((res = nvenc_alloc_surface(avctx, i)) < 0)
             return res;
     }
@@ -1054,7 +1050,7 @@ av_cold int ff_nvenc_encode_close(AVCodecContext *avctx)
     av_fifo_freep(&ctx->output_surface_queue);
 
     if (ctx->surfaces && avctx->pix_fmt == AV_PIX_FMT_CUDA) {
-        for (i = 0; i < ctx->max_surface_count; ++i) {
+        for (i = 0; i < ctx->nb_surfaces; ++i) {
             if (ctx->surfaces[i].input_surface) {
                  p_nvenc->nvEncUnmapInputResource(ctx->nvencoder, ctx->surfaces[i].in_map.mappedResource);
             }
@@ -1067,7 +1063,7 @@ av_cold int ff_nvenc_encode_close(AVCodecContext *avctx)
     }
 
     if (ctx->surfaces) {
-        for (i = 0; i < ctx->max_surface_count; ++i) {
+        for (i = 0; i < ctx->nb_surfaces; ++i) {
             if (avctx->pix_fmt != AV_PIX_FMT_CUDA)
                 p_nvenc->nvEncDestroyInputBuffer(ctx->nvencoder, ctx->surfaces[i].input_surface);
             av_frame_free(&ctx->surfaces[i].in_ref);
@@ -1075,7 +1071,7 @@ av_cold int ff_nvenc_encode_close(AVCodecContext *avctx)
         }
     }
     av_freep(&ctx->surfaces);
-    ctx->max_surface_count = 0;
+    ctx->nb_surfaces = 0;
 
     if (ctx->nvencoder)
         p_nvenc->nvEncDestroyEncoder(ctx->nvencoder);
@@ -1140,7 +1136,7 @@ static NvencSurface *get_free_frame(NvencContext *ctx)
 {
     int i;
 
-    for (i = 0; i < ctx->max_surface_count; ++i) {
+    for (i = 0; i < ctx->nb_surfaces; ++i) {
         if (!ctx->surfaces[i].lockCount) {
             ctx->surfaces[i].lockCount = 1;
             return &ctx->surfaces[i];
@@ -1470,7 +1466,7 @@ static int output_ready(NvencContext *ctx, int flush)
 
     nb_ready   = av_fifo_size(ctx->output_surface_ready_queue)   / sizeof(NvencSurface*);
     nb_pending = av_fifo_size(ctx->output_surface_queue)         / sizeof(NvencSurface*);
-    return nb_ready > 0 && (flush || nb_ready + nb_pending >= ctx->buffer_delay);
+    return nb_ready > 0 && (flush || nb_ready + nb_pending >= ctx->async_depth);
 }
 
 int ff_nvenc_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
diff --git a/libavcodec/nvenc.h b/libavcodec/nvenc.h
index fd44ebc..2c1d8d4 100644
--- a/libavcodec/nvenc.h
+++ b/libavcodec/nvenc.h
@@ -144,7 +144,7 @@ typedef struct NvencContext
     CUcontext cu_context;
     CUcontext cu_context_internal;
 
-    int max_surface_count;
+    int nb_surfaces;
     NvencSurface *surfaces;
 
     AVFifoBuffer *output_surface_queue;
@@ -175,7 +175,7 @@ typedef struct NvencContext
     int twopass;
     int gpu;
     int flags;
-    int buffer_delay;
+    int async_depth;
 } NvencContext;
 
 int ff_nvenc_encode_init(AVCodecContext *avctx);
diff --git a/libavcodec/nvenc_h264.c b/libavcodec/nvenc_h264.c
index 19709c6..2fab3bf 100644
--- a/libavcodec/nvenc_h264.c
+++ b/libavcodec/nvenc_h264.c
@@ -76,10 +76,11 @@ static const AVOption options[] = {
     { "ll_2pass_quality", "Multi-pass optimized for image quality (only for low-latency presets)",       0, AV_OPT_TYPE_CONST,  { .i64 = NV_ENC_PARAMS_RC_2_PASS_QUALITY },       0, 0, VE, "rc" },
     { "ll_2pass_size",    "Multi-pass optimized for constant frame size (only for low-latency presets)", 0, AV_OPT_TYPE_CONST,  { .i64 = NV_ENC_PARAMS_RC_2_PASS_FRAMESIZE_CAP }, 0, 0, VE, "rc" },
     { "vbr_2pass",        "Multi-pass variable bitrate mode",                                            0, AV_OPT_TYPE_CONST,  { .i64 = NV_ENC_PARAMS_RC_2_PASS_VBR },           0, 0, VE, "rc" },
+    { "surfaces", "Number of concurrent surfaces",        OFFSET(nb_surfaces), AV_OPT_TYPE_INT,    { .i64 = 32 },                   0, INT_MAX, VE },
     { "cbr", "Use cbr encoding mode", OFFSET(cbr), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
     { "2pass", "Use 2pass encoding mode", OFFSET(twopass), AV_OPT_TYPE_BOOL, { .i64 = -1 }, -1, 1, VE },
     { "gpu", "Selects which NVENC capable GPU to use. First GPU is 0, second is 1, and so on.", OFFSET(gpu), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
-    { "delay", "Delays frame output by the given amount of frames.", OFFSET(buffer_delay), AV_OPT_TYPE_INT, { .i64 = INT_MAX }, 0, INT_MAX, VE },
+    { "delay",    "Delay frame output by the given amount of frames", OFFSET(async_depth), AV_OPT_TYPE_INT, { .i64 = INT_MAX }, 0, INT_MAX, VE },
     { NULL }
 };
 
diff --git a/libavcodec/nvenc_hevc.c b/libavcodec/nvenc_hevc.c
index 659e1c9..a1c88db 100644
--- a/libavcodec/nvenc_hevc.c
+++ b/libavcodec/nvenc_hevc.c
@@ -73,10 +73,11 @@ static const AVOption options[] = {
     { "ll_2pass_quality", "Multi-pass optimized for image quality (only for low-latency presets)",       0, AV_OPT_TYPE_CONST,  { .i64 = NV_ENC_PARAMS_RC_2_PASS_QUALITY },       0, 0, VE, "rc" },
     { "ll_2pass_size",    "Multi-pass optimized for constant frame size (only for low-latency presets)", 0, AV_OPT_TYPE_CONST,  { .i64 = NV_ENC_PARAMS_RC_2_PASS_FRAMESIZE_CAP }, 0, 0, VE, "rc" },
     { "vbr_2pass",        "Multi-pass variable bitrate mode",                                            0, AV_OPT_TYPE_CONST,  { .i64 = NV_ENC_PARAMS_RC_2_PASS_VBR },           0, 0, VE, "rc" },
+    { "surfaces", "Number of concurrent surfaces",        OFFSET(nb_surfaces), AV_OPT_TYPE_INT,    { .i64 = 32 },                   0, INT_MAX, VE },
     { "cbr", "Use cbr encoding mode", OFFSET(cbr), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
     { "2pass", "Use 2pass encoding mode", OFFSET(twopass), AV_OPT_TYPE_BOOL, { .i64 = -1 }, -1, 1, VE },
     { "gpu", "Selects which NVENC capable GPU to use. First GPU is 0, second is 1, and so on.", OFFSET(gpu), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
-    { "delay", "Delays frame output by the given amount of frames.", OFFSET(buffer_delay), AV_OPT_TYPE_INT, { .i64 = INT_MAX }, 0, INT_MAX, VE },
+    { "delay",    "Delay frame output by the given amount of frames", OFFSET(async_depth), AV_OPT_TYPE_INT, { .i64 = INT_MAX }, 0, INT_MAX, VE },
     { NULL }
 };
 



More information about the ffmpeg-cvslog mailing list