[FFmpeg-devel] [PATCH 3/3] libavcodec/vaapi_encode: Add async_depth to vaapi_encoder to increase performance

Wenbin Chen wenbin.chen at intel.com
Wed Oct 27 11:57:05 EEST 2021


Add async_depth to increase the encoder's performance. Reuse encode_fifo as
the async buffer. The encoder submits all reordered frames to the HW and then
checks the fifo size. If the fifo holds fewer than async_depth frames and the
top frame is not ready, it returns AVERROR(EAGAIN) to request more frames.

1080p transcoding (no B-frames) with -async_depth=4 improves performance by
20% in my environment.
Asynchronous operation increases performance but also introduces frame delay.

Signed-off-by: Wenbin Chen <wenbin.chen at intel.com>
---
 libavcodec/vaapi_encode.c | 20 +++++++++++++++-----
 libavcodec/vaapi_encode.h | 12 ++++++++++--
 2 files changed, 25 insertions(+), 7 deletions(-)

diff --git a/libavcodec/vaapi_encode.c b/libavcodec/vaapi_encode.c
index db0ae136a1..616fb7c089 100644
--- a/libavcodec/vaapi_encode.c
+++ b/libavcodec/vaapi_encode.c
@@ -1158,7 +1158,8 @@ static int vaapi_encode_send_frame(AVCodecContext *avctx, AVFrame *frame)
         if (ctx->input_order == ctx->decode_delay)
             ctx->dts_pts_diff = pic->pts - ctx->first_pts;
         if (ctx->output_delay > 0)
-            ctx->ts_ring[ctx->input_order % (3 * ctx->output_delay)] = pic->pts;
+            ctx->ts_ring[ctx->input_order %
+                        (3 * ctx->output_delay + ctx->async_depth)] = pic->pts;
 
         pic->display_order = ctx->input_order;
         ++ctx->input_order;
@@ -1212,7 +1213,8 @@ int ff_vaapi_encode_receive_packet(AVCodecContext *avctx, AVPacket *pkt)
             return AVERROR(EAGAIN);
     }
 
-    while (av_fifo_size(ctx->encode_fifo) <= MAX_PICTURE_REFERENCES * sizeof(VAAPIEncodePicture *)) {
+    while (av_fifo_size(ctx->encode_fifo) <
+            MAX_ASYNC_DEPTH * sizeof(VAAPIEncodePicture *)) {
         pic = NULL;
         err = vaapi_encode_pick_next(avctx, &pic);
         if (err < 0)
@@ -1234,6 +1236,14 @@ int ff_vaapi_encode_receive_packet(AVCodecContext *avctx, AVPacket *pkt)
     if (!av_fifo_size(ctx->encode_fifo))
         return err;
 
+    if (av_fifo_size(ctx->encode_fifo) < ctx->async_depth * sizeof(VAAPIEncodePicture *) &&
+        !ctx->end_of_stream) {
+        av_fifo_generic_peek(ctx->encode_fifo, &pic, sizeof(pic), NULL);
+        err = vaapi_encode_wait(avctx, pic, 0);
+        if (err < 0)
+            return err;
+    }
+
     av_fifo_generic_read(ctx->encode_fifo, &pic, sizeof(pic), NULL);
     ctx->encode_order = pic->encode_order + 1;
 
@@ -1252,7 +1262,7 @@ int ff_vaapi_encode_receive_packet(AVCodecContext *avctx, AVPacket *pkt)
             pkt->dts = ctx->ts_ring[pic->encode_order] - ctx->dts_pts_diff;
     } else {
         pkt->dts = ctx->ts_ring[(pic->encode_order - ctx->decode_delay) %
-                                (3 * ctx->output_delay)];
+                                (3 * ctx->output_delay + ctx->async_depth)];
     }
     av_log(avctx, AV_LOG_DEBUG, "Output packet: pts %"PRId64" dts %"PRId64".\n",
            pkt->pts, pkt->dts);
@@ -2566,8 +2576,8 @@ av_cold int ff_vaapi_encode_init(AVCodecContext *avctx)
         }
     }
 
-    ctx->encode_fifo = av_fifo_alloc((MAX_PICTURE_REFERENCES + 1) *
-                                      sizeof(VAAPIEncodePicture *));
+    ctx->encode_fifo = av_fifo_alloc(MAX_ASYNC_DEPTH *
+                                     sizeof(VAAPIEncodePicture *));
     if (!ctx->encode_fifo)
         return AVERROR(ENOMEM);
 
diff --git a/libavcodec/vaapi_encode.h b/libavcodec/vaapi_encode.h
index 89fe8de466..1bf5d7c337 100644
--- a/libavcodec/vaapi_encode.h
+++ b/libavcodec/vaapi_encode.h
@@ -48,6 +48,7 @@ enum {
     MAX_TILE_ROWS          = 22,
     // A.4.1: table A.6 allows at most 20 tile columns for any level.
     MAX_TILE_COLS          = 20,
+    MAX_ASYNC_DEPTH        = 64,
 };
 
 extern const AVCodecHWConfigInternal *const ff_vaapi_encode_hw_configs[];
@@ -298,7 +299,8 @@ typedef struct VAAPIEncodeContext {
     // Timestamp handling.
     int64_t         first_pts;
     int64_t         dts_pts_diff;
-    int64_t         ts_ring[MAX_REORDER_DELAY * 3];
+    int64_t         ts_ring[MAX_REORDER_DELAY * 3 +
+                            MAX_ASYNC_DEPTH];
 
     // Slice structure.
     int slice_block_rows;
@@ -348,6 +350,8 @@ typedef struct VAAPIEncodeContext {
     AVFrame         *frame;
 
     AVFifoBuffer *encode_fifo;
+
+    int async_depth;
 } VAAPIEncodeContext;
 
 enum {
@@ -458,7 +462,11 @@ int ff_vaapi_encode_close(AVCodecContext *avctx);
     { "b_depth", \
       "Maximum B-frame reference depth", \
       OFFSET(common.desired_b_depth), AV_OPT_TYPE_INT, \
-      { .i64 = 1 }, 1, INT_MAX, FLAGS }
+      { .i64 = 1 }, 1, INT_MAX, FLAGS }, \
+    { "async_depth", "Maximum processing parallelism. " \
+      "Increase this to improve single channel performance", \
+      OFFSET(common.async_depth), AV_OPT_TYPE_INT, \
+      { .i64 = 4 }, 0, MAX_ASYNC_DEPTH, FLAGS }
 
 #define VAAPI_ENCODE_RC_MODE(name, desc) \
     { #name, desc, 0, AV_OPT_TYPE_CONST, { .i64 = RC_MODE_ ## name }, \
-- 
2.25.1



More information about the ffmpeg-devel mailing list