[FFmpeg-devel] [PATCH 1/2] ffv1enc_vulkan: switch to out of place encoding

Lynne dev at lynne.ee
Wed Nov 20 09:27:01 EET 2024


The issue is that the trial buffer used for encoding needs to
be really huge, since the encoded output may require more
space than the raw image data itself.

Ideally, we'd give the framework that buffer directly, since
the first slice is already in place. However, each step
afterwards may then keep a reference to that huge buffer, and
so waste RAM and/or VRAM.

The best solution is to simply allocate a new buffer once
the final compressed size is known, and use the GPU to copy
the data directly.
---
 libavcodec/ffv1enc_vulkan.c  | 138 +++++++++++++++++++++++------------
 libavutil/vulkan_functions.h |   1 +
 2 files changed, 91 insertions(+), 48 deletions(-)

diff --git a/libavcodec/ffv1enc_vulkan.c b/libavcodec/ffv1enc_vulkan.c
index 93ec5cafb9..3c1db9fd14 100644
--- a/libavcodec/ffv1enc_vulkan.c
+++ b/libavcodec/ffv1enc_vulkan.c
@@ -43,6 +43,9 @@ typedef struct VulkanEncodeFFv1Context {
     FFVkQueueFamilyCtx qf;
     FFVkExecPool exec_pool;
 
+    FFVkQueueFamilyCtx transfer_qf;
+    FFVkExecPool transfer_exec_pool;
+
     FFVulkanShader setup;
     FFVulkanShader reset;
     FFVulkanShader rct;
@@ -59,6 +62,7 @@ typedef struct VulkanEncodeFFv1Context {
 
     /* Output data buffer */
     AVBufferPool *out_data_pool;
+    AVBufferPool *pkt_data_pool;
 
     /* Temporary data buffer */
     AVBufferPool *tmp_data_pool;
@@ -298,7 +302,10 @@ static int vulkan_encode_ffv1_frame(AVCodecContext *avctx, AVPacket *pkt,
     size_t maxsize;
     AVBufferRef *out_data_ref;
     FFVkBuffer *out_data_buf;
-    uint8_t *buf_p;
+
+    /* Packet data */
+    AVBufferRef *pkt_data_ref;
+    FFVkBuffer *pkt_data_buf;
 
     /* Results data */
     AVBufferRef *results_data_ref;
@@ -314,14 +321,13 @@ static int vulkan_encode_ffv1_frame(AVCodecContext *avctx, AVPacket *pkt,
     AVFrame *enc_in = (AVFrame *)pict;
     VkImageView *enc_in_views = in_views;
 
-    VkMappedMemoryRange invalidate_data[2];
-    int nb_invalidate_data = 0;
-
     VkImageMemoryBarrier2 img_bar[37];
     int nb_img_bar = 0;
     VkBufferMemoryBarrier2 buf_bar[8];
     int nb_buf_bar = 0;
 
+    VkBufferCopy buf_regions[1024];
+
     if (!pict)
         return 0;
 
@@ -417,19 +423,14 @@ static int vulkan_encode_ffv1_frame(AVCodecContext *avctx, AVPacket *pkt,
                                   VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
                                   VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
                                   NULL, maxsize,
-                                  VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
-                                  VK_MEMORY_PROPERTY_HOST_CACHED_BIT);
+                                  VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
     if (err < 0)
         return err;
-
     out_data_buf = (FFVkBuffer *)out_data_ref->data;
-    pkt->data = out_data_buf->mapped_mem;
-    pkt->size = out_data_buf->size;
-    pkt->buf = out_data_ref;
 
     /* Add dependencies */
     ff_vk_exec_add_dep_buf(&fv->s, exec, &tmp_data_ref, 1, 0);
-    ff_vk_exec_add_dep_buf(&fv->s, exec, &results_data_ref, 1, 0);
+    ff_vk_exec_add_dep_buf(&fv->s, exec, &results_data_ref, 1, 1);
     ff_vk_exec_add_dep_buf(&fv->s, exec, &slice_data_ref, 1, has_inter);
     ff_vk_exec_add_dep_buf(&fv->s, exec, &out_data_ref, 1, 1);
     RET(ff_vk_exec_add_dep_frame(&fv->s, exec, enc_in,
@@ -653,61 +654,89 @@ static int vulkan_encode_ffv1_frame(AVCodecContext *avctx, AVPacket *pkt,
     av_frame_free(&intermediate_frame);
 
     /* Invalidate slice/output data if needed */
-    if (!(results_data_buf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
-        invalidate_data[nb_invalidate_data++] = (VkMappedMemoryRange) {
+    if (!(results_data_buf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
+        VkMappedMemoryRange invalidate_data = {
             .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
             .memory = results_data_buf->mem,
             .offset = 0,
             .size = VK_WHOLE_SIZE,
         };
-    if (!(out_data_buf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
-        invalidate_data[nb_invalidate_data++] = (VkMappedMemoryRange) {
-            .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
-            .memory = out_data_buf->mem,
-            .offset = 0,
-            .size = VK_WHOLE_SIZE,
-        };
-    if (nb_invalidate_data)
         vk->InvalidateMappedMemoryRanges(fv->s.hwctx->act_dev,
-                                         nb_invalidate_data, invalidate_data);
-
-    /* First slice is in-place */
-    buf_p = pkt->data;
-    sc = &((uint64_t *)results_data_buf->mapped_mem)[0];
-    av_log(avctx, AV_LOG_DEBUG, "Slice size = %"PRIu64" (max %i), src offset = %"PRIu64"\n",
-           sc[0], pkt->size / f->slice_count, sc[1]);
-    av_assert0(sc[0] < pd.slice_size_max);
-    av_assert0(sc[0] < (1 << 24));
-    buf_p += sc[0];
-
-    /* We have to copy the rest */
-    for (int i = 1; i < f->slice_count; i++) {
-        uint64_t bytes;
-        uint8_t *bs_start;
+                                         1, &invalidate_data);
+    }
 
+    /* Calculate final size */
+    pkt->size = 0;
+    for (int i = 0; i < f->slice_count; i++) {
         sc = &((uint64_t *)results_data_buf->mapped_mem)[i*2];
-        bytes = sc[0];
-        bs_start = pkt->data + sc[1];
-
         av_log(avctx, AV_LOG_DEBUG, "Slice %i size = %"PRIu64" (max %"PRIu64"), "
                                     "src offset = %"PRIu64"\n",
-               i, bytes, pd.slice_size_max, sc[1]);
-        av_assert0(bytes < pd.slice_size_max);
-        av_assert0(bytes < (1 << 24));
+               i, sc[0], pd.slice_size_max, sc[1]);
+
+        buf_regions[i] = (VkBufferCopy) {
+            .srcOffset = sc[1],
+            .dstOffset = pkt->size,
+            .size = sc[0],
+        };
+        pkt->size += sc[0];
+    }
+    av_log(avctx, AV_LOG_VERBOSE, "Total data = %i\n", pkt->size);
+    av_buffer_unref(&results_data_ref); /* No need for this buffer anymore */
 
-        memmove(buf_p, bs_start, bytes);
+    /* Allocate packet buffer */
+    err = ff_vk_get_pooled_buffer(&fv->s, &fv->pkt_data_pool,
+                                  &pkt_data_ref,
+                                  VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
+                                  VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
+                                  NULL, pkt->size,
+                                  VK_MEMORY_PROPERTY_HOST_CACHED_BIT |
+                                  VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
+    if (err < 0)
+        return err;
 
-        buf_p += bytes;
+    pkt_data_buf = (FFVkBuffer *)pkt_data_ref->data;
+    pkt->data = pkt_data_buf->mapped_mem;
+    pkt->buf = pkt_data_ref;
+
+    /* Transfer the slices */
+    exec = ff_vk_exec_get(&fv->s, &fv->transfer_exec_pool);
+    ff_vk_exec_start(&fv->s, exec);
+
+    ff_vk_exec_add_dep_buf(&fv->s, exec, &out_data_ref, 1, 0);
+    ff_vk_exec_add_dep_buf(&fv->s, exec, &pkt_data_ref, 1, 1);
+
+    for (int i = 0; i < f->slice_count; i++) {
+        sc = &((uint64_t *)results_data_buf->mapped_mem)[i*2];
+    }
+
+    vk->CmdCopyBuffer(exec->buf,
+                      out_data_buf->buf, pkt_data_buf->buf,
+                      f->slice_count, buf_regions);
+
+    /* Submit */
+    err = ff_vk_exec_submit(&fv->s, exec);
+    if (err < 0)
+        return err;
+
+    /* We need the encoded data immediately */
+    ff_vk_exec_wait(&fv->s, exec);
+
+    /* Invalidate slice/output data if needed */
+    if (!(out_data_buf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
+        VkMappedMemoryRange invalidate_data = {
+            .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
+            .memory = pkt_data_buf->mem,
+            .offset = 0,
+            .size = VK_WHOLE_SIZE,
+        };
+        vk->InvalidateMappedMemoryRanges(fv->s.hwctx->act_dev,
+                                         1, &invalidate_data);
     }
 
     f->picture_number++;
-    pkt->size = buf_p - pkt->data;
     pkt->flags |= AV_PKT_FLAG_KEY * f->key_frame;
     *got_packet = 1;
 
-    av_log(avctx, AV_LOG_VERBOSE, "Total data = %i\n",
-           pkt->size);
-
 fail:
     /* Frames added as a dep are always referenced, so we only need to
      * clean this up. */
@@ -1433,6 +1462,18 @@ static av_cold int vulkan_encode_ffv1_init(AVCodecContext *avctx)
     if (err < 0)
         return err;
 
+    err = ff_vk_qf_init(&fv->s, &fv->transfer_qf, VK_QUEUE_TRANSFER_BIT);
+    if (err < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Device has no transfer queues!\n");
+        return err;
+    }
+
+    err = ff_vk_exec_pool_init(&fv->s, &fv->transfer_qf, &fv->transfer_exec_pool,
+                               fv->transfer_qf.nb_queues,
+                               0, 0, 0, NULL);
+    if (err < 0)
+        return err;
+
     spv = ff_vk_spirv_init();
     if (!spv) {
         av_log(avctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
@@ -1515,6 +1556,7 @@ static av_cold int vulkan_encode_ffv1_close(AVCodecContext *avctx)
     av_buffer_pool_uninit(&fv->results_data_pool);
 
     av_buffer_pool_uninit(&fv->out_data_pool);
+    av_buffer_pool_uninit(&fv->pkt_data_pool);
     av_buffer_pool_uninit(&fv->tmp_data_pool);
 
     av_buffer_unref(&fv->keyframe_slice_data_ref);
diff --git a/libavutil/vulkan_functions.h b/libavutil/vulkan_functions.h
index b1ae4d181e..eb6f6b01c3 100644
--- a/libavutil/vulkan_functions.h
+++ b/libavutil/vulkan_functions.h
@@ -146,6 +146,7 @@ typedef uint64_t FFVulkanExtensions;
     MACRO(1, 1, FF_VK_EXT_NO_FLAG,              CmdPipelineBarrier)                      \
     MACRO(1, 1, FF_VK_EXT_NO_FLAG,              CmdCopyBufferToImage)                    \
     MACRO(1, 1, FF_VK_EXT_NO_FLAG,              CmdCopyImageToBuffer)                    \
+    MACRO(1, 1, FF_VK_EXT_NO_FLAG,              CmdCopyBuffer)                                         \
                                                                                          \
     /* Buffer */                                                                         \
     MACRO(1, 1, FF_VK_EXT_NO_FLAG,              GetBufferMemoryRequirements2)            \
-- 
2.45.2.753.g447d99e1c3b


More information about the ffmpeg-devel mailing list