[FFmpeg-devel] [PATCH v2 05/12] vulkan_decode: support multiple image views

Mon Feb 24 10:04:18 EET 2025

Enables non-monochrome video decoding using all our existing functions
in the context of an SDR decoder.
---
 libavcodec/vulkan_av1.c    |  4 +-
 libavcodec/vulkan_decode.c | 90 ++++++++++++++++++++------------------
 libavcodec/vulkan_decode.h | 12 ++---
 libavcodec/vulkan_h264.c   |  4 +-
 libavcodec/vulkan_hevc.c   |  4 +-
 5 files changed, 60 insertions(+), 54 deletions(-)

diff --git a/libavcodec/vulkan_av1.c b/libavcodec/vulkan_av1.c
index 6659f9d812..7dd7b204d7 100644
--- a/libavcodec/vulkan_av1.c
+++ b/libavcodec/vulkan_av1.c
@@ -123,7 +123,7 @@ static int vk_av1_fill_pict(AVCodecContext *avctx, const AV1Frame **ref_src,
         .codedExtent = (VkExtent2D){ pic->f->width, pic->f->height },
         .baseArrayLayer = ((has_grain || dec->dedicated_dpb) && ctx->common.layered_dpb) ?
                           hp->frame_id : 0,
-        .imageViewBinding = vkpic->img_view_ref,
+        .imageViewBinding = vkpic->view.ref[0],
     };
 
     *ref_slot = (VkVideoReferenceSlotInfoKHR) {
@@ -346,7 +346,7 @@ static int vk_av1_start_frame(AVCodecContext          *avctx,
             .codedOffset = (VkOffset2D){ 0, 0 },
             .codedExtent = (VkExtent2D){ pic->f->width, pic->f->height },
             .baseArrayLayer = 0,
-            .imageViewBinding = vp->img_view_out,
+            .imageViewBinding = vp->view.out[0],
         },
     };
 
diff --git a/libavcodec/vulkan_decode.c b/libavcodec/vulkan_decode.c
index 084563e8e9..9eaafa6495 100644
--- a/libavcodec/vulkan_decode.c
+++ b/libavcodec/vulkan_decode.c
@@ -130,9 +130,11 @@ static void init_frame(FFVulkanDecodeContext *dec, FFVulkanDecodePicture *vkpic)
     FFVulkanFunctions *vk = &ctx->s.vkfn;
 
     vkpic->dpb_frame     = NULL;
-    vkpic->img_view_ref  = VK_NULL_HANDLE;
-    vkpic->img_view_out  = VK_NULL_HANDLE;
-    vkpic->img_view_dest = VK_NULL_HANDLE;
+    for (int i = 0; i < AV_NUM_DATA_POINTERS; i++) {
+        vkpic->view.ref[i]  = VK_NULL_HANDLE;
+        vkpic->view.out[i]  = VK_NULL_HANDLE;
+        vkpic->view.dst[i]  = VK_NULL_HANDLE;
+    }
 
     vkpic->destroy_image_view = vk->DestroyImageView;
     vkpic->wait_semaphores = vk->WaitSemaphores;
@@ -149,14 +151,14 @@ int ff_vk_decode_prepare_frame(FFVulkanDecodeContext *dec, AVFrame *pic,
 
     /* If the decoder made a blank frame to make up for a missing ref, or the
      * frame is the current frame so it's missing one, create a re-representation */
-    if (vkpic->img_view_ref)
+    if (vkpic->view.ref[0])
         return 0;
 
     init_frame(dec, vkpic);
 
     if (ctx->common.layered_dpb && alloc_dpb) {
-        vkpic->img_view_ref = ctx->common.layered_view;
-        vkpic->img_aspect_ref = ctx->common.layered_aspect;
+        vkpic->view.ref[0] = ctx->common.layered_view;
+        vkpic->view.aspect_ref[0] = ctx->common.layered_aspect;
     } else if (alloc_dpb) {
         AVHWFramesContext *dpb_frames = (AVHWFramesContext *)ctx->common.dpb_hwfc_ref->data;
         AVVulkanFramesContext *dpb_hwfc = dpb_frames->hwctx;
@@ -166,13 +168,13 @@ int ff_vk_decode_prepare_frame(FFVulkanDecodeContext *dec, AVFrame *pic,
             return AVERROR(ENOMEM);
 
         err = ff_vk_create_view(&ctx->s, &ctx->common,
-                                &vkpic->img_view_ref, &vkpic->img_aspect_ref,
+                                &vkpic->view.ref[0], &vkpic->view.aspect_ref[0],
                                 (AVVkFrame *)vkpic->dpb_frame->data[0],
                                 dpb_hwfc->format[0], !is_current);
         if (err < 0)
             return err;
 
-        vkpic->img_view_dest = vkpic->img_view_ref;
+        vkpic->view.dst[0] = vkpic->view.ref[0];
     }
 
     if (!alloc_dpb || is_current) {
@@ -180,15 +182,15 @@ int ff_vk_decode_prepare_frame(FFVulkanDecodeContext *dec, AVFrame *pic,
         AVVulkanFramesContext *hwfc = frames->hwctx;
 
         err = ff_vk_create_view(&ctx->s, &ctx->common,
-                                &vkpic->img_view_out, &vkpic->img_aspect,
+                                &vkpic->view.out[0], &vkpic->view.aspect[0],
                                 (AVVkFrame *)pic->data[0],
                                 hwfc->format[0], !is_current);
         if (err < 0)
             return err;
 
         if (!alloc_dpb) {
-            vkpic->img_view_ref = vkpic->img_view_out;
-            vkpic->img_aspect_ref = vkpic->img_aspect;
+            vkpic->view.ref[0] = vkpic->view.out[0];
+            vkpic->view.aspect_ref[0] = vkpic->view.aspect[0];
         }
     }
 
@@ -201,41 +203,41 @@ int ff_vk_decode_prepare_frame_sdr(FFVulkanDecodeContext *dec, AVFrame *pic,
 {
     int err;
     FFVulkanDecodeShared *ctx = dec->shared_ctx;
+    AVHWFramesContext *frames = (AVHWFramesContext *)pic->hw_frames_ctx->data;
 
     vkpic->slices_size = 0;
 
-    if (vkpic->img_view_ref)
+    if (vkpic->view.ref[0])
         return 0;
 
     init_frame(dec, vkpic);
 
-    if (ctx->common.layered_dpb && alloc_dpb) {
-        vkpic->img_view_ref = ctx->common.layered_view;
-        vkpic->img_aspect_ref = ctx->common.layered_aspect;
-    } else if (alloc_dpb) {
-        vkpic->dpb_frame = vk_get_dpb_pool(ctx);
-        if (!vkpic->dpb_frame)
-            return AVERROR(ENOMEM);
+    for (int i = 0; i < av_pix_fmt_count_planes(frames->sw_format); i++) {
+        if (alloc_dpb) {
+            vkpic->dpb_frame = vk_get_dpb_pool(ctx);
+            if (!vkpic->dpb_frame)
+                return AVERROR(ENOMEM);
 
-        err = ff_vk_create_imageview(&ctx->s,
-                                     &vkpic->img_view_ref, &vkpic->img_aspect_ref,
-                                     vkpic->dpb_frame, 0, rep_fmt);
-        if (err < 0)
-            return err;
+            err = ff_vk_create_imageview(&ctx->s,
+                                         &vkpic->view.ref[i], &vkpic->view.aspect_ref[i],
+                                         vkpic->dpb_frame, i, rep_fmt);
+            if (err < 0)
+                return err;
 
-        vkpic->img_view_dest = vkpic->img_view_ref;
-    }
+            vkpic->view.dst[i] = vkpic->view.ref[i];
+        }
 
-    if (!alloc_dpb || is_current) {
-        err = ff_vk_create_imageview(&ctx->s,
-                                     &vkpic->img_view_out, &vkpic->img_aspect,
-                                     pic, 0, rep_fmt);
-        if (err < 0)
-            return err;
+        if (!alloc_dpb || is_current) {
+            err = ff_vk_create_imageview(&ctx->s,
+                                         &vkpic->view.out[i], &vkpic->view.aspect[i],
+                                         pic, i, rep_fmt);
+            if (err < 0)
+                return err;
 
-        if (!alloc_dpb) {
-            vkpic->img_view_ref = vkpic->img_view_out;
-            vkpic->img_aspect_ref = vkpic->img_aspect;
+            if (!alloc_dpb) {
+                vkpic->view.ref[i] = vkpic->view.out[i];
+                vkpic->view.aspect_ref[i] = vkpic->view.aspect[i];
+            }
         }
     }
 
@@ -467,7 +469,7 @@ int ff_vk_decode_frame(AVCodecContext *avctx,
         .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
         .image = vkf->img[0],
         .subresourceRange = (VkImageSubresourceRange) {
-            .aspectMask = vp->img_aspect,
+            .aspectMask = vp->view.aspect[0],
             .layerCount = 1,
             .levelCount = 1,
         },
@@ -523,7 +525,7 @@ int ff_vk_decode_frame(AVCodecContext *avctx,
                     .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
                     .image = rvkf->img[0],
                     .subresourceRange = (VkImageSubresourceRange) {
-                        .aspectMask = rvp->img_aspect_ref,
+                        .aspectMask = rvp->view.aspect_ref[0],
                         .layerCount = 1,
                         .levelCount = 1,
                     },
@@ -533,7 +535,7 @@ int ff_vk_decode_frame(AVCodecContext *avctx,
             }
         }
     } else if (vp->decode_info.referenceSlotCount ||
-               vp->img_view_out != vp->img_view_ref) {
+               vp->view.out[0] != vp->view.ref[0]) {
         /* Single barrier for a single layered ref */
         err = ff_vk_exec_add_dep_frame(&ctx->s, exec, ctx->common.layered_frame,
                                        VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR,
@@ -580,12 +582,14 @@ void ff_vk_decode_free_frame(AVHWDeviceContext *dev_ctx, FFVulkanDecodePicture *
     av_buffer_unref(&vp->slices_buf);
 
     /* Destroy image view (out) */
-    if (vp->img_view_out && vp->img_view_out != vp->img_view_dest)
-        vp->destroy_image_view(hwctx->act_dev, vp->img_view_out, hwctx->alloc);
+    for (int i = 0; i < AV_NUM_DATA_POINTERS; i++) {
+        if (vp->view.out[i] && vp->view.out[i] != vp->view.dst[i])
+            vp->destroy_image_view(hwctx->act_dev, vp->view.out[i], hwctx->alloc);
 
-    /* Destroy image view (ref, unlayered) */
-    if (vp->img_view_dest)
-        vp->destroy_image_view(hwctx->act_dev, vp->img_view_dest, hwctx->alloc);
+        /* Destroy image view (ref, unlayered) */
+        if (vp->view.dst[i])
+            vp->destroy_image_view(hwctx->act_dev, vp->view.dst[i], hwctx->alloc);
+    }
 
     av_frame_free(&vp->dpb_frame);
 }
diff --git a/libavcodec/vulkan_decode.h b/libavcodec/vulkan_decode.h
index 9a11a80f95..4051732e49 100644
--- a/libavcodec/vulkan_decode.h
+++ b/libavcodec/vulkan_decode.h
@@ -84,11 +84,13 @@ typedef struct FFVulkanDecodeContext {
 typedef struct FFVulkanDecodePicture {
     AVFrame                        *dpb_frame;      /* Only used for out-of-place decoding. */
 
-    VkImageView                     img_view_ref;   /* Image representation view (reference) */
-    VkImageView                     img_view_out;   /* Image representation view (output-only) */
-    VkImageView                     img_view_dest;  /* Set to img_view_out if no layered refs are used */
-    VkImageAspectFlags              img_aspect;     /* Image plane mask bits */
-    VkImageAspectFlags              img_aspect_ref; /* Only used for out-of-place decoding */
+    struct {
+        VkImageView                     ref[AV_NUM_DATA_POINTERS];        /* Image representation view (reference) */
+        VkImageView                     out[AV_NUM_DATA_POINTERS];        /* Image representation view (output-only) */
+        VkImageView                     dst[AV_NUM_DATA_POINTERS];        /* Set to img_view_out if no layered refs are used */
+        VkImageAspectFlags              aspect[AV_NUM_DATA_POINTERS];     /* Image plane mask bits */
+        VkImageAspectFlags              aspect_ref[AV_NUM_DATA_POINTERS]; /* Only used for out-of-place decoding */
+    } view;
 
     VkSemaphore                     sem;
     uint64_t                        sem_value;
diff --git a/libavcodec/vulkan_h264.c b/libavcodec/vulkan_h264.c
index 1df8f0a208..71cf2c3ad7 100644
--- a/libavcodec/vulkan_h264.c
+++ b/libavcodec/vulkan_h264.c
@@ -98,7 +98,7 @@ static int vk_h264_fill_pict(AVCodecContext *avctx, H264Picture **ref_src,
         .codedOffset = (VkOffset2D){ 0, 0 },
         .codedExtent = (VkExtent2D){ pic->f->width, pic->f->height },
         .baseArrayLayer = ctx->common.layered_dpb ? dpb_slot_index : 0,
-        .imageViewBinding = vkpic->img_view_ref,
+        .imageViewBinding = vkpic->view.ref[0],
     };
 
     *ref_slot = (VkVideoReferenceSlotInfoKHR) {
@@ -471,7 +471,7 @@ static int vk_h264_start_frame(AVCodecContext          *avctx,
             .codedOffset = (VkOffset2D){ 0, 0 },
             .codedExtent = (VkExtent2D){ pic->f->width, pic->f->height },
             .baseArrayLayer = 0,
-            .imageViewBinding = vp->img_view_out,
+            .imageViewBinding = vp->view.out[0],
         },
     };
 
diff --git a/libavcodec/vulkan_hevc.c b/libavcodec/vulkan_hevc.c
index 589c3de83d..a5bcd88e2d 100644
--- a/libavcodec/vulkan_hevc.c
+++ b/libavcodec/vulkan_hevc.c
@@ -164,7 +164,7 @@ static int vk_hevc_fill_pict(AVCodecContext *avctx, HEVCFrame **ref_src,
         .codedOffset = (VkOffset2D){ 0, 0 },
         .codedExtent = (VkExtent2D){ pic->f->width, pic->f->height },
         .baseArrayLayer = ctx->common.layered_dpb ? pic_id : 0,
-        .imageViewBinding = vkpic->img_view_ref,
+        .imageViewBinding = vkpic->view.ref[0],
     };
 
     *ref_slot = (VkVideoReferenceSlotInfoKHR) {
@@ -823,7 +823,7 @@ static int vk_hevc_start_frame(AVCodecContext          *avctx,
             .codedOffset = (VkOffset2D){ 0, 0 },
             .codedExtent = (VkExtent2D){ pic->f->width, pic->f->height },
             .baseArrayLayer = 0,
-            .imageViewBinding = vp->img_view_out,
+            .imageViewBinding = vp->view.out[0],
         },
     };
 
-- 
2.47.2