[FFmpeg-cvslog] hwcontext_vulkan: rewrite queue picking system for the new API
Lynne
git at videolan.org
Sun Aug 11 06:20:47 EEST 2024
ffmpeg | branch: master | Lynne <dev at lynne.ee> | Tue Jul 16 17:14:42 2024 +0200| [8790a30882d9e48c20b61b3a7ac1ff3225bf649c] | committer: Lynne
hwcontext_vulkan: rewrite queue picking system for the new API
This allows us to support different video ops on different queues,
as well as any other arbitrary queues we need.
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=8790a30882d9e48c20b61b3a7ac1ff3225bf649c
---
libavutil/hwcontext_vulkan.c | 262 +++++++++++++++++++++++++++----------------
1 file changed, 167 insertions(+), 95 deletions(-)
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 33d856ddd3..5baf68660a 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -1028,16 +1028,51 @@ end:
}
/* Picks the least used qf with the fewest unneeded flags, or -1 if none found */
-static inline int pick_queue_family(VkQueueFamilyProperties *qf, uint32_t num_qf,
+static inline int pick_queue_family(VkQueueFamilyProperties2 *qf, uint32_t num_qf,
VkQueueFlagBits flags)
{
int index = -1;
uint32_t min_score = UINT32_MAX;
for (int i = 0; i < num_qf; i++) {
- const VkQueueFlagBits qflags = qf[i].queueFlags;
+ VkQueueFlagBits qflags = qf[i].queueFamilyProperties.queueFlags;
+
+ /* Per the spec, reporting transfer caps is optional for these 2 types */
+ if ((flags & VK_QUEUE_TRANSFER_BIT) &&
+ (qflags & (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT)))
+ qflags |= VK_QUEUE_TRANSFER_BIT;
+
if (qflags & flags) {
- uint32_t score = av_popcount(qflags) + qf[i].timestampValidBits;
+ uint32_t score = av_popcount(qflags) + qf[i].queueFamilyProperties.timestampValidBits;
+ if (score < min_score) {
+ index = i;
+ min_score = score;
+ }
+ }
+ }
+
+ if (index > -1)
+ qf[index].queueFamilyProperties.timestampValidBits++;
+
+ return index;
+}
+
+static inline int pick_video_queue_family(VkQueueFamilyProperties2 *qf,
+ VkQueueFamilyVideoPropertiesKHR *qf_vid, uint32_t num_qf,
+ VkVideoCodecOperationFlagBitsKHR flags)
+{
+ int index = -1;
+ uint32_t min_score = UINT32_MAX;
+
+ for (int i = 0; i < num_qf; i++) {
+ const VkQueueFlagBits qflags = qf[i].queueFamilyProperties.queueFlags;
+ const VkQueueFlagBits vflags = qf_vid[i].videoCodecOperations;
+
+ if (!(qflags & (VK_QUEUE_VIDEO_ENCODE_BIT_KHR | VK_QUEUE_VIDEO_DECODE_BIT_KHR)))
+ continue;
+
+ if (vflags & flags) {
+ uint32_t score = av_popcount(vflags) + qf[i].queueFamilyProperties.timestampValidBits;
if (score < min_score) {
index = i;
min_score = score;
@@ -1046,7 +1081,7 @@ static inline int pick_queue_family(VkQueueFamilyProperties *qf, uint32_t num_qf
}
if (index > -1)
- qf[index].timestampValidBits++;
+ qf[index].queueFamilyProperties.timestampValidBits++;
return index;
}
@@ -1054,12 +1089,12 @@ static inline int pick_queue_family(VkQueueFamilyProperties *qf, uint32_t num_qf
static int setup_queue_families(AVHWDeviceContext *ctx, VkDeviceCreateInfo *cd)
{
uint32_t num;
- float *weights;
- VkQueueFamilyProperties *qf = NULL;
VulkanDevicePriv *p = ctx->hwctx;
AVVulkanDeviceContext *hwctx = &p->p;
FFVulkanFunctions *vk = &p->vkctx.vkfn;
- int graph_index, comp_index, tx_index, enc_index, dec_index;
+
+ VkQueueFamilyProperties2 *qf = NULL;
+ VkQueueFamilyVideoPropertiesKHR *qf_vid = NULL;
/* First get the number of queue families */
vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &num, NULL);
@@ -1069,118 +1104,155 @@ static int setup_queue_families(AVHWDeviceContext *ctx, VkDeviceCreateInfo *cd)
}
/* Then allocate memory */
- qf = av_malloc_array(num, sizeof(VkQueueFamilyProperties));
+ qf = av_malloc_array(num, sizeof(VkQueueFamilyProperties2));
if (!qf)
return AVERROR(ENOMEM);
+    qf_vid = av_malloc_array(num, sizeof(VkQueueFamilyVideoPropertiesKHR));
+    if (!qf_vid) {
+        /* qf was allocated just above and is owned by this function;
+         * release it so this early return does not leak it. */
+        av_free(qf);
+        return AVERROR(ENOMEM);
+    }
+
+ for (uint32_t i = 0; i < num; i++) {
+ qf_vid[i] = (VkQueueFamilyVideoPropertiesKHR) {
+ .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_VIDEO_PROPERTIES_KHR,
+ };
+ qf[i] = (VkQueueFamilyProperties2) {
+ .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_PROPERTIES_2,
+ .pNext = &qf_vid[i],
+ };
+ }
+
/* Finally retrieve the queue families */
- vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &num, qf);
+ vk->GetPhysicalDeviceQueueFamilyProperties2(hwctx->phys_dev, &num, qf);
av_log(ctx, AV_LOG_VERBOSE, "Queue families:\n");
for (int i = 0; i < num; i++) {
av_log(ctx, AV_LOG_VERBOSE, " %i:%s%s%s%s%s%s%s (queues: %i)\n", i,
- ((qf[i].queueFlags) & VK_QUEUE_GRAPHICS_BIT) ? " graphics" : "",
- ((qf[i].queueFlags) & VK_QUEUE_COMPUTE_BIT) ? " compute" : "",
- ((qf[i].queueFlags) & VK_QUEUE_TRANSFER_BIT) ? " transfer" : "",
- ((qf[i].queueFlags) & VK_QUEUE_VIDEO_ENCODE_BIT_KHR) ? " encode" : "",
- ((qf[i].queueFlags) & VK_QUEUE_VIDEO_DECODE_BIT_KHR) ? " decode" : "",
- ((qf[i].queueFlags) & VK_QUEUE_SPARSE_BINDING_BIT) ? " sparse" : "",
- ((qf[i].queueFlags) & VK_QUEUE_PROTECTED_BIT) ? " protected" : "",
- qf[i].queueCount);
+ ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_GRAPHICS_BIT) ? " graphics" : "",
+ ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_COMPUTE_BIT) ? " compute" : "",
+ ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_TRANSFER_BIT) ? " transfer" : "",
+ ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_VIDEO_ENCODE_BIT_KHR) ? " encode" : "",
+ ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_VIDEO_DECODE_BIT_KHR) ? " decode" : "",
+ ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_SPARSE_BINDING_BIT) ? " sparse" : "",
+ ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_PROTECTED_BIT) ? " protected" : "",
+ qf[i].queueFamilyProperties.queueCount);
/* We use this field to keep a score of how many times we've used that
* queue family in order to make better choices. */
- qf[i].timestampValidBits = 0;
+ qf[i].queueFamilyProperties.timestampValidBits = 0;
}
+ hwctx->nb_qf = 0;
+
/* Pick each queue family to use */
- graph_index = pick_queue_family(qf, num, VK_QUEUE_GRAPHICS_BIT);
- comp_index = pick_queue_family(qf, num, VK_QUEUE_COMPUTE_BIT);
- tx_index = pick_queue_family(qf, num, VK_QUEUE_TRANSFER_BIT);
- enc_index = pick_queue_family(qf, num, VK_QUEUE_VIDEO_ENCODE_BIT_KHR);
- dec_index = pick_queue_family(qf, num, VK_QUEUE_VIDEO_DECODE_BIT_KHR);
+/* Picks a queue family for one capability and records it in hwctx->qf[].
+ * If the chosen family is already listed, its flags/video_caps are merged
+ * into the existing entry; otherwise a new entry is appended.
+ * A non-zero vid_op selects by video codec operation, otherwise by type. */
+#define PICK_QF(type, vid_op)                                            \
+    do {                                                                 \
+        uint32_t i;                                                      \
+        /* Signed: both pick_*() helpers return int, -1 if none found */ \
+        int idx;                                                         \
+                                                                         \
+        if (vid_op)                                                      \
+            idx = pick_video_queue_family(qf, qf_vid, num, vid_op);      \
+        else                                                             \
+            idx = pick_queue_family(qf, num, type);                      \
+                                                                         \
+        /* No family offers this capability: nothing to record */        \
+        if (idx < 0)                                                     \
+            break;                                                       \
+                                                                         \
+        /* Merge into an existing entry for the same family, if any */   \
+        for (i = 0; i < hwctx->nb_qf; i++) {                             \
+            if (hwctx->qf[i].idx == idx) {                               \
+                hwctx->qf[i].flags      |= type;                         \
+                hwctx->qf[i].video_caps |= vid_op;                       \
+                break;                                                   \
+            }                                                            \
+        }                                                                \
+        /* Loop ran to completion: family not seen yet, append it */     \
+        if (i == hwctx->nb_qf) {                                         \
+            hwctx->qf[i].idx = idx;                                      \
+            hwctx->qf[i].num = qf[idx].queueFamilyProperties.queueCount; \
+            hwctx->qf[i].flags = type;                                   \
+            hwctx->qf[i].video_caps = vid_op;                            \
+            hwctx->nb_qf++;                                              \
+        }                                                                \
+    } while (0)
+
+ PICK_QF(VK_QUEUE_GRAPHICS_BIT, VK_VIDEO_CODEC_OPERATION_NONE_KHR);
+ PICK_QF(VK_QUEUE_COMPUTE_BIT, VK_VIDEO_CODEC_OPERATION_NONE_KHR);
+ PICK_QF(VK_QUEUE_TRANSFER_BIT, VK_VIDEO_CODEC_OPERATION_NONE_KHR);
+
+ PICK_QF(VK_QUEUE_VIDEO_ENCODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR);
+ PICK_QF(VK_QUEUE_VIDEO_DECODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR);
+
+ PICK_QF(VK_QUEUE_VIDEO_ENCODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR);
+ PICK_QF(VK_QUEUE_VIDEO_DECODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR);
+
+ PICK_QF(VK_QUEUE_VIDEO_DECODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR);
+
+ av_free(qf);
+ av_free(qf_vid);
+
+#undef PICK_QF
+
+ cd->pQueueCreateInfos = av_malloc_array(hwctx->nb_qf,
+ sizeof(VkDeviceQueueCreateInfo));
+ if (!cd->pQueueCreateInfos)
+ return AVERROR(ENOMEM);
+
+ for (uint32_t i = 0; i < hwctx->nb_qf; i++) {
+ int dup = 0;
+ float *weights = NULL;
+ VkDeviceQueueCreateInfo *pc;
+ for (uint32_t j = 0; j < cd->queueCreateInfoCount; j++) {
+ if (hwctx->qf[i].idx == cd->pQueueCreateInfos[j].queueFamilyIndex) {
+ dup = 1;
+ break;
+ }
+ }
+ if (dup)
+ continue;
+
+        weights = av_malloc_array(hwctx->qf[i].num, sizeof(float));
+        if (!weights) {
+            /* Entries [0, queueCreateInfoCount) are the ones populated by
+             * this loop, each owning its own priorities array: free them
+             * one by one, indexing with the cleanup loop's own counter. */
+            for (uint32_t j = 0; j < cd->queueCreateInfoCount; j++)
+                av_free((void *)cd->pQueueCreateInfos[j].pQueuePriorities);
+            av_free((void *)cd->pQueueCreateInfos);
+            /* Leave cd in a consistent state so that any cleanup done by
+             * the caller does not touch the memory released above. */
+            cd->pQueueCreateInfos    = NULL;
+            cd->queueCreateInfoCount = 0;
+            return AVERROR(ENOMEM);
+        }
+
+ for (uint32_t j = 0; j < hwctx->qf[i].num; j++)
+ weights[j] = 1.0;
- /* Signalling the transfer capabilities on a queue family is optional */
- if (tx_index < 0) {
- tx_index = pick_queue_family(qf, num, VK_QUEUE_COMPUTE_BIT);
- if (tx_index < 0)
- tx_index = pick_queue_family(qf, num, VK_QUEUE_GRAPHICS_BIT);
+ pc = (VkDeviceQueueCreateInfo *)cd->pQueueCreateInfos;
+ pc[cd->queueCreateInfoCount++] = (VkDeviceQueueCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
+ .queueFamilyIndex = hwctx->qf[i].idx,
+ .queueCount = hwctx->qf[i].num,
+ .pQueuePriorities = weights,
+ };
}
+ /* Setup deprecated fields */
hwctx->queue_family_index = -1;
hwctx->queue_family_comp_index = -1;
hwctx->queue_family_tx_index = -1;
hwctx->queue_family_encode_index = -1;
hwctx->queue_family_decode_index = -1;
-#define SETUP_QUEUE(qf_idx) \
- if (qf_idx > -1) { \
- int fidx = qf_idx; \
- int qc = qf[fidx].queueCount; \
- VkDeviceQueueCreateInfo *pc; \
- \
- if (fidx == graph_index) { \
- hwctx->queue_family_index = fidx; \
- hwctx->nb_graphics_queues = qc; \
- graph_index = -1; \
- } \
- if (fidx == comp_index) { \
- hwctx->queue_family_comp_index = fidx; \
- hwctx->nb_comp_queues = qc; \
- comp_index = -1; \
- } \
- if (fidx == tx_index) { \
- hwctx->queue_family_tx_index = fidx; \
- hwctx->nb_tx_queues = qc; \
- tx_index = -1; \
- } \
- if (fidx == enc_index) { \
- hwctx->queue_family_encode_index = fidx; \
- hwctx->nb_encode_queues = qc; \
- enc_index = -1; \
- } \
- if (fidx == dec_index) { \
- hwctx->queue_family_decode_index = fidx; \
- hwctx->nb_decode_queues = qc; \
- dec_index = -1; \
- } \
- \
- pc = av_realloc((void *)cd->pQueueCreateInfos, \
- sizeof(*pc) * (cd->queueCreateInfoCount + 1)); \
- if (!pc) { \
- av_free(qf); \
- return AVERROR(ENOMEM); \
- } \
- cd->pQueueCreateInfos = pc; \
- pc = &pc[cd->queueCreateInfoCount]; \
- \
- weights = av_malloc(qc * sizeof(float)); \
- if (!weights) { \
- av_free(qf); \
- return AVERROR(ENOMEM); \
- } \
- \
- memset(pc, 0, sizeof(*pc)); \
- pc->sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; \
- pc->queueFamilyIndex = fidx; \
- pc->queueCount = qc; \
- pc->pQueuePriorities = weights; \
- \
- for (int i = 0; i < qc; i++) \
- weights[i] = 1.0f / qc; \
- \
- cd->queueCreateInfoCount++; \
- }
-
- SETUP_QUEUE(graph_index)
- SETUP_QUEUE(comp_index)
- SETUP_QUEUE(tx_index)
- SETUP_QUEUE(enc_index)
- SETUP_QUEUE(dec_index)
-
-#undef SETUP_QUEUE
+#define SET_OLD_QF(field, nb_field, type) \
+ do { \
+ if (field < 0 && hwctx->qf[i].flags & type) { \
+ field = hwctx->qf[i].idx; \
+ nb_field = hwctx->qf[i].num; \
+ } \
+ } while (0)
- av_free(qf);
+ for (uint32_t i = 0; i < hwctx->nb_qf; i++) {
+ SET_OLD_QF(hwctx->queue_family_index, hwctx->nb_graphics_queues, VK_QUEUE_GRAPHICS_BIT);
+ SET_OLD_QF(hwctx->queue_family_comp_index, hwctx->nb_comp_queues, VK_QUEUE_COMPUTE_BIT);
+ SET_OLD_QF(hwctx->queue_family_tx_index, hwctx->nb_tx_queues, VK_QUEUE_TRANSFER_BIT);
+ SET_OLD_QF(hwctx->queue_family_encode_index, hwctx->nb_encode_queues, VK_QUEUE_VIDEO_ENCODE_BIT_KHR);
+ SET_OLD_QF(hwctx->queue_family_decode_index, hwctx->nb_decode_queues, VK_QUEUE_VIDEO_DECODE_BIT_KHR);
+ }
+
+#undef SET_OLD_QF
return 0;
}
More information about the ffmpeg-cvslog
mailing list