[FFmpeg-devel] [PATCH 55/97] Vulkan patchset part 2 - hwcontext rewrite and filtering
Lynne
dev at lynne.ee
Thu May 11 21:55:40 EEST 2023
May 11, 2023, 18:04 by anton at khirnov.net:
> Quoting Lynne (2023-04-24 17:56:38)
>
>> From b0c429d0d77d1789b6349bc6b296449ae1f8e9da Mon Sep 17 00:00:00 2001
>> From: Lynne <dev at lynne.ee>
>> Date: Tue, 15 Mar 2022 23:00:32 +0100
>> Subject: [PATCH 26/97] hwcontext_vulkan: support threadsafe queue and frame
>> operations
>>
>> ---
>> libavutil/hwcontext_vulkan.c | 176 +++++++++++++++++++++++++----------
>> libavutil/hwcontext_vulkan.h | 40 +++++++-
>> 2 files changed, 167 insertions(+), 49 deletions(-)
>>
>> diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
>> index 894b4b83f3..b0db59b2d8 100644
>> --- a/libavutil/hwcontext_vulkan.c
>> +++ b/libavutil/hwcontext_vulkan.c
>> @@ -27,6 +27,7 @@
>> #include <dlfcn.h>
>> #endif
>>
>> +#include <pthread.h>
>> #include <unistd.h>
>>
>> #include "config.h"
>> @@ -92,8 +93,10 @@ typedef struct VulkanDevicePriv {
>> VkPhysicalDeviceVulkan13Features device_features_1_3;
>>
>> /* Queues */
>> - uint32_t qfs[5];
>> - int num_qfs;
>> + pthread_mutex_t **qf_mutex;
>> + int nb_tot_qfs;
>> + uint32_t img_qfs[5];
>> + int nb_img_qfs;
>>
>
> This patch would be so much more readable without random renamings.
>
They're not random; the meaning of each variable is different
from what it was before.
nb_img_qfs is the total number of enabled queue families.
nb_tot_qfs is the total number of queue families listed by the driver.
>> /* Debug callback */
>> VkDebugUtilsMessengerEXT debug_ctx;
>> @@ -127,6 +130,8 @@ typedef struct VulkanFramesPriv {
>> } VulkanFramesPriv;
>>
>> typedef struct AVVkFrameInternal {
>> + pthread_mutex_t update_mutex;
>>
>
> As far as I can see, none of the mutexes you're adding here are
> ever destroyed.
>
Fixed.
>> +
>> #if CONFIG_CUDA
>> /* Importing external memory into cuda is really expensive so we keep the
>> * memory imported all the time */
>> @@ -1304,6 +1309,10 @@ static void vulkan_device_free(AVHWDeviceContext *ctx)
>> if (p->libvulkan)
>> dlclose(p->libvulkan);
>>
>> + for (int i = 0; i < p->nb_tot_qfs; i++)
>> + av_freep(&p->qf_mutex[i]);
>> + av_freep(&p->qf_mutex);
>> +
>> RELEASE_PROPS(hwctx->enabled_inst_extensions, hwctx->nb_enabled_inst_extensions);
>> RELEASE_PROPS(hwctx->enabled_dev_extensions, hwctx->nb_enabled_dev_extensions);
>> }
>> @@ -1436,13 +1445,26 @@ end:
>> return err;
>> }
>>
>> +static void lock_queue(AVHWDeviceContext *ctx, int queue_family, int index)
>>
>
> It'd be nice to be consistent with types.
> These are uint32 in vulkan, no?
>
Fixed. Though, they're more closely related to the
number of queue families given in the hwcontext, which
are 32-bit ints.
>> +{
>> + VulkanDevicePriv *p = ctx->internal->priv;
>> + pthread_mutex_lock(&p->qf_mutex[queue_family][index]);
>> +}
>> +
>> +static void unlock_queue(AVHWDeviceContext *ctx, int queue_family, int index)
>> +{
>> + VulkanDevicePriv *p = ctx->internal->priv;
>> + pthread_mutex_unlock(&p->qf_mutex[queue_family][index]);
>> +}
>> +
>> static int vulkan_device_init(AVHWDeviceContext *ctx)
>> {
>> int err;
>> - uint32_t queue_num;
>> + uint32_t qf_num;
>> AVVulkanDeviceContext *hwctx = ctx->hwctx;
>> VulkanDevicePriv *p = ctx->internal->priv;
>> FFVulkanFunctions *vk = &p->vkfn;
>> + VkQueueFamilyProperties *qf;
>> int graph_index, comp_index, tx_index, enc_index, dec_index;
>>
>> /* Set device extension flags */
>> @@ -1481,12 +1503,31 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
>> p->dev_is_nvidia = (p->props.properties.vendorID == 0x10de);
>> p->dev_is_intel = (p->props.properties.vendorID == 0x8086);
>>
>> - vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &queue_num, NULL);
>> - if (!queue_num) {
>> + vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &qf_num, NULL);
>> + if (!qf_num) {
>> av_log(ctx, AV_LOG_ERROR, "Failed to get queues!\n");
>> return AVERROR_EXTERNAL;
>> }
>>
>> + qf = av_malloc_array(qf_num, sizeof(VkQueueFamilyProperties));
>> + if (!qf)
>> + return AVERROR(ENOMEM);
>> +
>> + vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &qf_num, qf);
>> +
>> + p->qf_mutex = av_mallocz(qf_num*sizeof(*p->qf_mutex));
>>
>
> av_calloc()
>
>> + if (!p->qf_mutex)
>> + return AVERROR(ENOMEM);
>> + p->nb_tot_qfs = qf_num;
>> +
>> + for (int i = 0; i < qf_num; i++) {
>> + p->qf_mutex[i] = av_mallocz(qf[i].queueCount*sizeof(**p->qf_mutex));
>>
>
> av_calloc()
>
>> + if (!p->qf_mutex[i])
>> + return AVERROR(ENOMEM);
>> + for (int j = 0; j < qf[i].queueCount; j++)
>> + pthread_mutex_init(&p->qf_mutex[i][j], NULL);
>>
>
> Should be checked.
>
Fixed all three.
>> + }
>> +
>> graph_index = hwctx->queue_family_index;
>> comp_index = hwctx->queue_family_comp_index;
>> tx_index = hwctx->queue_family_tx_index;
>> @@ -1501,9 +1542,9 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
>> return AVERROR(EINVAL); \
>> } else if (fidx < 0 || ctx_qf < 0) { \
>> break; \
>> - } else if (ctx_qf >= queue_num) { \
>> + } else if (ctx_qf >= qf_num) { \
>> av_log(ctx, AV_LOG_ERROR, "Invalid %s family index %i (device has %i families)!\n", \
>> - type, ctx_qf, queue_num); \
>> + type, ctx_qf, qf_num); \
>> return AVERROR(EINVAL); \
>> } \
>> \
>> @@ -1520,7 +1561,7 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
>> tx_index = (ctx_qf == tx_index) ? -1 : tx_index; \
>> enc_index = (ctx_qf == enc_index) ? -1 : enc_index; \
>> dec_index = (ctx_qf == dec_index) ? -1 : dec_index; \
>> - p->qfs[p->num_qfs++] = ctx_qf; \
>> + p->img_qfs[p->nb_img_qfs++] = ctx_qf; \
>> } while (0)
>>
>> CHECK_QUEUE("graphics", 0, graph_index, hwctx->queue_family_index, hwctx->nb_graphics_queues);
>> @@ -1531,6 +1572,11 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
>>
>> #undef CHECK_QUEUE
>>
>> + if (!hwctx->lock_queue)
>> + hwctx->lock_queue = lock_queue;
>> + if (!hwctx->unlock_queue)
>> + hwctx->unlock_queue = unlock_queue;
>> +
>> /* Get device capabilities */
>> vk->GetPhysicalDeviceMemoryProperties(hwctx->phys_dev, &p->mprops);
>>
>> @@ -1732,9 +1778,6 @@ static void vulkan_free_internal(AVVkFrame *f)
>> {
>> AVVkFrameInternal *internal = f->internal;
>>
>> - if (!internal)
>> - return;
>> -
>> #if CONFIG_CUDA
>> if (internal->cuda_fc_ref) {
>> AVHWFramesContext *cuda_fc = (AVHWFramesContext *)internal->cuda_fc_ref->data;
>> @@ -1923,9 +1966,11 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
>> uint32_t src_qf, dst_qf;
>> VkImageLayout new_layout;
>> VkAccessFlags new_access;
>> + AVVulkanFramesContext *vkfc = hwfc->hwctx;
>> const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
>> VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
>> FFVulkanFunctions *vk = &p->vkfn;
>> + AVFrame tmp = { .data[0] = (uint8_t *)frame };
>>
>
> ???
>
This enables us to use the common dependency/dispatch code.
The prepare_frame function is used for both frame initialization
and frame import/export queue family transfer operations.
In the former case, no AVFrame exists yet, so, as this is purely
libavutil code, we create a temporary frame on the stack. Otherwise,
we'd need to allocate multiple frames somewhere, one for each
possible command buffer dispatch.
Comment added to commit message.
More information about the ffmpeg-devel
mailing list