[FFmpeg-devel] [PATCH] Vulkan hwcontext and filters

Lynne dev at lynne.ee
Sun Jan 19 15:11:36 EET 2020


Jan 18, 2020, 20:23 by sw at jkqxz.net:

> On 10/01/2020 21:05, Lynne wrote:
>
> The CUDA parts look like they could be split off into a separate commit?  (It's already huge.)
>

I don't really want broken commits or commits with dead code.



>>
>> diff --git a/configure b/configure
>> index 46f2038627..3113ebfdd8 100755
>> --- a/configure
>> +++ b/configure
>> @@ -309,6 +309,7 @@ External library support:
>>  --enable-openssl         enable openssl, needed for https support
>>  if gnutls, libtls or mbedtls is not used [no]
>>  --enable-pocketsphinx    enable PocketSphinx, needed for asr filter [no]
>> +  --enable-vulkan          enable Vulkan code [no]
>>
>
> Alphabetical order.
>

Fixed in the git.



>> --disable-sndio          disable sndio support [autodetect]
>>  --disable-schannel       disable SChannel SSP, needed for TLS support on
>>  Windows if openssl and gnutls are not used [autodetect]
>> @@ -1549,11 +1550,11 @@ require_cc(){
>>  }
>>  
>>  require_cpp(){
>> -    name="$1"
>> -    headers="$2"
>> -    classes="$3"
>> -    shift 3
>> -    check_lib_cpp "$headers" "$classes" "$@" || die "ERROR: $name not found"
>> +    log require_cpp "$@"
>> +    name_version="$1"
>> +    name="${1%% *}"
>> +    shift
>> +    check_lib_cpp "$name" "$@" || die "ERROR: $name_version not found"
>>  }
>>
>
> This change looks unrelated.  (require_cpp isn't used in this patch at all.)
>

Its used in the lavfi vulkan patch, moved.



>>
>> require_headers(){
>> @@ -1854,6 +1855,7 @@ HWACCEL_LIBRARY_LIST="
>>  mmal
>>  omx
>>  opencl
>> +    vulkan
>>  "
>>  
>>  DOCUMENT_LIST="
>> @@ -3639,7 +3641,7 @@ avformat_deps="avcodec avutil"
>>  avformat_suggest="libm network zlib"
>>  avresample_deps="avutil"
>>  avresample_suggest="libm"
>> -avutil_suggest="clock_gettime ffnvcodec libm libdrm libmfx opencl user32 vaapi videotoolbox corefoundation corevideo coremedia bcrypt"
>> +avutil_suggest="clock_gettime ffnvcodec libm libdrm libmfx opencl user32 vaapi vulkan videotoolbox corefoundation corevideo coremedia bcrypt"
>>  postproc_deps="avutil gpl"
>>  postproc_suggest="libm"
>>  swresample_deps="avutil"
>> @@ -6626,6 +6628,9 @@ enabled vdpau &&
>>  
>>  enabled crystalhd && check_lib crystalhd "stdint.h libcrystalhd/libcrystalhd_if.h" DtsCrystalHDVersion -lcrystalhd
>>  
>> +enabled vulkan &&
>> +    require_pkg_config vulkan "vulkan >= 1.1.97" "vulkan/vulkan.h" vkCreateInstance
>>
>
> Presumably you have some specific requirement in mind which wants this version - can you note it somewhere?  (Either here or in the commit message.)
>

The DRM export/import and I think the semaphore import API.
Its not a new version, its been out for a long time.



>> +
>>  if enabled x86; then
>>  case $target_os in
>>  mingw32*|mingw64*|win32|win64|linux|cygwin*)
>> ...
>> diff --git a/libavutil/hwcontext.h b/libavutil/hwcontext.h
>> index f5a4b62387..f874af9f8f 100644
>> --- a/libavutil/hwcontext.h
>> +++ b/libavutil/hwcontext.h
>> @@ -36,6 +36,7 @@ enum AVHWDeviceType {
>>  AV_HWDEVICE_TYPE_DRM,
>>  AV_HWDEVICE_TYPE_OPENCL,
>>  AV_HWDEVICE_TYPE_MEDIACODEC,
>> +    AV_HWDEVICE_TYPE_VULKAN,
>>  };
>>  
>>  typedef struct AVHWDeviceInternal AVHWDeviceInternal;
>> diff --git a/libavutil/hwcontext_cuda.c b/libavutil/hwcontext_cuda.c
>> index 30611b1912..18abb87bbd 100644
>> --- a/libavutil/hwcontext_cuda.c
>> +++ b/libavutil/hwcontext_cuda.c
>> @@ -21,6 +21,9 @@
>>  #include "hwcontext.h"
>>  #include "hwcontext_internal.h"
>>  #include "hwcontext_cuda_internal.h"
>> +#if CONFIG_VULKAN
>> +#include "hwcontext_vulkan.h"
>> +#endif
>>  #include "cuda_check.h"
>>  #include "mem.h"
>>  #include "pixdesc.h"
>> @@ -42,6 +45,9 @@ static const enum AVPixelFormat supported_formats[] = {
>>  AV_PIX_FMT_YUV444P16,
>>  AV_PIX_FMT_0RGB32,
>>  AV_PIX_FMT_0BGR32,
>> +#if CONFIG_VULKAN
>> +    AV_PIX_FMT_VULKAN,
>> +#endif
>>
>
> Do all devices we can do CUDA on also support Vulkan?  If not, this should probably filter it out in get_constraints() to avoid exposing something which can't possibly work.
>

Fixed with more pci vendor id checking :(



>> };
>>  
>>  #define CHECK_CU(x) FF_CUDA_CHECK_DL(device_ctx, cu, x)
>> @@ -205,6 +211,10 @@ static int cuda_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst,
>>  CUcontext dummy;
>>  int i, ret;
>>  
>> +    /* We don't support transfers to HW devices. */
>> +    if (dst->hw_frames_ctx)
>> +        return AVERROR(ENOSYS);
>> +
>>  ret = CHECK_CU(cu->cuCtxPushCurrent(hwctx->cuda_ctx));
>>  if (ret < 0)
>>  return ret;
>> @@ -247,6 +257,10 @@ static int cuda_transfer_data_to(AVHWFramesContext *ctx, AVFrame *dst,
>>  CUcontext dummy;
>>  int i, ret;
>>  
>> +    /* We don't support transfers from HW devices. */
>> +    if (src->hw_frames_ctx)
>> +        return AVERROR(ENOSYS);
>> +
>>  ret = CHECK_CU(cu->cuCtxPushCurrent(hwctx->cuda_ctx));
>>  if (ret < 0)
>>  return ret;
>> @@ -389,6 +403,112 @@ error:
>>  return AVERROR_UNKNOWN;
>>  }
>>  
>> +static int cuda_device_derive(AVHWDeviceContext *device_ctx,
>> +                              AVHWDeviceContext *src_ctx,
>> +                              int flags) {
>> +    AVCUDADeviceContext *hwctx = device_ctx->hwctx;
>> +    CudaFunctions *cu;
>> +    const char *src_uuid = NULL;
>> +    CUcontext dummy;
>> +    int ret, i, device_count, dev_active = 0;
>> +    unsigned int dev_flags = 0;
>> +
>> +    const unsigned int desired_flags = CU_CTX_SCHED_BLOCKING_SYNC;
>> +
>> +    switch (src_ctx->type) {
>> +#if CONFIG_VULKAN
>> +    case AV_HWDEVICE_TYPE_VULKAN: {
>> +        AVVulkanDeviceContext *vkctx = src_ctx->hwctx;
>> +        src_uuid = vkctx->device_uuid;
>> +        break;
>> +    }
>> +#endif
>> +    default:
>> +        return AVERROR(ENOSYS);
>> +    }
>> +
>> +    if (!src_uuid) {
>> +        av_log(device_ctx, AV_LOG_ERROR,
>> +               "Failed to get UUID of source device.\n");
>> +        goto error;
>> +    }
>> +
>> +    if (cuda_device_init(device_ctx) < 0)
>> +        goto error;
>> +
>> +    cu = hwctx->internal->cuda_dl;
>> +
>> +    ret = CHECK_CU(cu->cuInit(0));
>> +    if (ret < 0)
>> +        goto error;
>> +
>> +    ret = CHECK_CU(cu->cuDeviceGetCount(&device_count));
>> +    if (ret < 0)
>> +        goto error;
>> +
>> +    hwctx->internal->cuda_device = -1;
>> +    for (i = 0; i < device_count; i++) {
>> +        CUdevice dev;
>> +        CUuuid uuid;
>> +
>> +        ret = CHECK_CU(cu->cuDeviceGet(&dev, i));
>> +        if (ret < 0)
>> +            goto error;
>> +
>> +        ret = CHECK_CU(cu->cuDeviceGetUuid(&uuid, dev));
>> +        if (ret < 0)
>> +            goto error;
>> +
>> +        if (memcmp(src_uuid, uuid.bytes, sizeof (uuid.bytes)) == 0) {
>> +            hwctx->internal->cuda_device = dev;
>> +            break;
>> +        }
>> +    }
>> +
>> +    if (hwctx->internal->cuda_device == -1) {
>> +        av_log(device_ctx, AV_LOG_ERROR, "Could not derive CUDA device.\n");
>>
>
> This error is maybe more like "Can't find the matching CUDA device to the supplied Vulkan device".
>

Let's keep it that way, its not meant to be vulkan specific, though its only used by vulkan currently.



>> +        goto error;
>> +    }
>> +
>> +    hwctx->internal->flags = flags;
>> +
>> +    if (flags & AV_CUDA_USE_PRIMARY_CONTEXT) {
>> +        ret = CHECK_CU(cu->cuDevicePrimaryCtxGetState(hwctx->internal->cuda_device, &dev_flags, &dev_active));
>> +        if (ret < 0)
>> +            goto error;
>> +
>> +        if (dev_active && dev_flags != desired_flags) {
>> +            av_log(device_ctx, AV_LOG_ERROR, "Primary context already active with incompatible flags.\n");
>> +            goto error;
>> +        } else if (dev_flags != desired_flags) {
>> +            ret = CHECK_CU(cu->cuDevicePrimaryCtxSetFlags(hwctx->internal->cuda_device, desired_flags));
>> +            if (ret < 0)
>> +                goto error;
>> +        }
>> +
>> +        ret = CHECK_CU(cu->cuDevicePrimaryCtxRetain(&hwctx->cuda_ctx, hwctx->internal->cuda_device));
>> +        if (ret < 0)
>> +            goto error;
>> +    } else {
>> +        ret = CHECK_CU(cu->cuCtxCreate(&hwctx->cuda_ctx, desired_flags, hwctx->internal->cuda_device));
>> +        if (ret < 0)
>> +            goto error;
>> +
>> +        CHECK_CU(cu->cuCtxPopCurrent(&dummy));
>> +    }
>> +
>> +    hwctx->internal->is_allocated = 1;
>> +
>> +    // Setting stream to NULL will make functions automatically use the default CUstream
>> +    hwctx->stream = NULL;
>> +
>> +    return 0;
>> +
>> +error:
>> +    cuda_device_uninit(device_ctx);
>> +    return AVERROR_UNKNOWN;
>> +}
>> +
>>  const HWContextType ff_hwcontext_type_cuda = {
>>  .type                 = AV_HWDEVICE_TYPE_CUDA,
>>  .name                 = "CUDA",
>> @@ -397,6 +517,7 @@ const HWContextType ff_hwcontext_type_cuda = {
>>  .frames_priv_size     = sizeof(CUDAFramesContext),
>>  
>>  .device_create        = cuda_device_create,
>> +    .device_derive        = cuda_device_derive,
>>  .device_init          = cuda_device_init,
>>  .device_uninit        = cuda_device_uninit,
>>  .frames_get_constraints = cuda_frames_get_constraints,
>> ...
>> diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
>> new file mode 100644
>> index 0000000000..d4eb8ffd35
>> --- /dev/null
>> +++ b/libavutil/hwcontext_vulkan.c
>> @@ -0,0 +1,2804 @@
>> ...
>> +
>> +static const struct {
>> +    enum AVPixelFormat pixfmt;
>> +    const VkFormat vkfmts[3];
>> +} vk_pixfmt_map[] = {
>> +    { AV_PIX_FMT_GRAY8,   { VK_FORMAT_R8_UNORM } },
>> +    { AV_PIX_FMT_GRAY16,  { VK_FORMAT_R16_UNORM } },
>> +    { AV_PIX_FMT_GRAYF32, { VK_FORMAT_R32_SFLOAT } },
>> +
>> +    { AV_PIX_FMT_NV12, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } },
>> +    { AV_PIX_FMT_P010, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
>>
>
> Is P010 still safe when the low bits might have any value?
>

Our padding is in the top bits. Vulkan defines it in the bottom bits, hence we can't use it.
I can't see why it wouldn't be safe. Every code that deals with user-supplied frames must rely they're junk.



>> +    { AV_PIX_FMT_P016, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
>> +
>> +    { AV_PIX_FMT_YUV420P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
>> +    { AV_PIX_FMT_YUV422P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
>> +    { AV_PIX_FMT_YUV444P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
>> +
>> +    { AV_PIX_FMT_YUV420P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
>> +    { AV_PIX_FMT_YUV422P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
>> +    { AV_PIX_FMT_YUV444P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
>> +
>> +    { AV_PIX_FMT_ABGR,   { VK_FORMAT_A8B8G8R8_UNORM_PACK32 } },
>> +    { AV_PIX_FMT_BGRA,   { VK_FORMAT_B8G8R8A8_UNORM } },
>> +    { AV_PIX_FMT_RGBA,   { VK_FORMAT_R8G8B8A8_UNORM } },
>> +    { AV_PIX_FMT_RGB24,  { VK_FORMAT_R8G8B8_UNORM } },
>> +    { AV_PIX_FMT_BGR24,  { VK_FORMAT_B8G8R8_UNORM } },
>> +    { AV_PIX_FMT_RGB48,  { VK_FORMAT_R16G16B16_UNORM } },
>> +    { AV_PIX_FMT_RGBA64, { VK_FORMAT_R16G16B16A16_UNORM } },
>> +    { AV_PIX_FMT_RGB565, { VK_FORMAT_R5G6B5_UNORM_PACK16 } },
>> +    { AV_PIX_FMT_BGR565, { VK_FORMAT_B5G6R5_UNORM_PACK16 } },
>> +    { AV_PIX_FMT_BGR0,   { VK_FORMAT_B8G8R8A8_UNORM } },
>> +    { AV_PIX_FMT_0BGR,   { VK_FORMAT_A8B8G8R8_UNORM_PACK32 } },
>> +    { AV_PIX_FMT_RGB0,   { VK_FORMAT_R8G8B8A8_UNORM } },
>> +
>> +    { AV_PIX_FMT_GBRPF32, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } },
>> +};
>> +
>> ...
>> +static int check_extensions(AVHWDeviceContext *ctx, int dev,
>> +                            const char * const **dst, uint32_t *num, int debug)
>> +{
>> +    const char *tstr;
>> +    const char **extension_names = NULL;
>> +    VulkanDevicePriv *p = ctx->internal->priv;
>> +    AVVulkanDeviceContext *hwctx = ctx->hwctx;
>> +    int err = 0, found, extensions_found = 0;
>> +
>> +    const char *mod;
>> +    int optional_exts_num;
>> +    uint32_t sup_ext_count;
>> +    VkExtensionProperties *sup_ext;
>> +    const VulkanOptExtension *optional_exts;
>> +
>> +    if (!dev) {
>> +        mod = "instance";
>> +        optional_exts = optional_instance_exts;
>> +        optional_exts_num = FF_ARRAY_ELEMS(optional_instance_exts);
>> +        vkEnumerateInstanceExtensionProperties(NULL, &sup_ext_count, NULL);
>> +        sup_ext = av_malloc_array(sup_ext_count, sizeof(VkExtensionProperties));
>> +        if (!sup_ext)
>> +            return AVERROR(ENOMEM);
>> +        vkEnumerateInstanceExtensionProperties(NULL, &sup_ext_count, sup_ext);
>> +    } else {
>> +        mod = "device";
>> +        optional_exts = optional_device_exts;
>> +        optional_exts_num = FF_ARRAY_ELEMS(optional_device_exts);
>> +        vkEnumerateDeviceExtensionProperties(hwctx->phys_dev, NULL,
>> +                                             &sup_ext_count, NULL);
>> +        sup_ext = av_malloc_array(sup_ext_count, sizeof(VkExtensionProperties));
>> +        if (!sup_ext)
>> +            return AVERROR(ENOMEM);
>> +        vkEnumerateDeviceExtensionProperties(hwctx->phys_dev, NULL,
>> +                                             &sup_ext_count, sup_ext);
>> +    }
>> +
>> +    for (int i = 0; i < optional_exts_num; i++) {
>> +        int req = optional_exts[i].flag & EXT_REQUIRED;
>> +        tstr = optional_exts[i].name;
>> +
>> +        found = 0;
>> +        for (int j = 0; j < sup_ext_count; j++) {
>> +            if (!strcmp(tstr, sup_ext[j].extensionName)) {
>> +                found = 1;
>> +                break;
>> +            }
>> +        }
>> +        if (!found) {
>> +            int lvl = req ? AV_LOG_ERROR : AV_LOG_VERBOSE;
>> +            av_log(ctx, lvl, "Extension \"%s\" not found!\n", tstr);
>> +            if (req) {
>> +                err = AVERROR(EINVAL);
>> +                goto end;
>> +            }
>> +            continue;
>> +        }
>> +        if (!req)
>> +            p->extensions |= optional_exts[i].flag;
>> +
>> +        av_log(ctx, AV_LOG_VERBOSE, "Using %s extension \"%s\"\n", mod, tstr);
>> +
>> +        ADD_VAL_TO_LIST(extension_names, extensions_found, tstr);
>> +    }
>> +
>> +    if (debug && !dev) {
>> +        tstr = VK_EXT_DEBUG_UTILS_EXTENSION_NAME;
>> +        found = 0;
>> +        for (int j = 0; j < sup_ext_count; j++) {
>> +            if (!strcmp(tstr, sup_ext[j].extensionName)) {
>> +                found = 1;
>> +                break;
>> +            }
>> +        }
>> +        if (found) {
>> +            ADD_VAL_TO_LIST(extension_names, extensions_found, tstr);
>> +        } else {
>> +            av_log(ctx, AV_LOG_ERROR, "Debug extension \"%s\" not found!\n",
>> +                   tstr);
>> +            err = AVERROR(EINVAL);
>> +            goto end;
>> +        }
>> +    }
>> +
>> +    *dst = extension_names;
>> +    *num = extensions_found;
>> +
>> +end:
>> +    av_free(sup_ext);
>>
>
> I think this leaks the extension_names array with some of the later failures.
>

One, fixed.



>> +    return err;
>> +}
>> +
>> ...
>> +
>> +typedef struct VulkanDeviceSelection {
>> +    uint8_t uuid[VK_UUID_SIZE]; /* Will use this first unless !has_uuid */
>> +    int has_uuid;
>> +    const char *name; /* Will use this second unless NULL */
>> +    uint32_t pci_device; /* Will use this second unless 0x0 */
>> +    uint32_t vendor_id; /* Last resort to find something deterministic */
>> +    int index; /* Finally fall back to index */
>> +} VulkanDeviceSelection;
>> +
>> +static const char *vk_dev_type(enum VkPhysicalDeviceType type)
>> +{
>> +    switch (type) {
>> +    case VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU: return "integrated";
>> +    case VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU:   return "discrete";
>> +    case VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU:    return "virtual";
>> +    case VK_PHYSICAL_DEVICE_TYPE_CPU:            return "software";
>> +    default:                                     return "unknown";
>> +    }
>> +}
>> +
>> +/* Finds a device */
>> +static int find_device(AVHWDeviceContext *ctx, VulkanDeviceSelection *select)
>> +{
>> ...
>>
>
> Yay, I like the improvement to the selection options :)
>
>> +}
>> +
>> ...
>> +
>> +static void free_exec_ctx(AVHWDeviceContext *ctx, VulkanExecCtx *cmd)
>> +{
>> +    AVVulkanDeviceContext *hwctx = ctx->hwctx;
>> +
>> +    vkDestroyFence(hwctx->act_dev, cmd->fence, hwctx->alloc);
>> +
>> +    if (cmd->buf)
>> +        vkFreeCommandBuffers(hwctx->act_dev, cmd->pool, 1, &cmd->buf);
>> +    if (cmd->pool)
>> +        vkDestroyCommandPool(hwctx->act_dev, cmd->pool, hwctx->alloc);
>> +}
>> +
>> +static void vulkan_device_free(AVHWDeviceContext *ctx)
>> +{
>> +    VulkanDevicePriv *p = ctx->internal->priv;
>> +    AVVulkanDeviceContext *hwctx = ctx->hwctx;
>> +
>> +    free_exec_ctx(ctx, &p->cmd);
>>
>
> A device destroyed before it is fully created need not have a valid exec_ctx.
>
> (E.g. "./ffmpeg_g -init_hw_device vulkan:123456789" segfaults here.)
>

Fixed.



>> ...
>> +
>> +static int vulkan_device_init(AVHWDeviceContext *ctx)
>> +{
>> +    int err;
>> +    uint32_t queue_num;
>> +    AVVulkanDeviceContext *hwctx = ctx->hwctx;
>> +    VulkanDevicePriv *p = ctx->internal->priv;
>> +
>> +    vkGetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &queue_num, NULL);
>> +    if (!queue_num) {
>> +        av_log(ctx, AV_LOG_ERROR, "Failed to get queues!\n");
>> +        return AVERROR_EXTERNAL;
>> +    }
>> +
>> +    if (hwctx->queue_family_index      >= queue_num ||
>> +        hwctx->queue_family_tx_index   >= queue_num ||
>> +        hwctx->queue_family_comp_index >= queue_num) {
>> +        av_log(ctx, AV_LOG_ERROR, "Invalid queue index!\n");
>>
>
> Maybe say the invalid indices.
>

Fixed.



>> +        return AVERROR_EXTERNAL;
>>
>
> I think this is EINVAL - the user supplied the invalid queue index.
>

Fixed.



>> +    }
>> +
>> +    /* Create exec context - if there's something invalid this will error out */
>> +    err = create_exec_ctx(ctx, &p->cmd, hwctx->queue_family_tx_index);
>> +    if (err)
>> +        return err;
>> +
>> +    /* Get device capabilities */
>> +    vkGetPhysicalDeviceMemoryProperties(hwctx->phys_dev, &p->mprops);
>> +
>> +    return 0;
>> +}
>> +
>> +static int vulkan_device_create(AVHWDeviceContext *ctx, const char *device,
>> +                                AVDictionary *opts, int flags)
>> +{
>> +    VulkanDeviceSelection dev_select = { 0 };
>> +    if (device && device[0]) {
>> +        char *end = NULL;
>> +        dev_select.index = strtol(device, &end, 10);
>> +        if (end == device) {
>> +            dev_select.index = 0;
>> +            dev_select.name  = device;
>> +        }
>> +    }
>>
>
> Is it worth making uuid=f00 work here as well?  (From opts rather than device: "-init_hw_device vulkan:,uuid=f00".)
>

Would like to, but I don't think its worth the extra dependency (libuuid, its widespread on linux but I'd rather not NIH parsing).



>> +
>> +    return vulkan_device_create_internal(ctx, &dev_select, opts, flags);
>> +}
>> +
>> +static int vulkan_device_derive(AVHWDeviceContext *ctx,
>> +                                AVHWDeviceContext *src_ctx, int flags)
>> +{
>> +    av_unused VulkanDeviceSelection dev_select = { 0 };
>> +
>> +    /* If there's only one device on the system, then even if its not covered
>> +     * by the following checks (e.g. non-PCIe ARM GPU), having an empty
>> +     * dev_select will mean it'll get picked. */
>>
>
> Kindof evil, but makes sense.
>
>> +    switch(src_ctx->type) {
>> +#if CONFIG_LIBDRM
>> +#if CONFIG_VAAPI
>> +    case AV_HWDEVICE_TYPE_VAAPI: {
>> +        AVVAAPIDeviceContext *src_hwctx = src_ctx->hwctx;
>> +
>> +        const char *vendor = vaQueryVendorString(src_hwctx->display);
>> +        if (!vendor) {
>> +            av_log(ctx, AV_LOG_ERROR, "Unable to get device info from VAAPI!\n");
>> +            return AVERROR_EXTERNAL;
>> +        }
>> +
>> +        if (strstr(vendor, "Intel"))
>> +            dev_select.vendor_id = 0x8086;
>> +        if (strstr(vendor, "AMD"))
>> +            dev_select.vendor_id = 0x1002;
>>
>
> Yuck, but I don't see a better way :(
>
> I might look into adding something to libva to allow this to work properly, since this combination will happen.
>
>> +
>> +        return vulkan_device_create_internal(ctx, &dev_select, NULL, flags);
>> +    }
>> +#endif
>> +    case AV_HWDEVICE_TYPE_DRM: {
>> +        AVDRMDeviceContext *src_hwctx = src_ctx->hwctx;
>> +
>> +        drmDevice *drm_dev_info;
>> +        int err = drmGetDevice(src_hwctx->fd, &drm_dev_info);
>> +        if (err) {
>> +            av_log(ctx, AV_LOG_ERROR, "Unable to get device info from DRM fd!\n");
>> +            return AVERROR_EXTERNAL;
>> +        }
>> +
>> +        if (drm_dev_info->bustype == DRM_BUS_PCI)
>> +            dev_select.pci_device = drm_dev_info->deviceinfo.pci->device_id;
>> +
>> +        drmFreeDevice(&drm_dev_info);
>> +
>> +        return vulkan_device_create_internal(ctx, &dev_select, NULL, flags);
>> +    }
>> +#endif
>> +#if CONFIG_CUDA
>> +    case AV_HWDEVICE_TYPE_CUDA: {
>> +        AVHWDeviceContext *cuda_cu = src_ctx;
>> +        AVCUDADeviceContext *src_hwctx = src_ctx->hwctx;
>> +        AVCUDADeviceContextInternal *cu_internal = src_hwctx->internal;
>> +        CudaFunctions *cu = cu_internal->cuda_dl;
>> +
>> +        int ret = CHECK_CU(cu->cuDeviceGetUuid((CUuuid *)&dev_select.uuid,
>> +                                               cu_internal->cuda_device));
>> +        if (ret < 0) {
>> +            av_log(ctx, AV_LOG_ERROR, "Unable to get UUID from CUDA!\n");
>> +            return AVERROR_EXTERNAL;
>> +        }
>> +
>> +        dev_select.has_uuid = 1;
>> +
>> +        return vulkan_device_create_internal(ctx, &dev_select, NULL, flags);
>> +    }
>> +#endif
>> +    default:
>> +        return AVERROR(ENOSYS);
>> +    }
>> +}
>> +
>> +static int vulkan_frames_get_constraints(AVHWDeviceContext *ctx,
>> +                                         const void *hwconfig,
>> +                                         AVHWFramesConstraints *constraints)
>> +{
>> +    int count = 0;
>> +    AVVulkanDeviceContext *hwctx = ctx->hwctx;
>> +    VulkanDevicePriv *p = ctx->internal->priv;
>> +
>> +    for (enum AVPixelFormat i = 0; i < AV_PIX_FMT_NB; i++)
>> +        count += pixfmt_is_supported(hwctx, i, p->use_linear_images);
>> +
>> +#if CONFIG_CUDA
>> +    count++;
>>
>
> I think you should be able to test whether a device supports CUDA here, so it isn't included for non-Nvidia devices?
>

Fixed.



>> +#endif
>> +
>> +    constraints->valid_sw_formats = av_malloc_array(count + 1,
>> +                                                    sizeof(enum AVPixelFormat));
>> +    if (!constraints->valid_sw_formats)
>> +        return AVERROR(ENOMEM);
>> +
>> +    count = 0;
>> +    for (enum AVPixelFormat i = 0; i < AV_PIX_FMT_NB; i++)
>> +        if (pixfmt_is_supported(hwctx, i, p->use_linear_images))
>> +            constraints->valid_sw_formats[count++] = i;
>> +
>> +#if CONFIG_CUDA
>> +    constraints->valid_sw_formats[count++] = AV_PIX_FMT_CUDA;
>> +#endif
>> +    constraints->valid_sw_formats[count++] = AV_PIX_FMT_NONE;
>> +
>> +    constraints->min_width  = 0;
>> +    constraints->min_height = 0;
>> +    constraints->max_width  = p->props.limits.maxImageDimension2D;
>> +    constraints->max_height = p->props.limits.maxImageDimension2D;
>> +
>> +    constraints->valid_hw_formats = av_malloc_array(2, sizeof(enum AVPixelFormat));
>> +    if (!constraints->valid_hw_formats)
>> +        return AVERROR(ENOMEM);
>> +
>> +    constraints->valid_hw_formats[0] = AV_PIX_FMT_VULKAN;
>> +    constraints->valid_hw_formats[1] = AV_PIX_FMT_NONE;
>> +
>> +    return 0;
>> +}
>> +
>> ...
>> +
>> +typedef struct VulkanFramesPriv {
>> +    VulkanExecCtx cmd;
>> +} VulkanFramesPriv;
>>
>
> I think put this definition at the top so that it's easy to find what priv on a Vulkan HWFC is.
>

Done.



>> ...
>> +
>> +static void vulkan_frame_free(void *opaque, uint8_t *data)
>> +{
>> +    AVVkFrame *f = (AVVkFrame *)data;
>> +    AVHWFramesContext *hwfc = opaque;
>> +    AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
>> +    int planes = av_pix_fmt_count_planes(hwfc->sw_format);
>> +
>> +    if (!f)
>> +        return;
>>
>
> When can you have !f?  That seems invalid in an "assert that f is not null" type of way.
>

Fixed.



>> +
>> +    vulkan_free_internal(f->internal);
>> +
>> +    for (int i = 0; i < planes; i++) {
>> +        vkDestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc);
>> +        vkFreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc);
>> +        vkDestroySemaphore(hwctx->act_dev, f->sem[i], hwctx->alloc);
>> +    }
>> +
>> +    av_free(f);
>> +}
>> +
>> ...
>> +
>> +static int vulkan_frames_init(AVHWFramesContext *hwfc)
>> +{
>> +    int err;
>> +    AVVkFrame *f;
>> +    AVVulkanFramesContext *hwctx = hwfc->hwctx;
>> +    VulkanFramesPriv *fp = hwfc->internal->priv;
>> +    AVVulkanDeviceContext *dev_hwctx = hwfc->device_ctx->hwctx;
>> +    VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
>> +
>> +    if (hwfc->pool)
>> +        return 0;
>> +
>> +    /* Default pool flags */
>> +    hwctx->tiling = hwctx->tiling ? hwctx->tiling : p->use_linear_images ?
>> +                    VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL;
>> +
>> +    hwctx->usage |= DEFAULT_USAGE_FLAGS;
>>
>
> Is it possible that this disallows some use-cases in a device where those default flags need not be not supported?  For example, some sort of magic image-writer like a video decoder where the output images can only ever be used as a source by non-magic operations.  Baking that into the API (as in the comment on usage in the header) seems bad if so.
>

You need to support those flags to do anything useful with the images.
This restricts read-only images since those don't support the STORAGE flag. Such images are the planar images, which DO support the STORAGE flag but only for their subimages (e.g. individual planes). This is in spec, regardless what Intel says and disagrees with (they advise to alias memory instead). We don't use them anyway, and if a user wanted to repackage received frames as planar (or unpackage them) they still can.



>> +
>> +    err = create_exec_ctx(hwfc->device_ctx, &fp->cmd,
>> +                          dev_hwctx->queue_family_tx_index);
>> +    if (err)
>> +        return err;
>> +
>> +    /* Test to see if allocation will fail */
>> +    err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage,
>> +                       hwctx->create_pnext);
>> +    if (err)
>> +        return err;
>> +
>> +    vulkan_frame_free(hwfc, (uint8_t *)f);
>> +
>> +    hwfc->internal->pool_internal = av_buffer_pool_init2(sizeof(AVVkFrame),
>> +                                                         hwfc, vulkan_pool_alloc,
>> +                                                         NULL);
>> +    if (!hwfc->internal->pool_internal)
>> +        return AVERROR(ENOMEM);
>> +
>> +    return 0;
>> +}
>> +
>> ...
>> +
>> +static const struct {
>> +    uint32_t va_fourcc;
>>
>
> va_fourcc?
>

drm_fourcc now.



>> +    VkFormat vk_format;
>> +} vulkan_drm_format_map[] = {
>> +    { DRM_FORMAT_R8,       VK_FORMAT_R8_UNORM       },
>> +    { DRM_FORMAT_R16,      VK_FORMAT_R16_UNORM      },
>> +    { DRM_FORMAT_GR88,     VK_FORMAT_R8G8_UNORM     },
>> +    { DRM_FORMAT_RG88,     VK_FORMAT_R8G8_UNORM     },
>> +    { DRM_FORMAT_GR1616,   VK_FORMAT_R16G16_UNORM   },
>> +    { DRM_FORMAT_RG1616,   VK_FORMAT_R16G16_UNORM   },
>>
>
> Are RG88 and RG1616 right?  I thought you would always want them reversed.
>

I think so. I forgot why I did it that way, probably because of something I saw in hwcontext_vaapi. I'd rather leave them as-is for any weird implementation. Or Intel to break.



>> +{
>> +    for (int i = 0; i < FF_ARRAY_ELEMS(vulkan_drm_format_map); i++)
>> +        if (vulkan_drm_format_map[i].va_fourcc == va_fourcc)
>> +            return vulkan_drm_format_map[i].vk_format;
>> +    return VK_FORMAT_UNDEFINED;
>> +}
>> +
>> +static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **frame,
>> +                                          AVDRMFrameDescriptor *desc)
>> +{
>> +    int err = 0;
>> +    VkResult ret;
>> +    AVVkFrame *f;
>> +    AVHWDeviceContext *ctx = hwfc->device_ctx;
>> +    AVVulkanDeviceContext *hwctx = ctx->hwctx;
>> +    VulkanDevicePriv *p = ctx->internal->priv;
>> +    const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
>> +    const AVPixFmtDescriptor *fmt_desc = av_pix_fmt_desc_get(hwfc->sw_format);
>> +    const int has_modifiers = p->extensions & EXT_DRM_MODIFIER_FLAGS;
>> +    VkSubresourceLayout plane_data[AV_NUM_DATA_POINTERS];
>> +    VkBindImageMemoryInfo bind_info[AV_NUM_DATA_POINTERS];
>> +    VkExternalMemoryHandleTypeFlagBits htype = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
>> +
>> +    VK_LOAD_PFN(hwctx->inst, vkGetMemoryFdPropertiesKHR);
>> +
>> +    for (int i = 0; i < desc->nb_layers; i++) {
>> +        if (desc->layers[i].nb_planes > 1) {
>> +            av_log(ctx, AV_LOG_ERROR, "Cannot import DMABUFS with more than 1 "
>> +                                      "plane per layer!\n");
>> +            return AVERROR(EINVAL);
>> +        }
>> +
>> +        if (drm_to_vulkan_fmt(desc->layers[i].format) == VK_FORMAT_UNDEFINED) {
>> +            av_log(ctx, AV_LOG_ERROR, "Unsupported DMABUF layer format!\n");
>>
>
> Maybe say what the unsupported format is for someone reporting the message, since this is probably relatively easy to hit (e.g. YUYV).
>

Sure, I'll just use the handy DRM API/define to translate DRM FOURCC uint32_t to a string.
Oh wait, I can't because there is no such API. I've had to manually go through every single define in the past, relying on blind luck and speculation to match those up. DRM devs want people to suffer, as if the big-endian confusion-inducing FOURCC isn't evidence enough for it.
Not willing to NIH something to pull the chars out of it yet. Next time I have to look up a drm format id maybe.



>> +            return AVERROR(EINVAL);
>> +        }
>> +    }
>> +
>> +    if (!(f = av_mallocz(sizeof(*f)))) {
>> +        av_log(ctx, AV_LOG_ERROR, "Unable to allocate memory for AVVkFrame!\n");
>> +        err = AVERROR(ENOMEM);
>> +        goto fail;
>> +    }
>> +
>> +    for (int i = 0; i < desc->nb_objects; i++) {
>> +        VkMemoryFdPropertiesKHR fdmp = {
>> +            .sType = VK_STRUCTURE_TYPE_MEMORY_FD_PROPERTIES_KHR,
>> +        };
>> +        VkMemoryRequirements req = {
>> +            .size = desc->objects[i].size,
>> +        };
>> +        VkImportMemoryFdInfoKHR idesc = {
>> +            .sType      = VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR,
>> +            .handleType = htype,
>> +            .fd         = desc->objects[i].fd,
>> +        };
>> +
>> +        ret = pfn_vkGetMemoryFdPropertiesKHR(hwctx->act_dev, htype,
>> +                                             desc->objects[i].fd, &fdmp);
>> +        if (ret != VK_SUCCESS) {
>> +            av_log(hwfc, AV_LOG_ERROR, "Failed to get FD properties: %s\n",
>> +                   vk_ret2str(ret));
>> +            err = AVERROR_EXTERNAL;
>> +            goto fail;
>> +        }
>> +
>> +        req.memoryTypeBits = fdmp.memoryTypeBits;
>> +
>> +        err = alloc_mem(ctx, &req, 0x0, &idesc, &f->flags, &f->mem[i]);
>> +        if (err)
>> +            return err;
>> +
>> +        f->size[i] = desc->objects[i].size;
>> +    }
>> +
>> +    f->tiling = has_modifiers ? VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT :
>> +                desc->objects[0].format_modifier == DRM_FORMAT_MOD_LINEAR ?
>> +                VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL;
>> +
>> +    for (int i = 0; i < desc->nb_layers; i++) {
>> +        VkImageDrmFormatModifierExplicitCreateInfoEXT drm_info = {
>> +            .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT,
>> +            .drmFormatModifier = desc->objects[0].format_modifier,
>> +            .drmFormatModifierPlaneCount = desc->layers[i].nb_planes,
>> +            .pPlaneLayouts = (const VkSubresourceLayout *)&plane_data,
>> +        };
>> +
>> +        VkExternalMemoryImageCreateInfo einfo = {
>> +            .sType       = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
>> +            .pNext       = has_modifiers ? &drm_info : NULL,
>> +            .handleTypes = htype,
>> +        };
>> +
>> +        VkSemaphoreCreateInfo sem_spawn = {
>> +            .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
>> +        };
>> +
>> +        const int p_w = i > 0 ? AV_CEIL_RSHIFT(hwfc->width, fmt_desc->log2_chroma_w) : hwfc->width;
>> +        const int p_h = i > 0 ? AV_CEIL_RSHIFT(hwfc->height, fmt_desc->log2_chroma_h) : hwfc->height;
>> +
>> +        VkImageCreateInfo image_create_info = {
>> +            .sType         = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
>> +            .pNext         = &einfo,
>> +            .imageType     = VK_IMAGE_TYPE_2D,
>> +            .format        = drm_to_vulkan_fmt(desc->layers[i].format),
>> +            .extent.width  = p_w,
>> +            .extent.height = p_h,
>> +            .extent.depth  = 1,
>> +            .mipLevels     = 1,
>> +            .arrayLayers   = 1,
>> +            .flags         = VK_IMAGE_CREATE_ALIAS_BIT,
>> +            .tiling        = f->tiling,
>> +            .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, /* specs say so */
>> +            .usage         = DEFAULT_USAGE_FLAGS,
>> +            .sharingMode   = VK_SHARING_MODE_EXCLUSIVE,
>> +            .samples       = VK_SAMPLE_COUNT_1_BIT,
>> +        };
>> +
>> +        for (int j = 0; j < desc->layers[i].nb_planes; j++) {
>> +            plane_data[j].offset     = desc->layers[i].planes[j].offset;
>> +            plane_data[j].rowPitch   = desc->layers[i].planes[j].pitch;
>> +            plane_data[j].size       = 0; /* The specs say so for all 3 */
>> +            plane_data[j].arrayPitch = 0;
>> +            plane_data[j].depthPitch = 0;
>> +        }
>> +
>> +        /* Create image */
>> +        ret = vkCreateImage(hwctx->act_dev, &image_create_info,
>> +                            hwctx->alloc, &f->img[i]);
>> +        if (ret != VK_SUCCESS) {
>> +            av_log(ctx, AV_LOG_ERROR, "Image creation failure: %s\n",
>> +                   vk_ret2str(ret));
>> +            err = AVERROR(EINVAL);
>> +            goto fail;
>> +        }
>> +
>> +        ret = vkCreateSemaphore(hwctx->act_dev, &sem_spawn,
>> +                                hwctx->alloc, &f->sem[i]);
>> +        if (ret != VK_SUCCESS) {
>> +            av_log(hwctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n",
>> +                   vk_ret2str(ret));
>> +            return AVERROR_EXTERNAL;
>> +        }
>> +
>> +        /* We'd import a semaphore onto the one we created using
>> +         * vkImportSemaphoreFdKHR but unfortunately neither DRM nor VAAPI
>> +         * offer us anything we could import and sync with, so instead
>> +         * leave the semaphore unsignalled and enjoy the validation spam. */
>>
>
> I have some vague intent to look into this subject myself.  VAAPI needs proper async, and interop with Vulkan is an important use of that.
>

Would be nice. libdrm already has a public API to manage semaphores (create, destroy, export/import from fd, exactly what we need) but no way at all to link semaphores to a DRM surface. Its actually already used by drivers already too.



>> +
>> +        f->layout[i] = image_create_info.initialLayout;
>> +        f->access[i] = 0x0;
>> +
>> +        /* TODO: Fix to support more than 1 plane per layer */
>> +        bind_info[i].sType  = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
>> +        bind_info[i].pNext  = NULL;
>> +        bind_info[i].image  = f->img[i];
>> +        bind_info[i].memory = f->mem[desc->layers[i].planes[0].object_index];
>> +        bind_info[i].memoryOffset = desc->layers[i].planes[0].offset;
>> +    }
>> +
>> +    /* Bind the allocated memory to the images */
>> +    ret = vkBindImageMemory2(hwctx->act_dev, planes, bind_info);
>> +    if (ret != VK_SUCCESS) {
>> +        av_log(ctx, AV_LOG_ERROR, "Failed to bind memory: %s\n",
>> +               vk_ret2str(ret));
>> +        return AVERROR_EXTERNAL;
>> +    }
>> +
>> +    *frame = f;
>> +
>> +    return 0;
>> +
>> +fail:
>> +    for (int i = 0; i < planes; i++) {
>> +        vkDestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc);
>> +        vkFreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc);
>> +        vkDestroySemaphore(hwctx->act_dev, f->sem[i], hwctx->alloc);
>> +    }
>> +
>> +    av_free(f);
>> +
>> +    return err;
>> +}
>> +
>> ...
>> +
>> +static int vulkan_map_to_drm(AVHWFramesContext *hwfc, AVFrame *dst,
>> +                             const AVFrame *src, int flags)
>> +{
>> +    int err = 0;
>> +    VkResult ret;
>> +    AVVkFrame *f = (AVVkFrame *)src->data[0];
>> +    VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
>> +    AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
>> +    const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
>> +    VK_LOAD_PFN(hwctx->inst, vkGetMemoryFdKHR);
>> +    VkImageDrmFormatModifierPropertiesEXT drm_mod = {
>> +        .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_PROPERTIES_EXT,
>> +    };
>>
>
> Do you need a sync here for any writing being finished, or is it implicit somehow below?
>

There isn't. This would be where we export a semaphore to VAAPI.
We could make a CPU sync by converting the image to read optimal and waiting on the command queue's semaphore, but that would need another exec context.
Should we?
I don't have a way to test this ATM, my test program is gone, so if I do touch it I'll need to write a new one, which is a lot of work. With ffmpeg I can only test raw exporting without it actually being used by anything, since everything complains about some missing contexts IIRC.



>> +
>> +    AVDRMFrameDescriptor *drm_desc = av_mallocz(sizeof(*drm_desc));
>> +    if (!drm_desc)
>> +        return AVERROR(ENOMEM);
>> +
>> +    err = ff_hwframe_map_create(src->hw_frames_ctx, dst, src, &vulkan_unmap_to_drm, drm_desc);
>> +    if (err < 0)
>> +        goto end;
>> +
>> +    if (p->extensions & EXT_DRM_MODIFIER_FLAGS) {
>> +        VK_LOAD_PFN(hwctx->inst, vkGetImageDrmFormatModifierPropertiesEXT);
>> +        ret = pfn_vkGetImageDrmFormatModifierPropertiesEXT(hwctx->act_dev, f->img[0],
>> +                                                           &drm_mod);
>> +        if (ret != VK_SUCCESS) {
>> +            av_log(hwfc, AV_LOG_ERROR, "Failed to retrieve DRM format modifier!\n");
>> +            err = AVERROR_EXTERNAL;
>> +            goto end;
>> +        }
>> +    }
>> +
>> +    for (int i = 0; (i < planes) && (f->mem[i]); i++) {
>> +        VkMemoryGetFdInfoKHR export_info = {
>> +            .sType      = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
>> +            .memory     = f->mem[i],
>> +            .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
>> +        };
>> +
>> +        ret = pfn_vkGetMemoryFdKHR(hwctx->act_dev, &export_info,
>> +                                   &drm_desc->objects[i].fd);
>> +        if (ret != VK_SUCCESS) {
>> +            av_log(hwfc, AV_LOG_ERROR, "Unable to export the image as a FD!\n");
>> +            err = AVERROR_EXTERNAL;
>> +            goto end;
>> +        }
>> +
>> +        drm_desc->nb_objects++;
>> +        drm_desc->objects[i].size = f->size[i];
>> +        drm_desc->objects[i].format_modifier = drm_mod.drmFormatModifier;
>> +    }
>> +
>> +    drm_desc->nb_layers = planes;
>> +    for (int i = 0; i < drm_desc->nb_layers; i++) {
>> +        VkSubresourceLayout layout;
>> +        VkImageSubresource sub = {
>> +            .aspectMask = p->extensions & EXT_DRM_MODIFIER_FLAGS ?
>> +                          VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT :
>> +                          VK_IMAGE_ASPECT_COLOR_BIT,
>> +        };
>> +        VkFormat plane_vkfmt = av_vkfmt_from_pixfmt(hwfc->sw_format)[i];
>> +
>> +        drm_desc->layers[i].format    = vulkan_fmt_to_drm(plane_vkfmt);
>> +        drm_desc->layers[i].nb_planes = 1;
>> +
>> +        if (drm_desc->layers[i].format == DRM_FORMAT_INVALID) {
>> +            av_log(hwfc, AV_LOG_ERROR, "Cannot map to DRM layer, unsupported!\n");
>> +            err = AVERROR_PATCHWELCOME;
>> +            goto end;
>> +        }
>> +
>> +        drm_desc->layers[i].planes[0].object_index = FFMIN(i, drm_desc->nb_objects - 1);
>> +
>> +        if (f->tiling != VK_IMAGE_TILING_OPTIMAL)
>> +            continue;
>> +
>> +        vkGetImageSubresourceLayout(hwctx->act_dev, f->img[i], &sub, &layout);
>> +        drm_desc->layers[i].planes[0].offset       = layout.offset;
>> +        drm_desc->layers[i].planes[0].pitch        = layout.rowPitch;
>> +    }
>> +
>> +    dst->width   = src->width;
>> +    dst->height  = src->height;
>> +    dst->data[0] = (uint8_t *)drm_desc;
>> +
>> +    av_log(hwfc, AV_LOG_VERBOSE, "Mapped AVVkFrame to a DRM object!\n");
>> +
>> +    return 0;
>> +
>> +end:
>> +    av_free(drm_desc);
>> +    return err;
>> +}
>> +
>> ...
>> +
>> +/* Technically we can use VK_EXT_external_memory_host to upload and download,
>> + * however the alignment requirements make this unfeasible as both the pointer
>> + * and the size of each plane need to be aligned to the minimum alignment
>> + * requirement, which on all current implementations (anv, radv) is 4096.
>> + * If the requirement gets relaxed (unlikely) this can easily be implemented. */
>>
>
> What does the pointer alignment requirement actually apply to?
>
> (Could we lie about the start of the image by rounding to a page, and then do the transfer with an offset?)
>

The problem is the luma plane - in normal AVFrames the luma starts on the first data entry, and that's only aligned to the CPU vector size. That's orders of magnitude less that 4096 bytes.



>> ...
>> +
>> +static int vulkan_transfer_data_to_mem(AVHWFramesContext *hwfc, AVFrame *dst,
>> +                                       const AVFrame *src)
>> +{
>> +    int err = 0;
>> +    AVFrame tmp;
>> +    AVVkFrame *f = (AVVkFrame *)src->data[0];
>> +    AVHWDeviceContext *dev_ctx = hwfc->device_ctx;
>> +    ImageBuffer buf[AV_NUM_DATA_POINTERS] = { { 0 } };
>> +    const int planes = av_pix_fmt_count_planes(dst->format);
>> +    int log2_chroma = av_pix_fmt_desc_get(dst->format)->log2_chroma_h;
>> +
>> +    if (dst->width > hwfc->width || dst->height > hwfc->height)
>> +        return AVERROR(EINVAL);
>> +
>> +    /* For linear, host visiable images */
>> +    if (f->tiling == VK_IMAGE_TILING_LINEAR &&
>> +        f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
>>
>
> Is it generally expected that this is actually faster than the next option?  (Because evil uncached memory is a thing.)
>

Could be. On Intel it was faster, but now its as fast as tiled images. On NVIDIA its slower. Its still faster on Intel for simple short filter chains. Keeping it for cheap devices.



>> +        AVFrame *map = av_frame_alloc();
>> +        if (!map)
>> +            return AVERROR(ENOMEM);
>> +        map->format = dst->format;
>> +
>> +        err = vulkan_map_frame_to_mem(hwfc, map, src, AV_HWFRAME_MAP_READ);
>> +        if (err)
>> +            return err;
>> +
>> +        err = av_frame_copy(dst, map);
>> +        av_frame_free(&map);
>> +        return err;
>> +    }
>> +
>> +    /* Create buffers */
>> +    for (int i = 0; i < planes; i++) {
>> +        int h = dst->height;
>> +        int p_height = i > 0 ? AV_CEIL_RSHIFT(h, log2_chroma) : h;
>> +
>> +        tmp.linesize[i] = dst->linesize[i];
>> +        err = create_buf(dev_ctx, &buf[i], p_height,
>> +                         &tmp.linesize[i], VK_BUFFER_USAGE_TRANSFER_DST_BIT,
>> +                         VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, NULL, NULL);
>> +    }
>> +
>> +    /* Copy image to buffer */
>> +    if ((err = transfer_image_buf(dev_ctx, f, buf, tmp.linesize,
>> +                                  dst->width, dst->height, dst->format, 1)))
>> +        goto end;
>> +
>> +    /* Map, copy buffer to frame, unmap */
>> +    if ((err = map_buffers(dev_ctx, buf, tmp.data, planes, 1)))
>> +        goto end;
>> +
>> +    av_image_copy(dst->data, dst->linesize, (const uint8_t **)tmp.data,
>> +                  tmp.linesize, dst->format, dst->width, dst->height);
>> +
>> +    err = unmap_buffers(dev_ctx, buf, planes, 0);
>> +
>> +end:
>> +    for (int i = 0; i < planes; i++)
>> +        free_buf(dev_ctx, &buf[i]);
>> +
>> +    return err;
>> +}
>> ...
>> diff --git a/libavutil/hwcontext_vulkan.h b/libavutil/hwcontext_vulkan.h
>> new file mode 100644
>> index 0000000000..4146f14d6e
>> --- /dev/null
>> +++ b/libavutil/hwcontext_vulkan.h
>> @@ -0,0 +1,152 @@
>> +/*
>> + * This file is part of FFmpeg.
>> + *
>> + * FFmpeg is free software; you can redistribute it and/or
>> + * modify it under the terms of the GNU Lesser General Public
>> + * License as published by the Free Software Foundation; either
>> + * version 2.1 of the License, or (at your option) any later version.
>> + *
>> + * FFmpeg is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> + * Lesser General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU Lesser General Public
>> + * License along with FFmpeg; if not, write to the Free Software
>> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
>> + */
>> +
>> +#ifndef AVUTIL_HWCONTEXT_VULKAN_H
>> +#define AVUTIL_HWCONTEXT_VULKAN_H
>> +
>> +#include <vulkan/vulkan.h>
>> +
>> +/**
>> + * @file
>> + * API-specific header for AV_HWDEVICE_TYPE_VULKAN.
>> + *
>> + * For user-allocated pools, AVHWFramesContext.pool must return AVBufferRefs
>> + * with the data pointer set to an AVVkFrame.
>> + */
>> +
>> +/**
>> + * Main Vulkan context, allocated as AVHWDeviceContext.hwctx.
>> + * All of these can be set before init to change what the context uses
>> + */
>> +typedef struct AVVulkanDeviceContext {
>> +    /**
>> +     * Custom memory allocator, else NULL
>> +     */
>> +    const VkAllocationCallbacks *alloc;
>>
>
> Do you have some specific use-case in mind for this?  (You haven't used it in anything so far.)
>

For some weird API users who want/use a custom allocator. The spec does say to use the same allocator throughout all the API.
We could link it by default to av_malloc, but its a lot of unnecessary code.



>> +    /**
>> +     * Instance
>> +     */
>> +    VkInstance inst;
>> +    /**
>> +     * Physical device
>> +     */
>> +    VkPhysicalDevice phys_dev;
>> +    /**
>> +     * Activated physical device
>> +     */
>> +    VkDevice act_dev;
>>
>
> I weakly argue for not abbreviating names in the public API like this (but feel free to ignore me).
>

They're commented. I'd rather not go through so many lines of code realigning every substitution.



>> +    /**
>> +     * Queue family index for graphics
>> +     */
>> +    int queue_family_index;
>> +    /**
>> +     * Queue family index for transfer ops only. By default, the priority order
>> +     * is dedicated transfer > dedicated compute > graphics.
>> +     */
>> +    int queue_family_tx_index;
>>
>
> In my experience "tx" is always short for "transmit", not for "transfer".
>

In codecs it rarely means anything other than "transform". In radio and some other software it rarely means anything other than "transfer". I'll keep it.



>> +    /**
>> +     * Queue family index for compute ops. Will be equal to the graphics
>> +     * one unless a dedicated transfer queue is found.
>> +     */
>> +    int queue_family_comp_index;
>> +    /**
>> +     * The UUID of the selected physical device.
>> +     */
>> +    uint8_t device_uuid[VK_UUID_SIZE];
>> +} AVVulkanDeviceContext;
>> +
>> +/**
>> + * Allocated as AVHWFramesContext.hwctx, used to set pool-specific options
>> + */
>> +typedef struct AVVulkanFramesContext {
>> +    /**
>> +     * Controls the tiling of output frames.
>> +     */
>> +    VkImageTiling tiling;
>> +    /**
>> +     * Defines extra usage of output frames. This is bitwise OR'd with the
>> +     * standard usage flags (SAMPLED, STORAGE, TRANSFER_SRC and TRANSFER_DST).
>>
>
> (Referred to somewhere above.)
>
>> +     */
>> +    VkImageUsageFlagBits usage;
>> +    /**
>> +     * Extension data for image creation. By default, if the extension is
>> +     * available, this will be chained to a VkImageFormatListCreateInfoKHR.
>> +     */
>> +    void *create_pnext;
>> +    /**
>> +     * Extension data for memory allocation. Must have as many entries as
>> +     * the number of planes of the sw_format.
>> +     * This will be chained to VkExportMemoryAllocateInfo, which is used
>> +     * to make all pool images exportable to other APIs.
>> +     */
>> +    void *alloc_pnext[AV_NUM_DATA_POINTERS];
>> +} AVVulkanFramesContext;
>> +
>> +/*
>> + * Frame structure, the VkFormat of the image will always match
>> + * the pool's sw_format.
>> + * All frames, imported or allocated, will be created with the
>> + * VK_IMAGE_CREATE_ALIAS_BIT flag set, so the memory may be aliased if needed.
>> + */
>> +typedef struct AVVkFrame {
>> +    /**
>> +     * Vulkan images to which the memory is bound to.
>> +     */
>> +    VkImage img[AV_NUM_DATA_POINTERS];
>> +
>> +    /**
>> +     * Same tiling must be used for all images.
>> +     */
>> +    VkImageTiling tiling;
>> +
>> +    /**
>> +     * Memory backing the images. Could be less than the amount of images
>> +     * if importing from a DRM or VAAPI frame.
>>
>
> Or absent entirely?
>

No, there must always be memory backing the image. With DRM/VAAPI they're the imported FD(s).



>> +     */
>> +    VkDeviceMemory mem[AV_NUM_DATA_POINTERS];
>> +    size_t size[AV_NUM_DATA_POINTERS];
>> +
>> +    /**
>> +     * OR'd flags for all memory allocated
>> +     */
>> +    VkMemoryPropertyFlagBits flags;
>> +
>> +    /**
>> +     * Updated after every barrier
>> +     */
>> +    VkAccessFlagBits access[AV_NUM_DATA_POINTERS];
>> +    VkImageLayout layout[AV_NUM_DATA_POINTERS];
>> +
>> +    /**
>> +     * Per-image semaphores. Must not be freed manually. Must be waited on
>> +     * and signalled at every queue submission.
>>
>
> Perhaps a little more explanation of exactly what is needed on reads/writes would be helpful here.  As written it sounds like multiple readers must be serialised by it, which I'm not sure is intended.
>

Yes, they must be serialized. Not sure how else to explain it, I think the text is clear to me.



>> +     */
>> +    VkSemaphore sem[AV_NUM_DATA_POINTERS];
>> +
>> +    /**
>> +     * Internal data.
>> +     */
>> +    struct AVVkFrameInternal *internal;
>> +} AVVkFrame;
>> +
>> +/**
>> + * Returns the format of each image up to the number of planes for a given sw_format.
>> + */
>> +const VkFormat *av_vkfmt_from_pixfmt(enum AVPixelFormat p);
>> +
>> +#endif /* AVUTIL_HWCONTEXT_VULKAN_H */
>> ...
>> diff --git a/libavutil/pixfmt.h b/libavutil/pixfmt.h
>> index 37ecebd501..5640c9f23d 100644
>> --- a/libavutil/pixfmt.h
>> +++ b/libavutil/pixfmt.h
>> @@ -348,6 +348,13 @@ enum AVPixelFormat {
>>  AV_PIX_FMT_NV24,      ///< planar YUV 4:4:4, 24bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (first byte U and the following byte V)
>>  AV_PIX_FMT_NV42,      ///< as above, but U and V bytes are swapped
>>  
>> +    /**
>> +     * Vulkan hardware images.
>> +     *
>> +     * data[0] contain an AVVkFrame
>>
>
> points to an AVVkFrame?
>

Fixed.



>> +     */
>> +    AV_PIX_FMT_VULKAN,
>> +
>>  AV_PIX_FMT_NB         ///< number of pixel formats, DO NOT USE THIS if you want to link with shared libav* because the number of formats might differ between versions
>>  };
>>  
>> -- 
>> 2.25.0.rc2
>>
>
> Thanks,
>
> - Mark
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request at ffmpeg.org with subject "unsubscribe".
>



More information about the ffmpeg-devel mailing list