[FFmpeg-devel] [PATCH] Allow using primary CUDA device context

Timo Rothenpieler timo at rothenpieler.org
Mon Nov 18 00:31:29 EET 2019


On 17.11.2019 15:58, Oleg Dobkin wrote:
> Add AVCUDADeviceContextFlags to control the creation of CUDA device
> context for the hardware CUDA decoder.
> 
> The current values are 0 (default behavior) - new context will be
> created for each decoder, and 1 - primary CUDA context will be used.
> 
> There are several reasons for using primary device context instead of
> creating a new one:
> 
>   - This is the recommended way to handle device contexts (see
> https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__CTX.html#group__CUDA__CTX_1g65dc0012348bc84810e2103a40d8e2cf)
> 
>   - Memory allocations, kernels and other state are associated with the
> current device context. Currently, the context is not accessible from
> FFmpeg API, so, technically, the memory created by the hardware decoder
> (the video frame) can't be safely read.
> 
> Signed-off-by: Oleg Dobkin <olegd at anyvision.co>
> ---
>   libavutil/hwcontext_cuda.c | 20 +++++++++++++++-----
>   libavutil/hwcontext_cuda.h |  7 +++++++
>   2 files changed, 22 insertions(+), 5 deletions(-)
> 
> diff --git a/libavutil/hwcontext_cuda.c b/libavutil/hwcontext_cuda.c
> index cca39e9fc7..608ea57569 100644
> --- a/libavutil/hwcontext_cuda.c
> +++ b/libavutil/hwcontext_cuda.c
> @@ -281,8 +281,12 @@ static void cuda_device_uninit(AVHWDeviceContext *device_ctx)
>       if (hwctx->internal) {
>           CudaFunctions *cu = hwctx->internal->cuda_dl;
>           if (hwctx->internal->is_allocated && hwctx->cuda_ctx) {
> -            CHECK_CU(cu->cuCtxDestroy(hwctx->cuda_ctx));
> +            if (hwctx->flags == DCF_CREATE_CONTEXT)

This should actually check whether the flag bit is set, not compare the whole value for equality.

> +                CHECK_CU(cu->cuCtxDestroy(hwctx->cuda_ctx));
> +            else
> +                CHECK_CU(cu->cuDevicePrimaryCtxRelease(hwctx->cuda_device));
>               hwctx->cuda_ctx = NULL;
> +            hwctx->cuda_device = NULL;
>           }
>           cuda_free_functions(&hwctx->internal->cuda_dl);
>       }
> @@ -322,7 +326,6 @@ static int cuda_device_create(AVHWDeviceContext *device_ctx,
>   {
>       AVCUDADeviceContext *hwctx = device_ctx->hwctx;
>       CudaFunctions *cu;
> -    CUdevice cu_device;
>       CUcontext dummy;
>       int ret, device_idx = 0;
>   
> @@ -338,18 +341,25 @@ static int cuda_device_create(AVHWDeviceContext *device_ctx,
>       if (ret < 0)
>           goto error;
>   
> -    ret = CHECK_CU(cu->cuDeviceGet(&cu_device, device_idx));
> +    ret = CHECK_CU(cu->cuDeviceGet(&hwctx->cuda_device, device_idx));
>       if (ret < 0)
>           goto error;
>   
> -    ret = CHECK_CU(cu->cuCtxCreate(&hwctx->cuda_ctx, CU_CTX_SCHED_BLOCKING_SYNC, cu_device));
> +    hwctx->flags = flags;
> +
> +    if (flags == DCF_CREATE_CONTEXT)
> +        ret = CHECK_CU(cu->cuCtxCreate(&hwctx->cuda_ctx, CU_CTX_SCHED_BLOCKING_SYNC, hwctx->cuda_device));
> +    else
> +        ret = CHECK_CU(cu->cuDevicePrimaryCtxRetain(&hwctx->cuda_ctx, hwctx->cuda_device));
> +
>       if (ret < 0)
>           goto error;
>   
>       // Setting stream to NULL will make functions automatically use the default CUstream
>       hwctx->stream = NULL;
>   
> -    CHECK_CU(cu->cuCtxPopCurrent(&dummy));
> +    if (flags == DCF_CREATE_CONTEXT)
> +        CHECK_CU(cu->cuCtxPopCurrent(&dummy));
>   
>       hwctx->internal->is_allocated = 1;
>   
> diff --git a/libavutil/hwcontext_cuda.h b/libavutil/hwcontext_cuda.h
> index 81a0552cab..bab5eefe54 100644
> --- a/libavutil/hwcontext_cuda.h
> +++ b/libavutil/hwcontext_cuda.h
> @@ -34,6 +34,11 @@
>    * AVBufferRefs whose data pointer is a CUdeviceptr.
>    */
>   
> +enum AVCUDADeviceContextFlags {
> +    DCF_CREATE_CONTEXT = 0,
> +    DCF_USE_PRIMARY_CONTEXT = 1
> +};

I'd only define a flag for the new behavior. If it's not set, keep old 
behavior.

>   typedef struct AVCUDADeviceContextInternal AVCUDADeviceContextInternal;
>   
>   /**
> @@ -43,6 +48,8 @@ typedef struct AVCUDADeviceContext {
>       CUcontext cuda_ctx;
>       CUstream stream;
>       AVCUDADeviceContextInternal *internal;
> +    CUdevice cuda_device;

Can't one just call cuCtxGetDevice on the context to get the device?

> +    enum AVCUDADeviceContextFlags flags;

The device_create/av_hwdevice_ctx_create function already has a flags
parameter (currently unused), so there should be no need to add this field here.
If need be, the information should be stored in
AVCUDADeviceContextInternal instead.

>   } AVCUDADeviceContext;
>   

This also needs configure updated for the higher ffnvcodec version that's
required after this patch, and it probably deserves a lavu micro bump.




More information about the ffmpeg-devel mailing list