[FFmpeg-devel] [PATCH] Allow using primary CUDA device context
Timo Rothenpieler
timo at rothenpieler.org
Mon Nov 18 00:31:29 EET 2019
On 17.11.2019 15:58, Oleg Dobkin wrote:
> Add AVCUDADeviceContextFlags to control the creation of CUDA device
> context for the hardware CUDA decoder.
>
> The current values are 0 (default behavior) - new context will be
> created for each decoder, and 1 - primary CUDA context will be used.
>
> There are several reasons for using primary device context instead of
> creating a new one:
>
> - This is the recommended way to handle device contexts (see
> https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__CTX.html#group__CUDA__CTX_1g65dc0012348bc84810e2103a40d8e2cf)
>
> - Memory allocations, kernels and other state are associated with the
> current device context. Currently, the context is not accessible from
> FFmpeg API, so, technically, the memory created by the hardware decoder
> (the video frame) can't be safely read.
>
> Signed-off-by: Oleg Dobkin <olegd at anyvision.co>
> ---
> libavutil/hwcontext_cuda.c | 20 +++++++++++++++-----
> libavutil/hwcontext_cuda.h | 7 +++++++
> 2 files changed, 22 insertions(+), 5 deletions(-)
>
> diff --git a/libavutil/hwcontext_cuda.c b/libavutil/hwcontext_cuda.c
> index cca39e9fc7..608ea57569 100644
> --- a/libavutil/hwcontext_cuda.c
> +++ b/libavutil/hwcontext_cuda.c
> @@ -281,8 +281,12 @@ static void cuda_device_uninit(AVHWDeviceContext *device_ctx)
> if (hwctx->internal) {
> CudaFunctions *cu = hwctx->internal->cuda_dl;
> if (hwctx->internal->is_allocated && hwctx->cuda_ctx) {
> - CHECK_CU(cu->cuCtxDestroy(hwctx->cuda_ctx));
> + if (hwctx->flags == DCF_CREATE_CONTEXT)
This should actually check whether the flag is set (a bitwise test), not compare for equality.
> + CHECK_CU(cu->cuCtxDestroy(hwctx->cuda_ctx));
> + else
> + CHECK_CU(cu->cuDevicePrimaryCtxRelease(hwctx->cuda_device));
> hwctx->cuda_ctx = NULL;
> + hwctx->cuda_device = NULL;
> }
> cuda_free_functions(&hwctx->internal->cuda_dl);
> }
> @@ -322,7 +326,6 @@ static int cuda_device_create(AVHWDeviceContext *device_ctx,
> {
> AVCUDADeviceContext *hwctx = device_ctx->hwctx;
> CudaFunctions *cu;
> - CUdevice cu_device;
> CUcontext dummy;
> int ret, device_idx = 0;
>
> @@ -338,18 +341,25 @@ static int cuda_device_create(AVHWDeviceContext *device_ctx,
> if (ret < 0)
> goto error;
>
> - ret = CHECK_CU(cu->cuDeviceGet(&cu_device, device_idx));
> + ret = CHECK_CU(cu->cuDeviceGet(&hwctx->cuda_device, device_idx));
> if (ret < 0)
> goto error;
>
> - ret = CHECK_CU(cu->cuCtxCreate(&hwctx->cuda_ctx, CU_CTX_SCHED_BLOCKING_SYNC, cu_device));
> + hwctx->flags = flags;
> +
> + if (flags == DCF_CREATE_CONTEXT)
> + ret = CHECK_CU(cu->cuCtxCreate(&hwctx->cuda_ctx, CU_CTX_SCHED_BLOCKING_SYNC, hwctx->cuda_device));
> + else
> + ret = CHECK_CU(cu->cuDevicePrimaryCtxRetain(&hwctx->cuda_ctx, hwctx->cuda_device));
> +
> if (ret < 0)
> goto error;
>
> // Setting stream to NULL will make functions automatically use the default CUstream
> hwctx->stream = NULL;
>
> - CHECK_CU(cu->cuCtxPopCurrent(&dummy));
> + if (flags == DCF_CREATE_CONTEXT)
> + CHECK_CU(cu->cuCtxPopCurrent(&dummy));
>
> hwctx->internal->is_allocated = 1;
>
> diff --git a/libavutil/hwcontext_cuda.h b/libavutil/hwcontext_cuda.h
> index 81a0552cab..bab5eefe54 100644
> --- a/libavutil/hwcontext_cuda.h
> +++ b/libavutil/hwcontext_cuda.h
> @@ -34,6 +34,11 @@
> * AVBufferRefs whose data pointer is a CUdeviceptr.
> */
>
> +enum AVCUDADeviceContextFlags {
> + DCF_CREATE_CONTEXT = 0,
> + DCF_USE_PRIMARY_CONTEXT = 1
> +};
I'd only define a flag for the new behavior. If it's not set, keep the old
behavior.
> typedef struct AVCUDADeviceContextInternal AVCUDADeviceContextInternal;
>
> /**
> @@ -43,6 +48,8 @@ typedef struct AVCUDADeviceContext {
> CUcontext cuda_ctx;
> CUstream stream;
> AVCUDADeviceContextInternal *internal;
> + CUdevice cuda_device;
Can't one just call cuCtxGetDevice on the context to get the device?
> + enum AVCUDADeviceContextFlags flags;
The device_create/av_hwdevice_ctx_create function already has a flags
parameter (unused at the moment), so there should be no need to add this here.
If need be, the information should be stored in
AVCUDADeviceContextInternal instead.
> } AVCUDADeviceContext;
>
This also needs configure updated for the higher ffnvcodec version that's
required after this patch, and it probably deserves a lavu micro bump.
More information about the ffmpeg-devel
mailing list