Re: [FFmpeg-devel] [PATCH] Allow using primary CUDA device context

Oleg Dobkin Mon, 18 Nov 2019 03:00:22 -0800

On Sun, 2019-11-17 at 23:31 +0100, Timo Rothenpieler wrote:
> On 17.11.2019 15:58, Oleg Dobkin wrote:
> 
> Add AVCUDADeviceContextFlags to control the creation of CUDA device
> context for the hardware CUDA decoder.
> 
> The current values are 0 (default behavior) - new context will be
> created for each decoder, and 1 - primary CUDA context will be used.
> 
> There are several reasons for using primary device context instead of
> creating a new one:
> 
>   - This is the recommended way to handle device contexts (see
> 
https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__CTX.html#group__CUDA__CTX_1g65dc0012348bc84810e2103a40d8e2cf
> )
> 
>   - Memory allocations, kernels and other state are associated with
> the
> current device context. Currently, the context is not accessible from
> FFmpeg API, so, technically, the memory created by the hardware
> decoder
> (the video frame) can't be safely read.
> 
> Signed-off-by: Oleg Dobkin <ol...@anyvision.co>
> ---
>   libavutil/hwcontext_cuda.c | 20 +++++++++++++++-----
>   libavutil/hwcontext_cuda.h |  7 +++++++
>   2 files changed, 22 insertions(+), 5 deletions(-)
> 
> diff --git a/libavutil/hwcontext_cuda.c b/libavutil/hwcontext_cuda.c
> index cca39e9fc7..608ea57569 100644
> --- a/libavutil/hwcontext_cuda.c
> +++ b/libavutil/hwcontext_cuda.c
> @@ -281,8 +281,12 @@ static void cuda_device_uninit(AVHWDeviceContext
> *device_ctx)
>       if (hwctx->internal) {
>           CudaFunctions *cu = hwctx->internal->cuda_dl;
>           if (hwctx->internal->is_allocated && hwctx->cuda_ctx) {
> -            CHECK_CU(cu->cuCtxDestroy(hwctx->cuda_ctx));
> +            if (hwctx->flags == DCF_CREATE_CONTEXT)
> 
> Should actually be checking for the flag, not equality.
> 
> 
> +                CHECK_CU(cu->cuCtxDestroy(hwctx->cuda_ctx));
> +            else
> +                CHECK_CU(cu->cuDevicePrimaryCtxRelease(hwctx-
> >cuda_device));
>               hwctx->cuda_ctx = NULL;
> +            hwctx->cuda_device = NULL;
>           }
>           cuda_free_functions(&hwctx->internal->cuda_dl);
>       }
> @@ -322,7 +326,6 @@ static int cuda_device_create(AVHWDeviceContext
> *device_ctx,
>   {
>       AVCUDADeviceContext *hwctx = device_ctx->hwctx;
>       CudaFunctions *cu;
> -    CUdevice cu_device;
>       CUcontext dummy;
>       int ret, device_idx = 0;
>   
> @@ -338,18 +341,25 @@ static int cuda_device_create(AVHWDeviceContext
> *device_ctx,
>       if (ret < 0)
>           goto error;
>   
> -    ret = CHECK_CU(cu->cuDeviceGet(&cu_device, device_idx));
> +    ret = CHECK_CU(cu->cuDeviceGet(&hwctx->cuda_device,
> device_idx));
>       if (ret < 0)
>           goto error;
>   
> -    ret = CHECK_CU(cu->cuCtxCreate(&hwctx->cuda_ctx,
> CU_CTX_SCHED_BLOCKING_SYNC, cu_device));
> +    hwctx->flags = flags;
> +
> +    if (flags == DCF_CREATE_CONTEXT)
> +        ret = CHECK_CU(cu->cuCtxCreate(&hwctx->cuda_ctx,
> CU_CTX_SCHED_BLOCKING_SYNC, hwctx->cuda_device));
> +    else
> +        ret = CHECK_CU(cu->cuDevicePrimaryCtxRetain(&hwctx-
> >cuda_ctx, hwctx->cuda_device));
> +
>       if (ret < 0)
>           goto error;
>   
>       // Setting stream to NULL will make functions automatically use
> the default CUstream
>       hwctx->stream = NULL;
>   
> -    CHECK_CU(cu->cuCtxPopCurrent(&dummy));
> +    if (flags == DCF_CREATE_CONTEXT)
> +        CHECK_CU(cu->cuCtxPopCurrent(&dummy));
>   
>       hwctx->internal->is_allocated = 1;
>   
> diff --git a/libavutil/hwcontext_cuda.h b/libavutil/hwcontext_cuda.h
> index 81a0552cab..bab5eefe54 100644
> --- a/libavutil/hwcontext_cuda.h
> +++ b/libavutil/hwcontext_cuda.h
> @@ -34,6 +34,11 @@
>    * AVBufferRefs whose data pointer is a CUdeviceptr.
>    */
>   
> +enum AVCUDADeviceContextFlags {
> +    DCF_CREATE_CONTEXT = 0,
> +    DCF_USE_PRIMARY_CONTEXT = 1
> +};
> 
> I'd only define a flag for the new behavior. If it's not set, keep
> old 
> behavior.
> 
> 
>   typedef struct AVCUDADeviceContextInternal
> AVCUDADeviceContextInternal;
>   
>   /**
> @@ -43,6 +48,8 @@ typedef struct AVCUDADeviceContext {
>       CUcontext cuda_ctx;
>       CUstream stream;
>       AVCUDADeviceContextInternal *internal;
> +    CUdevice cuda_device;
> 
> Can't one just call cuCtxGetDevice on the context to get the device?
> 
> 
> +    enum AVCUDADeviceContextFlags flags;
> 
> The device_create/av_hwdevice_ctx_create function already has a (at
> the 
> moment unused) flags parameter. So there should be no need to add
> this here.
> If need be, the information should be stored in 
> AVCUDADeviceContextInternal instead.
> 
> 
>   } AVCUDADeviceContext;
>   
> 
> Also needs configure updated for the higher ffnvcodec version that's 
> required after this patch, and probably deserved a lavu micro bump.
> 
> 
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email
> ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

From a62c5ab691d409c69bd55ede8ae73aeef6d5d0f5 Mon Sep 17 00:00:00 2001
From: Oleg Dobkin <ol...@anyvision.co>
Date: Sun, 17 Nov 2019 16:32:45 +0200
Subject: [PATCH] Allow using primary CUDA device context


Add AVCUDADeviceContextFlags to control the creation of CUDA device
context for the hardware CUDA decoder.

The current values are 0 (default behavior) - new context will be
created for each decoder, and 1 - primary CUDA context will be used.

There are several reasons for using primary device context instead of
creating a new one:

 - This is the recommended way to handle device contexts (see
https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__CTX.html#group__CUDA__CTX_1g65dc0012348bc84810e2103a40d8e2cf)

 - Memory allocations, kernels and other state are associated with the
current device context. Currently, the context is not accessible from
FFmpeg API, so, technically, the memory created by the hardware decoder
(the video frame) can't be safely read.

Signed-off-by: Oleg Dobkin <ol...@anyvision.co>
---
 libavutil/hwcontext_cuda.c          | 22 +++++++++++++++++-----
 libavutil/hwcontext_cuda_internal.h |  2 ++
 2 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/libavutil/hwcontext_cuda.c b/libavutil/hwcontext_cuda.c
index cca39e9fc7..e72efbe5af 100644
--- a/libavutil/hwcontext_cuda.c
+++ b/libavutil/hwcontext_cuda.c
@@ -29,6 +29,8 @@
 
 #define CUDA_FRAME_ALIGNMENT 256
 
+#define USE_PRIMARY_CONTEXT 1
+
 typedef struct CUDAFramesContext {
     int shift_width, shift_height;
 } CUDAFramesContext;
@@ -281,8 +283,12 @@ static void cuda_device_uninit(AVHWDeviceContext *device_ctx)
     if (hwctx->internal) {
         CudaFunctions *cu = hwctx->internal->cuda_dl;
         if (hwctx->internal->is_allocated && hwctx->cuda_ctx) {
-            CHECK_CU(cu->cuCtxDestroy(hwctx->cuda_ctx));
+            if (hwctx->internal->flags & USE_PRIMARY_CONTEXT)
+                CHECK_CU(cu->cuDevicePrimaryCtxRelease(hwctx->internal->cuda_device));
+            else
+                CHECK_CU(cu->cuCtxDestroy(hwctx->cuda_ctx));
             hwctx->cuda_ctx = NULL;
+            hwctx->internal->cuda_device = NULL;
         }
         cuda_free_functions(&hwctx->internal->cuda_dl);
     }
@@ -322,7 +328,6 @@ static int cuda_device_create(AVHWDeviceContext *device_ctx,
 {
     AVCUDADeviceContext *hwctx = device_ctx->hwctx;
     CudaFunctions *cu;
-    CUdevice cu_device;
     CUcontext dummy;
     int ret, device_idx = 0;
 
@@ -338,18 +343,25 @@ static int cuda_device_create(AVHWDeviceContext *device_ctx,
     if (ret < 0)
         goto error;
 
-    ret = CHECK_CU(cu->cuDeviceGet(&cu_device, device_idx));
+    ret = CHECK_CU(cu->cuDeviceGet(&hwctx->internal->cuda_device, device_idx));
     if (ret < 0)
         goto error;
 
-    ret = CHECK_CU(cu->cuCtxCreate(&hwctx->cuda_ctx, CU_CTX_SCHED_BLOCKING_SYNC, cu_device));
+    hwctx->internal->flags = flags;
+
+    if (flags & USE_PRIMARY_CONTEXT)
+        ret = CHECK_CU(cu->cuDevicePrimaryCtxRetain(&hwctx->cuda_ctx, hwctx->internal->cuda_device));
+    else
+        ret = CHECK_CU(cu->cuCtxCreate(&hwctx->cuda_ctx, CU_CTX_SCHED_BLOCKING_SYNC, hwctx->internal->cuda_device));
+
     if (ret < 0)
         goto error;
 
     // Setting stream to NULL will make functions automatically use the default CUstream
     hwctx->stream = NULL;
 
-    CHECK_CU(cu->cuCtxPopCurrent(&dummy));
+    if (!(flags & USE_PRIMARY_CONTEXT))
+        CHECK_CU(cu->cuCtxPopCurrent(&dummy));
 
     hwctx->internal->is_allocated = 1;
 
diff --git a/libavutil/hwcontext_cuda_internal.h b/libavutil/hwcontext_cuda_internal.h
index e1bc6ff350..d5633c58d5 100644
--- a/libavutil/hwcontext_cuda_internal.h
+++ b/libavutil/hwcontext_cuda_internal.h
@@ -31,6 +31,8 @@
 struct AVCUDADeviceContextInternal {
     CudaFunctions *cuda_dl;
     int is_allocated;
+    CUdevice cuda_device;
+    int flags;
 };
 
 #endif /* AVUTIL_HWCONTEXT_CUDA_INTERNAL_H */
-- 
2.17.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

Re: [FFmpeg-devel] [PATCH] Allow using primary CUDA device context

Reply via email to