[FFmpeg-devel] [PATCH] avcodec/cuviddec: update amount of decoder surfaces from within sequence decode callback

Roman Arzumanyan Mon, 05 Jun 2023 00:30:37 -0700

Hello,

This patch reduces the vRAM usage of the cuvid decoder implementation.
The number of surfaces used for decoding is updated within the parser
sequence decode callback.
Also, the "surfaces" AVDictionary option specific to cuvid was removed in
favor of "extra_hw_frames".


vRAM consumption was tested on various videos; savings range from 1%
for 360p resolution up to 21% for some 1080p H.264 videos.
Decoding performance was tested on various H.264 and H.265 videos at
resolutions of 360p and higher; no performance penalty was
found.

From 32a1b016e88fa40b983318d4583750ef250a78d9 Mon Sep 17 00:00:00 2001
From: Roman Arzumanyan <r.arzumanyan@visionlabs.ai>
Date: Thu, 1 Jun 2023 11:17:39 +0300
Subject: [PATCH] libavcodec/cuviddec: determine DPB size from within cuvid
 parser

---
 libavcodec/cuviddec.c | 29 +++++++++++++++++++++++++++--
 1 file changed, 27 insertions(+), 2 deletions(-)

diff --git a/libavcodec/cuviddec.c b/libavcodec/cuviddec.c
index 3d43bbd466..759ed49870 100644
--- a/libavcodec/cuviddec.c
+++ b/libavcodec/cuviddec.c
@@ -115,6 +115,12 @@ typedef struct CuvidParsedFrame
 
 #define CHECK_CU(x) FF_CUDA_CHECK_DL(avctx, ctx->cudl, x)
 
+// NV recommends [2;4] range
+#define CUVID_MAX_DISPLAY_DELAY (4)
+
+// Actual DPB size will be determined by parser.
+#define CUVID_DEFAULT_NUM_SURFACES (CUVID_MAX_DISPLAY_DELAY + 1)
+
 static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* format)
 {
     AVCodecContext *avctx = opaque;
@@ -309,6 +315,25 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form
         return 0;
     }
 
+    if (ctx->nb_surfaces < format->min_num_decode_surfaces + 3)
+        ctx->nb_surfaces = format->min_num_decode_surfaces + 3;
+
+    if (avctx->extra_hw_frames > 0)
+        ctx->nb_surfaces += avctx->extra_hw_frames;
+
+    if (0 > av_fifo_realloc2(ctx->frame_queue, ctx->nb_surfaces * sizeof(CuvidParsedFrame))) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to recreate frame queue on video sequence callback\n");
+        ctx->internal_error = AVERROR(EINVAL);
+        return 0;
+    }
+
+    ctx->key_frame = av_realloc_array(ctx->key_frame, ctx->nb_surfaces, sizeof(int));
+    if (!ctx->key_frame) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to recreate key frame queue on video sequence callback\n");
+        ctx->internal_error = AVERROR(EINVAL);
+        return 0;
+    }
+
     cuinfo.ulNumDecodeSurfaces = ctx->nb_surfaces;
     cuinfo.ulNumOutputSurfaces = 1;
     cuinfo.ulCreationFlags = cudaVideoCreate_PreferCUVID;
@@ -846,6 +871,7 @@ static av_cold int cuvid_decode_init(AVCodecContext *avctx)
         goto error;
     }
 
+    ctx->nb_surfaces = CUVID_DEFAULT_NUM_SURFACES;
     ctx->frame_queue = av_fifo_alloc2(ctx->nb_surfaces, sizeof(CuvidParsedFrame), 0);
     if (!ctx->frame_queue) {
         ret = AVERROR(ENOMEM);
@@ -993,7 +1019,7 @@ static av_cold int cuvid_decode_init(AVCodecContext *avctx)
     }
 
     ctx->cuparseinfo.ulMaxNumDecodeSurfaces = ctx->nb_surfaces;
-    ctx->cuparseinfo.ulMaxDisplayDelay = (avctx->flags & AV_CODEC_FLAG_LOW_DELAY) ? 0 : 4;
+    ctx->cuparseinfo.ulMaxDisplayDelay = (avctx->flags & AV_CODEC_FLAG_LOW_DELAY) ? 0 : CUVID_MAX_DISPLAY_DELAY;
     ctx->cuparseinfo.pUserData = avctx;
     ctx->cuparseinfo.pfnSequenceCallback = cuvid_handle_video_sequence;
     ctx->cuparseinfo.pfnDecodePicture = cuvid_handle_picture_decode;
@@ -1097,7 +1123,6 @@ static const AVOption options[] = {
     { "bob",      "Bob deinterlacing",                       0, AV_OPT_TYPE_CONST, { .i64 = cudaVideoDeinterlaceMode_Bob      }, 0, 0, VD, "deint" },
     { "adaptive", "Adaptive deinterlacing",                  0, AV_OPT_TYPE_CONST, { .i64 = cudaVideoDeinterlaceMode_Adaptive }, 0, 0, VD, "deint" },
     { "gpu",      "GPU to be used for decoding", OFFSET(cu_gpu), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, VD },
-    { "surfaces", "Maximum surfaces to be used for decoding", OFFSET(nb_surfaces), AV_OPT_TYPE_INT, { .i64 = 25 }, 0, INT_MAX, VD },
     { "drop_second_field", "Drop second field when deinterlacing", OFFSET(drop_second_field), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VD },
     { "crop",     "Crop (top)x(bottom)x(left)x(right)", OFFSET(crop_expr), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, VD },
     { "resize",   "Resize (width)x(height)", OFFSET(resize_expr), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, VD },
-- 
2.34.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH] avcodec/cuviddec: update amount of decoder surfaces from within sequence decode callback

Reply via email to