Hello, This patch reduces the vRAM usage of the cuvid decoder implementation. The number of surfaces used for decoding is now updated from within the parser's video sequence callback. In addition, the cuvid-specific "surfaces" AVOption has been removed in favor of the generic "extra_hw_frames" option.
vRAM consumption was tested on various videos; savings range from 1% at 360p resolution up to 21% for some 1080p H.264 videos. Decoding performance was tested on various H.264 and H.265 videos at resolutions of 360p and higher, and no performance penalty was found.
From 32a1b016e88fa40b983318d4583750ef250a78d9 Mon Sep 17 00:00:00 2001 From: Roman Arzumanyan <r.arzumanyan@visionlabs.ai> Date: Thu, 1 Jun 2023 11:17:39 +0300 Subject: [PATCH] libavcodec/cuviddec: determine DPB size from within cuvid parser --- libavcodec/cuviddec.c | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/libavcodec/cuviddec.c b/libavcodec/cuviddec.c index 3d43bbd466..759ed49870 100644 --- a/libavcodec/cuviddec.c +++ b/libavcodec/cuviddec.c @@ -115,6 +115,12 @@ typedef struct CuvidParsedFrame #define CHECK_CU(x) FF_CUDA_CHECK_DL(avctx, ctx->cudl, x) +// NV recommends [2;4] range +#define CUVID_MAX_DISPLAY_DELAY (4) + +// Actual DPB size will be determined by parser. +#define CUVID_DEFAULT_NUM_SURFACES (CUVID_MAX_DISPLAY_DELAY + 1) + static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* format) { AVCodecContext *avctx = opaque; @@ -309,6 +315,25 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form return 0; } + if (ctx->nb_surfaces < format->min_num_decode_surfaces + 3) + ctx->nb_surfaces = format->min_num_decode_surfaces + 3; + + if (avctx->extra_hw_frames > 0) + ctx->nb_surfaces += avctx->extra_hw_frames; + + if (0 > av_fifo_realloc2(ctx->frame_queue, ctx->nb_surfaces * sizeof(CuvidParsedFrame))) { + av_log(avctx, AV_LOG_ERROR, "Failed to recreate frame queue on video sequence callback\n"); + ctx->internal_error = AVERROR(EINVAL); + return 0; + } + + ctx->key_frame = av_realloc_array(ctx->key_frame, ctx->nb_surfaces, sizeof(int)); + if (!ctx->key_frame) { + av_log(avctx, AV_LOG_ERROR, "Failed to recreate key frame queue on video sequence callback\n"); + ctx->internal_error = AVERROR(EINVAL); + return 0; + } + cuinfo.ulNumDecodeSurfaces = ctx->nb_surfaces; cuinfo.ulNumOutputSurfaces = 1; cuinfo.ulCreationFlags = cudaVideoCreate_PreferCUVID; @@ -846,6 +871,7 @@ static av_cold int cuvid_decode_init(AVCodecContext *avctx) goto error; } + ctx->nb_surfaces = 
CUVID_DEFAULT_NUM_SURFACES; ctx->frame_queue = av_fifo_alloc2(ctx->nb_surfaces, sizeof(CuvidParsedFrame), 0); if (!ctx->frame_queue) { ret = AVERROR(ENOMEM); @@ -993,7 +1019,7 @@ static av_cold int cuvid_decode_init(AVCodecContext *avctx) } ctx->cuparseinfo.ulMaxNumDecodeSurfaces = ctx->nb_surfaces; - ctx->cuparseinfo.ulMaxDisplayDelay = (avctx->flags & AV_CODEC_FLAG_LOW_DELAY) ? 0 : 4; + ctx->cuparseinfo.ulMaxDisplayDelay = (avctx->flags & AV_CODEC_FLAG_LOW_DELAY) ? 0 : CUVID_MAX_DISPLAY_DELAY; ctx->cuparseinfo.pUserData = avctx; ctx->cuparseinfo.pfnSequenceCallback = cuvid_handle_video_sequence; ctx->cuparseinfo.pfnDecodePicture = cuvid_handle_picture_decode; @@ -1097,7 +1123,6 @@ static const AVOption options[] = { { "bob", "Bob deinterlacing", 0, AV_OPT_TYPE_CONST, { .i64 = cudaVideoDeinterlaceMode_Bob }, 0, 0, VD, "deint" }, { "adaptive", "Adaptive deinterlacing", 0, AV_OPT_TYPE_CONST, { .i64 = cudaVideoDeinterlaceMode_Adaptive }, 0, 0, VD, "deint" }, { "gpu", "GPU to be used for decoding", OFFSET(cu_gpu), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, VD }, - { "surfaces", "Maximum surfaces to be used for decoding", OFFSET(nb_surfaces), AV_OPT_TYPE_INT, { .i64 = 25 }, 0, INT_MAX, VD }, { "drop_second_field", "Drop second field when deinterlacing", OFFSET(drop_second_field), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VD }, { "crop", "Crop (top)x(bottom)x(left)x(right)", OFFSET(crop_expr), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, VD }, { "resize", "Resize (width)x(height)", OFFSET(resize_expr), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, VD }, -- 2.34.1
_______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".