Hi all, attached is a patch for the above.
10-bit HEVC encoding is a new feature of the latest Pascal Nvidia GPUs, released in the past few months; I’ve added support for the yuv420p10le and yuv444p10le pixel formats. Rate control lookahead is available on pre-Pascal models too, but only with the latest SDK and latest drivers. As part of this I’ve bumped the required SDK version to the latest, which is 7. Feedback welcome. This is only my second patch; I seem to average about one a year :) Regards Oliver --- configure | 4 +- libavcodec/nvenc.c | 120 ++++++++++++++++++++++++++++++++++++++++++++++-- libavcodec/nvenc.h | 6 +++ libavcodec/nvenc_hevc.c | 6 ++- 4 files changed, 129 insertions(+), 7 deletions(-) diff --git a/configure b/configure index 9b92426..46ff144 100755 --- a/configure +++ b/configure @@ -5774,8 +5774,8 @@ enabled mmal && check_func_headers interface/mmal/mmal.h "MMAL_PARAMETER_VIDEO_M enabled netcdf && require_pkg_config netcdf netcdf.h nc_inq_libvers enabled nvenc && { check_header nvEncodeAPI.h || die "ERROR: nvEncodeAPI.h not found."; } && - { check_cpp_condition nvEncodeAPI.h "NVENCAPI_MAJOR_VERSION >= 6" || - die "ERROR: NVENC API version 5 or older is not supported"; } && + { check_cpp_condition nvEncodeAPI.h "NVENCAPI_MAJOR_VERSION >= 7" || + die "ERROR: NVENC API version 6 or older is not supported"; } && { [ $target_os != cygwin ] || die "ERROR: NVENC is not supported on Cygwin currently."; } enabled openal && { { for al_libs in "${OPENAL_LIBS}" "-lopenal" "-lOpenAL32"; do check_lib 'AL/al.h' alGetError "${al_libs}" && break; done } || diff --git a/libavcodec/nvenc.c b/libavcodec/nvenc.c index 984dd3b..685dd7d 100644 --- a/libavcodec/nvenc.c +++ b/libavcodec/nvenc.c @@ -75,8 +75,10 @@ const enum AVPixelFormat ff_nvenc_pix_fmts[] = { AV_PIX_FMT_YUV420P, + AV_PIX_FMT_YUV420P10LE, AV_PIX_FMT_NV12, AV_PIX_FMT_YUV444P, + AV_PIX_FMT_YUV444P10LE, #if CONFIG_CUDA AV_PIX_FMT_CUDA, #endif @@ -314,6 +316,18 @@ static int nvenc_check_capabilities(AVCodecContext *avctx) return 
AVERROR(ENOSYS); } + ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_10BIT_ENCODE); + if ((ctx->data_pix_fmt == AV_PIX_FMT_YUV420P10LE || ctx->data_pix_fmt == AV_PIX_FMT_YUV444P10LE) && ret <= 0) { + av_log(avctx, AV_LOG_VERBOSE, "10 bit encode not supported\n"); + return AVERROR(ENOSYS); + } + + ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_LOOKAHEAD); + if (ctx->rc_lookahead > 0 && ret <= 0) { + av_log(avctx, AV_LOG_VERBOSE, "RC lookahead not supported\n"); + return AVERROR(ENOSYS); + } + return 0; } @@ -673,6 +687,11 @@ static av_cold void nvenc_setup_rate_control(AVCodecContext *avctx) } else if (ctx->encode_config.rcParams.averageBitRate > 0) { ctx->encode_config.rcParams.vbvBufferSize = 2 * ctx->encode_config.rcParams.averageBitRate; } + + if (ctx->rc_lookahead > 0) { + ctx->encode_config.rcParams.enableLookahead = 1; + ctx->encode_config.rcParams.lookaheadDepth = FFMIN(ctx->rc_lookahead, 32); + } } static av_cold int nvenc_setup_h264_config(AVCodecContext *avctx) @@ -800,9 +819,26 @@ static av_cold int nvenc_setup_hevc_config(AVCodecContext *avctx) hevc->outputPictureTimingSEI = 1; } - /* No other profile is supported in the current SDK version 5 */ - cc->profileGUID = NV_ENC_HEVC_PROFILE_MAIN_GUID; - avctx->profile = FF_PROFILE_HEVC_MAIN; + switch(ctx->profile) { + case NV_ENC_HEVC_PROFILE_MAIN: + cc->profileGUID = NV_ENC_HEVC_PROFILE_MAIN_GUID; + avctx->profile = FF_PROFILE_HEVC_MAIN; + break; + case NV_ENC_HEVC_PROFILE_MAIN_10: + cc->profileGUID = NV_ENC_HEVC_PROFILE_MAIN10_GUID; + avctx->profile = FF_PROFILE_HEVC_MAIN_10; + break; + } + + // force setting profile as main10 if input is AV_PIX_FMT_YUVXXXP10LE + if (ctx->data_pix_fmt == AV_PIX_FMT_YUV420P10LE || ctx->data_pix_fmt == AV_PIX_FMT_YUV444P10LE) { + cc->profileGUID = NV_ENC_HEVC_PROFILE_MAIN10_GUID; + avctx->profile = FF_PROFILE_HEVC_MAIN_10; + } + + hevc->chromaFormatIDC = ctx->data_pix_fmt == AV_PIX_FMT_YUV444P || ctx->data_pix_fmt == AV_PIX_FMT_YUV444P10LE ? 
3 : 1; + + hevc->pixelBitDepthMinus8 = ctx->data_pix_fmt == AV_PIX_FMT_YUV420P10LE || ctx->data_pix_fmt == AV_PIX_FMT_YUV444P10LE ? 2 : 0; hevc->level = ctx->level; @@ -954,6 +990,10 @@ static av_cold int nvenc_alloc_surface(AVCodecContext *avctx, int idx) ctx->surfaces[idx].format = NV_ENC_BUFFER_FORMAT_YV12_PL; break; + case AV_PIX_FMT_YUV420P10LE: + ctx->surfaces[idx].format = NV_ENC_BUFFER_FORMAT_YUV420_10BIT; + break; + case AV_PIX_FMT_NV12: ctx->surfaces[idx].format = NV_ENC_BUFFER_FORMAT_NV12_PL; break; @@ -962,6 +1002,10 @@ static av_cold int nvenc_alloc_surface(AVCodecContext *avctx, int idx) ctx->surfaces[idx].format = NV_ENC_BUFFER_FORMAT_YUV444_PL; break; + case AV_PIX_FMT_YUV444P10LE: + ctx->surfaces[idx].format = NV_ENC_BUFFER_FORMAT_YUV444_10BIT; + break; + default: av_log(avctx, AV_LOG_FATAL, "Invalid input pixel format\n"); return AVERROR(EINVAL); @@ -1206,6 +1250,49 @@ static NvencSurface *get_free_frame(NvencContext *ctx) return NULL; } +static void copy_single_10bit_plane(uint8_t *dst, int dst_linesize, + const uint8_t *src, int src_linesize, + int width, int height) +{ + if (!dst || !src) + return; + av_assert0(abs(src_linesize) >= width << 1); + av_assert0(abs(dst_linesize) >= width << 1); + for (;height > 0; height--) { + uint16_t* tdst = (uint16_t*)dst; + uint16_t* tsrc = (uint16_t*)src; + for (int w = width; w > 0; w--) { + *tdst++ = *tsrc++ << 6; + } + dst += dst_linesize; + src += src_linesize; + } +} + +static void interleave_10bit_planes(uint8_t *dst, int dst_linesize, + const uint8_t *src1, int src1_linesize, + const uint8_t *src2, int src2_linesize, + int width, int height) +{ + if (!dst || !src1 || !src2) + return; + av_assert0(abs(src1_linesize) >= width); + av_assert0(abs(src2_linesize) >= width); + av_assert0(abs(dst_linesize) >= width << 1); + for (;height > 0; height--) { + uint16_t* tdst = (uint16_t*)dst; + uint16_t* tsrc1 = (uint16_t*)src1; + uint16_t* tsrc2 = (uint16_t*)src2; + for (int w = width; w > 0; w-=2) { + *tdst++ = 
*tsrc1++ << 6; + *tdst++ = *tsrc2++ << 6; + } + dst += dst_linesize; + src1 += src1_linesize; + src2 += src2_linesize; + } +} + static int nvenc_copy_frame(AVCodecContext *avctx, NvencSurface *inSurf, NV_ENC_LOCK_INPUT_BUFFER *lockBufferParams, const AVFrame *frame) { @@ -1228,6 +1315,17 @@ static int nvenc_copy_frame(AVCodecContext *avctx, NvencSurface *inSurf, av_image_copy_plane(buf, lockBufferParams->pitch >> 1, frame->data[1], frame->linesize[1], avctx->width >> 1, avctx->height >> 1); + } else if (frame->format == AV_PIX_FMT_YUV420P10LE) { + copy_single_10bit_plane(buf, lockBufferParams->pitch, + frame->data[0], frame->linesize[0], + avctx->width, avctx->height); + + buf += off; + + interleave_10bit_planes(buf, lockBufferParams->pitch, + frame->data[1], frame->linesize[1], + frame->data[2], frame->linesize[2], + avctx->width, avctx->height >> 1); } else if (frame->format == AV_PIX_FMT_NV12) { av_image_copy_plane(buf, lockBufferParams->pitch, frame->data[0], frame->linesize[0], @@ -1254,6 +1352,22 @@ static int nvenc_copy_frame(AVCodecContext *avctx, NvencSurface *inSurf, av_image_copy_plane(buf, lockBufferParams->pitch, frame->data[2], frame->linesize[2], avctx->width, avctx->height); + } else if (frame->format == AV_PIX_FMT_YUV444P10LE) { + copy_single_10bit_plane(buf, lockBufferParams->pitch, + frame->data[0], frame->linesize[0], + avctx->width, avctx->height); + + buf += off; + + copy_single_10bit_plane(buf, lockBufferParams->pitch, + frame->data[1], frame->linesize[1], + avctx->width, avctx->height); + + buf += off; + + copy_single_10bit_plane(buf, lockBufferParams->pitch, + frame->data[2], frame->linesize[2], + avctx->width, avctx->height); } else { av_log(avctx, AV_LOG_FATAL, "Invalid pixel format!\n"); return AVERROR(EINVAL); diff --git a/libavcodec/nvenc.h b/libavcodec/nvenc.h index 961cbc7..9366a26 100644 --- a/libavcodec/nvenc.h +++ b/libavcodec/nvenc.h @@ -117,6 +117,11 @@ enum { }; enum { + NV_ENC_HEVC_PROFILE_MAIN, + NV_ENC_HEVC_PROFILE_MAIN_10, 
+}; + +enum { NVENC_LOWLATENCY = 1, NVENC_LOSSLESS = 2, NVENC_ONE_PASS = 4, @@ -174,6 +179,7 @@ typedef struct NvencContext int device; int flags; int async_depth; + int rc_lookahead; } NvencContext; int ff_nvenc_encode_init(AVCodecContext *avctx); diff --git a/libavcodec/nvenc_hevc.c b/libavcodec/nvenc_hevc.c index 1ce7c89..04e351a 100644 --- a/libavcodec/nvenc_hevc.c +++ b/libavcodec/nvenc_hevc.c @@ -39,8 +39,9 @@ static const AVOption options[] = { { "llhp", "low latency hp", 0, AV_OPT_TYPE_CONST, { .i64 = PRESET_LOW_LATENCY_HP }, 0, 0, VE, "preset" }, { "lossless", "lossless", 0, AV_OPT_TYPE_CONST, { .i64 = PRESET_LOSSLESS_DEFAULT }, 0, 0, VE, "preset" }, { "losslesshp", "lossless hp", 0, AV_OPT_TYPE_CONST, { .i64 = PRESET_LOSSLESS_HP }, 0, 0, VE, "preset" }, - { "profile", "Set the encoding profile", OFFSET(profile), AV_OPT_TYPE_INT, { .i64 = FF_PROFILE_HEVC_MAIN }, FF_PROFILE_HEVC_MAIN, FF_PROFILE_HEVC_MAIN, VE, "profile" }, - { "main", "", 0, AV_OPT_TYPE_CONST, { .i64 = FF_PROFILE_HEVC_MAIN }, 0, 0, VE, "profile" }, + { "profile", "Set the encoding profile", OFFSET(profile), AV_OPT_TYPE_INT, { .i64 = NV_ENC_HEVC_PROFILE_MAIN }, NV_ENC_HEVC_PROFILE_MAIN, FF_PROFILE_HEVC_MAIN_10, VE, "profile" }, + { "main", "", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_HEVC_PROFILE_MAIN }, 0, 0, VE, "profile" }, + { "main10", "", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_HEVC_PROFILE_MAIN_10 }, 0, 0, VE, "profile" }, { "level", "Set the encoding level restriction", OFFSET(level), AV_OPT_TYPE_INT, { .i64 = NV_ENC_LEVEL_AUTOSELECT }, NV_ENC_LEVEL_AUTOSELECT, NV_ENC_LEVEL_HEVC_62, VE, "level" }, { "auto", "", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_LEVEL_AUTOSELECT }, 0, 0, VE, "level" }, { "1", "", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_LEVEL_HEVC_1 }, 0, 0, VE, "level" }, @@ -73,6 +74,7 @@ static const AVOption options[] = { { "ll_2pass_quality", "Multi-pass optimized for image quality (only for low-latency presets)", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_PARAMS_RC_2_PASS_QUALITY }, 0, 
0, VE, "rc" }, { "ll_2pass_size", "Multi-pass optimized for constant frame size (only for low-latency presets)", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_PARAMS_RC_2_PASS_FRAMESIZE_CAP }, 0, 0, VE, "rc" }, { "vbr_2pass", "Multi-pass variable bitrate mode", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_PARAMS_RC_2_PASS_VBR }, 0, 0, VE, "rc" }, + { "rc-lookahead", "Number of frames to look ahead for rate-control", OFFSET(rc_lookahead), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, INT_MAX, VE }, { "surfaces", "Number of concurrent surfaces", OFFSET(nb_surfaces), AV_OPT_TYPE_INT, { .i64 = 32 }, 0, INT_MAX, VE }, { "cbr", "Use cbr encoding mode", OFFSET(cbr), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE }, { "2pass", "Use 2pass encoding mode", OFFSET(twopass), AV_OPT_TYPE_BOOL, { .i64 = -1 }, -1, 1, VE }, -- 2.7.4 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel