On Sat, 19 Nov 2016 17:18:08 -0800 Philip Langdale <phil...@overt.org> wrote:
> The nvidia 375.xx driver introduces support for P016 output surfaces, > for 10bit and 12bit HEVC content (it's also the first driver to support > hardware decoding of 12bit content). > > Technically, we don't support P016, but in practice I don't think we > zero-out the extra bits in P010 so it can be used to carry the data. > > This change introduces cuvid decoder support for P010 output for > output to hardware and system memory surfaces. For simplicity, it > does not maintain the previous ability to output NV12 for > 8 bit > input video - the user will need to update their driver to decode > such videos. > > After this change, both cuvid and nvenc support P010, but the > ffmpeg_cuvid transcoding logic will need more work to connect the > two together. Similarly, the scale_npp filter still only works with > 8bit surfaces. > > Signed-off-by: Philip Langdale <phil...@overt.org> > --- > compat/cuda/dynlink_cuviddec.h | 3 ++- > libavcodec/cuvid.c | 58 > +++++++++++++++++++++++++++++++----------- > libavutil/hwcontext_cuda.c | 11 +++++++- > 3 files changed, 55 insertions(+), 17 deletions(-) > > diff --git a/compat/cuda/dynlink_cuviddec.h b/compat/cuda/dynlink_cuviddec.h > index 17207bc..9ff2741 100644 > --- a/compat/cuda/dynlink_cuviddec.h > +++ b/compat/cuda/dynlink_cuviddec.h > @@ -83,7 +83,8 @@ typedef enum cudaVideoCodec_enum { > * Video Surface Formats Enums > */ > typedef enum cudaVideoSurfaceFormat_enum { > - cudaVideoSurfaceFormat_NV12=0 /**< NV12 (currently the only > supported output format) */ > + cudaVideoSurfaceFormat_NV12=0, /**< NV12 */ > + cudaVideoSurfaceFormat_P016=1 /**< P016 */ > } cudaVideoSurfaceFormat; > > /*! 
> diff --git a/libavcodec/cuvid.c b/libavcodec/cuvid.c > index c3e831a..34b0734 100644 > --- a/libavcodec/cuvid.c > +++ b/libavcodec/cuvid.c > @@ -28,6 +28,7 @@ > #include "libavutil/fifo.h" > #include "libavutil/log.h" > #include "libavutil/opt.h" > +#include "libavutil/pixdesc.h" > > #include "avcodec.h" > #include "internal.h" > @@ -103,11 +104,35 @@ static int CUDAAPI cuvid_handle_video_sequence(void > *opaque, CUVIDEOFORMAT* form > CuvidContext *ctx = avctx->priv_data; > AVHWFramesContext *hwframe_ctx = (AVHWFramesContext*)ctx->hwframe->data; > CUVIDDECODECREATEINFO cuinfo; > + int surface_fmt; > + > + enum AVPixelFormat pix_fmts_nv12[3] = { AV_PIX_FMT_CUDA, > + AV_PIX_FMT_NV12, > + AV_PIX_FMT_NONE }; > + > + enum AVPixelFormat pix_fmts_p010[3] = { AV_PIX_FMT_CUDA, > + AV_PIX_FMT_P010, > + AV_PIX_FMT_NONE }; > > av_log(avctx, AV_LOG_TRACE, "pfnSequenceCallback, > progressive_sequence=%d\n", format->progressive_sequence); > > ctx->internal_error = 0; > > + surface_fmt = ff_get_format(avctx, format->bit_depth_luma_minus8 > 0 ? 
> + pix_fmts_p010 : pix_fmts_nv12); > + if (surface_fmt < 0) { > + av_log(avctx, AV_LOG_ERROR, "ff_get_format failed: %d\n", > surface_fmt); > + ctx->internal_error = AVERROR(EINVAL); > + return 0; > + } > + > + av_log(avctx, AV_LOG_VERBOSE, "Formats: Original: %s | HW: %s | SW: > %s\n", > + av_get_pix_fmt_name(avctx->pix_fmt), > + av_get_pix_fmt_name(surface_fmt), > + av_get_pix_fmt_name(avctx->sw_pix_fmt)); > + > + avctx->pix_fmt = surface_fmt; > + > avctx->width = format->display_area.right; > avctx->height = format->display_area.bottom; > > @@ -156,7 +181,7 @@ static int CUDAAPI cuvid_handle_video_sequence(void > *opaque, CUVIDEOFORMAT* form > hwframe_ctx->width < avctx->width || > hwframe_ctx->height < avctx->height || > hwframe_ctx->format != AV_PIX_FMT_CUDA || > - hwframe_ctx->sw_format != AV_PIX_FMT_NV12)) { > + hwframe_ctx->sw_format != avctx->sw_pix_fmt)) { > av_log(avctx, AV_LOG_ERROR, "AVHWFramesContext is already > initialized with incompatible parameters\n"); > ctx->internal_error = AVERROR(EINVAL); > return 0; > @@ -177,7 +202,19 @@ static int CUDAAPI cuvid_handle_video_sequence(void > *opaque, CUVIDEOFORMAT* form > > cuinfo.CodecType = ctx->codec_type = format->codec; > cuinfo.ChromaFormat = format->chroma_format; > - cuinfo.OutputFormat = cudaVideoSurfaceFormat_NV12; > + > + switch (avctx->sw_pix_fmt) { > + case AV_PIX_FMT_NV12: > + cuinfo.OutputFormat = cudaVideoSurfaceFormat_NV12; > + break; > + case AV_PIX_FMT_P010: > + cuinfo.OutputFormat = cudaVideoSurfaceFormat_P016; > + break; > + default: > + av_log(avctx, AV_LOG_ERROR, "Output formats other than NV12 or P010 > are not supported\n"); > + ctx->internal_error = AVERROR(EINVAL); > + return 0; > + } > > cuinfo.ulWidth = avctx->coded_width; > cuinfo.ulHeight = avctx->coded_height; > @@ -209,7 +246,7 @@ static int CUDAAPI cuvid_handle_video_sequence(void > *opaque, CUVIDEOFORMAT* form > > if (!hwframe_ctx->pool) { > hwframe_ctx->format = AV_PIX_FMT_CUDA; > - hwframe_ctx->sw_format = 
AV_PIX_FMT_NV12; > + hwframe_ctx->sw_format = avctx->sw_pix_fmt; > hwframe_ctx->width = avctx->width; > hwframe_ctx->height = avctx->height; > > @@ -417,7 +454,8 @@ static int cuvid_output_frame(AVCodecContext *avctx, > AVFrame *frame) > > offset += avctx->coded_height; > } > - } else if (avctx->pix_fmt == AV_PIX_FMT_NV12) { > + } else if (avctx->pix_fmt == AV_PIX_FMT_NV12 || > + avctx->pix_fmt == AV_PIX_FMT_P010) { > AVFrame *tmp_frame = av_frame_alloc(); > if (!tmp_frame) { > av_log(avctx, AV_LOG_ERROR, "av_frame_alloc failed\n"); > @@ -615,17 +653,6 @@ static av_cold int cuvid_decode_init(AVCodecContext > *avctx) > const AVBitStreamFilter *bsf; > int ret = 0; > > - enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_CUDA, > - AV_PIX_FMT_NV12, > - AV_PIX_FMT_NONE }; > - > - ret = ff_get_format(avctx, pix_fmts); > - if (ret < 0) { > - av_log(avctx, AV_LOG_ERROR, "ff_get_format failed: %d\n", ret); > - return ret; > - } > - avctx->pix_fmt = ret; > - > ret = cuvid_load_functions(&ctx->cvdl); > if (ret < 0) { > av_log(avctx, AV_LOG_ERROR, "Failed loading nvcuvid.\n"); > @@ -899,6 +926,7 @@ static const AVOption options[] = { > .capabilities = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AVOID_PROBING, \ > .pix_fmts = (const enum AVPixelFormat[]){ AV_PIX_FMT_CUDA, \ > AV_PIX_FMT_NV12, \ > + AV_PIX_FMT_P010, \ > AV_PIX_FMT_NONE }, \ > }; > > diff --git a/libavutil/hwcontext_cuda.c b/libavutil/hwcontext_cuda.c > index 30de299..e413aa8 100644 > --- a/libavutil/hwcontext_cuda.c > +++ b/libavutil/hwcontext_cuda.c > @@ -35,6 +35,7 @@ static const enum AVPixelFormat supported_formats[] = { > AV_PIX_FMT_NV12, > AV_PIX_FMT_YUV420P, > AV_PIX_FMT_YUV444P, > + AV_PIX_FMT_P010, > }; > > static void cuda_buffer_free(void *opaque, uint8_t *data) > @@ -111,6 +112,7 @@ static int cuda_frames_init(AVHWFramesContext *ctx) > size = aligned_width * ctx->height * 3 / 2; > break; > case AV_PIX_FMT_YUV444P: > + case AV_PIX_FMT_P010: > size = aligned_width * ctx->height * 3; > break; > } > @@ -125,7 +127,13 
@@ static int cuda_frames_init(AVHWFramesContext *ctx) > > static int cuda_get_buffer(AVHWFramesContext *ctx, AVFrame *frame) > { > - int aligned_width = FFALIGN(ctx->width, CUDA_FRAME_ALIGNMENT); > + int aligned_width; > + int width_in_bytes = ctx->width; > + > + if (ctx->sw_format == AV_PIX_FMT_P010) { > + width_in_bytes *= 2; > + } > + aligned_width = FFALIGN(width_in_bytes, CUDA_FRAME_ALIGNMENT); > > frame->buf[0] = av_buffer_pool_get(ctx->pool); > if (!frame->buf[0]) > @@ -133,6 +141,7 @@ static int cuda_get_buffer(AVHWFramesContext *ctx, > AVFrame *frame) > > switch (ctx->sw_format) { > case AV_PIX_FMT_NV12: > + case AV_PIX_FMT_P010: > frame->data[0] = frame->buf[0]->data; > frame->data[1] = frame->data[0] + aligned_width * ctx->height; > frame->linesize[0] = aligned_width; I think it would be better to add a dedicated P016 pixfmt if the decoder can output data in which the low-order bits that are normally zero in P010 are actually set. _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel