On Sun, Jun 5, 2016 at 8:58 PM, Timo Rothenpieler <t...@rothenpieler.org> wrote: > --- > Changelog | 2 + > MAINTAINERS | 1 + > configure | 20 ++ > libavcodec/Makefile | 2 + > libavcodec/allcodecs.c | 4 + > libavcodec/cuvid.c | 550 > +++++++++++++++++++++++++++++++++++++++++++++++++ > libavcodec/version.h | 4 +- > 7 files changed, 581 insertions(+), 2 deletions(-) > create mode 100644 libavcodec/cuvid.c > > diff --git a/Changelog b/Changelog > index d5228b2..35e17e5 100644 > --- a/Changelog > +++ b/Changelog > @@ -38,6 +38,8 @@ version <next>: > - loudnorm filter > - MTAF demuxer and decoder > - MagicYUV decoder > +- CUDA CUVID H264/HEVC decoder > + > > version 3.0: > - Common Encryption (CENC) MP4 encoding and decoding support > diff --git a/MAINTAINERS b/MAINTAINERS > index 9ce2524..bf99d0c 100644 > --- a/MAINTAINERS > +++ b/MAINTAINERS > @@ -163,6 +163,7 @@ Codecs: > cpia.c Stephan Hilb > crystalhd.c Philip Langdale > cscd.c Reimar Doeffinger > + cuvid.c Timo Rothenpieler > dca.c Kostya Shishkov, Benjamin Larsson > dirac* Rostislav Pehlivanov > dnxhd* Baptiste Coudurier > diff --git a/configure b/configure > index 7c463a5..2b2d5f8 100755 > --- a/configure > +++ b/configure > @@ -158,6 +158,7 @@ Hardware accelerators: > > Hardware-accelerated decoding/encoding: > --enable-cuda enable dynamically linked CUDA [no] > + --enable-cuvid enable CUVID support [autodetect] > --enable-libmfx enable HW acceleration through libmfx > --enable-mmal enable decoding via MMAL [no] > --enable-nvenc enable NVIDIA NVENC support [no] > @@ -1567,6 +1568,7 @@ FEATURE_LIST=" > > HW_CODECS_LIST=" > cuda > + cuvid > libmfx > mmal > nvenc > @@ -2328,6 +2330,7 @@ comfortnoise_encoder_select="lpc" > cook_decoder_select="audiodsp mdct sinewin" > cscd_decoder_select="lzo" > cscd_decoder_suggest="zlib" > +cuvid_decoder_deps="cuda cuvid" > dca_decoder_select="mdct" > dds_decoder_select="texturedsp" > dirac_decoder_select="dirac_parse dwt golomb videodsp mpegvideoenc" > @@ -2522,6 +2525,7 @@ 
audiotoolbox_extralibs="-framework CoreFoundation > -framework AudioToolbox -frame > > # hardware accelerators > crystalhd_deps="libcrystalhd_libcrystalhd_if_h" > +cuvid_deps="cuda" > d3d11va_deps="d3d11_h dxva_h ID3D11VideoDecoder ID3D11VideoContext" > dxva2_deps="dxva2api_h DXVA2_ConfigPictureDecode" > vaapi_deps="va_va_h" > @@ -2539,6 +2543,7 @@ h263_vaapi_hwaccel_select="h263_decoder" > h263_videotoolbox_hwaccel_deps="videotoolbox" > h263_videotoolbox_hwaccel_select="h263_decoder" > h264_crystalhd_decoder_select="crystalhd h264_mp4toannexb_bsf h264_parser" > +h264_cuvid_hwaccel_deps="cuda cuvid" > h264_d3d11va_hwaccel_deps="d3d11va" > h264_d3d11va_hwaccel_select="h264_decoder" > h264_dxva2_hwaccel_deps="dxva2" > @@ -2564,6 +2569,7 @@ h264_vdpau_hwaccel_deps="vdpau" > h264_vdpau_hwaccel_select="h264_decoder" > h264_videotoolbox_hwaccel_deps="videotoolbox" > h264_videotoolbox_hwaccel_select="h264_decoder" > +hevc_cuvid_hwaccel_deps="cuda cuvid" > hevc_d3d11va_hwaccel_deps="d3d11va DXVA_PicParams_HEVC" > hevc_d3d11va_hwaccel_select="hevc_decoder" > hevc_dxva2_hwaccel_deps="dxva2 DXVA_PicParams_HEVC" > @@ -2657,6 +2663,8 @@ hwupload_cuda_filter_deps="cuda" > scale_npp_filter_deps="cuda libnpp" > > nvenc_encoder_deps="nvenc" > +h264_cuvid_decoder_deps="cuda cuvid" > +h264_cuvid_decoder_select="h264_mp4toannexb_bsf h264_cuvid_hwaccel" > h264_qsv_decoder_deps="libmfx" > h264_qsv_decoder_select="h264_mp4toannexb_bsf h264_parser qsvdec > h264_qsv_hwaccel" > h264_qsv_encoder_deps="libmfx" > @@ -2664,6 +2672,8 @@ h264_qsv_encoder_select="qsvenc" > h264_vaapi_encoder_deps="VAEncPictureParameterBufferH264" > h264_vaapi_encoder_select="vaapi_encode golomb" > > +hevc_cuvid_decoder_deps="cuda cuvid" > +hevc_cuvid_decoder_select="hevc_mp4toannexb_bsf hevc_cuvid_hwaccel" > hevc_qsv_decoder_deps="libmfx" > hevc_qsv_decoder_select="hevc_mp4toannexb_bsf hevc_parser qsvdec > hevc_qsv_hwaccel" > hevc_qsv_encoder_deps="libmfx" > @@ -5002,6 +5012,7 @@ die_license_disabled gpl libxvid > 
die_license_disabled gpl x11grab > > die_license_disabled nonfree cuda > +die_license_disabled nonfree cuvid > die_license_disabled nonfree libfaac > die_license_disabled nonfree libnpp > enabled gpl && die_license_disabled_gpl nonfree libfdk_aac > @@ -5572,6 +5583,11 @@ for func in $COMPLEX_FUNCS; do > eval check_complexfunc $func \${${func}_args:-1} > done > > +# Enable CUVID by default if CUDA is enabled > +if enabled cuda && ! disabled cuvid; then > + enable cuvid > +fi > + > # these are off by default, so fail if requested and not available > enabled avfoundation_indev && { check_header_objcc > AVFoundation/AVFoundation.h || disable avfoundation_indev; } > enabled avfoundation_indev && { check_lib2 CoreGraphics/CoreGraphics.h > CGGetActiveDisplayList -framework CoreGraphics || > @@ -5581,6 +5597,10 @@ enabled avisynth && { { check_lib2 > "windows.h" LoadLibrary; } || > die "ERROR: LoadLibrary/dlopen not found for > avisynth"; } > enabled cuda && { check_lib cuda.h cuInit -lcuda || > die "ERROR: CUDA not found"; } > +enabled cuvid && { check_lib cuviddec.h cuvidCreateDecoder > -lnvcuvid || > + die "ERROR: CUVID not found"; } && > + { enabled cuda || > + die "ERROR: CUVID requires CUDA"; } > enabled chromaprint && require chromaprint chromaprint.h > chromaprint_get_version -lchromaprint > enabled coreimage_filter && { check_header_objcc QuartzCore/CoreImage.h || > disable coreimage_filter; } > enabled coreimagesrc_filter && { check_header_objcc QuartzCore/CoreImage.h > || disable coreimagesrc_filter; } > diff --git a/libavcodec/Makefile b/libavcodec/Makefile > index 6e26aad..2927b84 100644 > --- a/libavcodec/Makefile > +++ b/libavcodec/Makefile > @@ -767,12 +767,14 @@ OBJS-$(CONFIG_QSV) += qsv.o > OBJS-$(CONFIG_QSVDEC) += qsvdec.o > OBJS-$(CONFIG_QSVENC) += qsvenc.o > > +OBJS-$(CONFIG_H264_CUVID_DECODER) += cuvid.o > OBJS-$(CONFIG_H264_MMAL_DECODER) += mmaldec.o > OBJS-$(CONFIG_H264_VDA_DECODER) += vda_h264_dec.o > OBJS-$(CONFIG_H264_OMX_ENCODER) += omx.o > 
OBJS-$(CONFIG_H264_QSV_DECODER) += qsvdec_h2645.o > OBJS-$(CONFIG_H264_QSV_ENCODER) += qsvenc_h264.o > OBJS-$(CONFIG_H264_VAAPI_ENCODER) += vaapi_encode_h264.o > vaapi_encode_h26x.o > +OBJS-$(CONFIG_HEVC_CUVID_DECODER) += cuvid.o > OBJS-$(CONFIG_HEVC_QSV_DECODER) += qsvdec_h2645.o > OBJS-$(CONFIG_HEVC_QSV_ENCODER) += qsvenc_hevc.o hevc_ps_enc.o > h2645_parse.o > OBJS-$(CONFIG_HEVC_VAAPI_ENCODER) += vaapi_encode_h265.o > vaapi_encode_h26x.o > diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c > index 7aa54ee..9256e99 100644 > --- a/libavcodec/allcodecs.c > +++ b/libavcodec/allcodecs.c > @@ -69,6 +69,7 @@ void avcodec_register_all(void) > /* hardware accelerators */ > REGISTER_HWACCEL(H263_VAAPI, h263_vaapi); > REGISTER_HWACCEL(H263_VIDEOTOOLBOX, h263_videotoolbox); > + REGISTER_HWACCEL(H264_CUVID, h264_cuvid); > REGISTER_HWACCEL(H264_D3D11VA, h264_d3d11va); > REGISTER_HWACCEL(H264_DXVA2, h264_dxva2); > REGISTER_HWACCEL(H264_MMAL, h264_mmal); > @@ -78,6 +79,7 @@ void avcodec_register_all(void) > REGISTER_HWACCEL(H264_VDA_OLD, h264_vda_old); > REGISTER_HWACCEL(H264_VDPAU, h264_vdpau); > REGISTER_HWACCEL(H264_VIDEOTOOLBOX, h264_videotoolbox); > + REGISTER_HWACCEL(HEVC_CUVID, hevc_cuvid); > REGISTER_HWACCEL(HEVC_D3D11VA, hevc_d3d11va); > REGISTER_HWACCEL(HEVC_DXVA2, hevc_dxva2); > REGISTER_HWACCEL(HEVC_QSV, hevc_qsv); > @@ -617,6 +619,7 @@ void avcodec_register_all(void) > /* external libraries, that shouldn't be used by default if one of the > * above is available */ > REGISTER_ENCODER(LIBOPENH264, libopenh264); > + REGISTER_DECODER(H264_CUVID, h264_cuvid); > REGISTER_ENCODER(H264_QSV, h264_qsv); > REGISTER_ENCODER(H264_VAAPI, h264_vaapi); > REGISTER_ENCODER(H264_VIDEOTOOLBOX, h264_videotoolbox); > @@ -624,6 +627,7 @@ void avcodec_register_all(void) > REGISTER_ENCODER(H264_OMX, h264_omx); > REGISTER_ENCODER(NVENC_H264, nvenc_h264); > REGISTER_ENCODER(NVENC_HEVC, nvenc_hevc); > + REGISTER_DECODER(HEVC_CUVID, hevc_cuvid); > REGISTER_ENCODER(HEVC_QSV, hevc_qsv); 
> REGISTER_ENCODER(HEVC_VAAPI, hevc_vaapi); > REGISTER_ENCODER(LIBKVAZAAR, libkvazaar); > diff --git a/libavcodec/cuvid.c b/libavcodec/cuvid.c > new file mode 100644 > index 0000000..6cadadf > --- /dev/null > +++ b/libavcodec/cuvid.c > @@ -0,0 +1,550 @@ > +/* > + * Nvidia CUVID decoder > + * Copyright (c) 2016 Timo Rothenpieler <t...@rothenpieler.org> > + * > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. > + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 > USA > + */ > + > +#include "libavutil/buffer.h" > +#include "libavutil/mathematics.h" > +#include "libavutil/hwcontext.h" > +#include "libavutil/hwcontext_cuda.h" > +#include "libavutil/fifo.h" > +#include "libavutil/log.h" > + > +#include "avcodec.h" > +#include "internal.h" > + > +#include <nvcuvid.h> > + > +#define MAX_FRAME_COUNT 20 > +#define FRAME_DELAY 4 > + > +typedef struct CuvidContext > +{ > + CUvideodecoder cudecoder; > + CUvideoparser cuparser; > + > + AVBufferRef *hwdevice; > + AVBufferRef *hwframe; > + > + AVBSFContext *bsf; > + > + AVFifoBuffer *frame_queue; > + > + int internal_error; > +} CuvidContext; > + > +static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* > format) > +{ > + AVCodecContext *avctx = opaque; > + CuvidContext *ctx = avctx->priv_data; > + AVHWFramesContext *hwframe_ctx = 
(AVHWFramesContext*)ctx->hwframe->data; > + CUVIDDECODECREATEINFO cuinfo; > + CUresult err; > + > + ctx->internal_error = 0; > + > + if (ctx->cudecoder) { > + av_log(avctx, AV_LOG_ERROR, "re-initializing decoder is not > supported\n"); > + ctx->internal_error = AVERROR(EINVAL); > + return 0; > + } > + > + if (hwframe_ctx->pool) { > + av_log(avctx, AV_LOG_ERROR, "AVHWFramesContext is already > initialized\n"); > + ctx->internal_error = AVERROR(EINVAL); > + return 0; > + } > + > + avctx->width = format->coded_width; > + avctx->height = format->coded_height; > + > + memset(&cuinfo, 0, sizeof(cuinfo)); > + > + cuinfo.CodecType = format->codec; > + cuinfo.ChromaFormat = format->chroma_format; > + cuinfo.OutputFormat = cudaVideoSurfaceFormat_NV12; > + cuinfo.ulWidth = format->coded_width; > + cuinfo.ulHeight = format->coded_height; > + cuinfo.ulTargetWidth = cuinfo.ulWidth; > + cuinfo.ulTargetHeight = cuinfo.ulHeight; > + cuinfo.display_area.right = cuinfo.ulWidth; > + cuinfo.display_area.bottom = cuinfo.ulHeight; > + cuinfo.ulNumDecodeSurfaces = MAX_FRAME_COUNT; > + cuinfo.ulNumOutputSurfaces = 1; > + cuinfo.ulCreationFlags = cudaVideoCreate_PreferCUVID; > + > + if (format->progressive_sequence) > + cuinfo.DeinterlaceMode = cudaVideoDeinterlaceMode_Weave; > + else > + cuinfo.DeinterlaceMode = cudaVideoDeinterlaceMode_Adaptive; > + > + err = cuvidCreateDecoder(&ctx->cudecoder, &cuinfo); > + if (err != CUDA_SUCCESS) { > + av_log(avctx, AV_LOG_ERROR, "Error creating a CUVID decoder\n"); > + ctx->internal_error = AVERROR_UNKNOWN; > + return 0; > + } > + > + hwframe_ctx->format = AV_PIX_FMT_CUDA; > + hwframe_ctx->sw_format = AV_PIX_FMT_NV12; > + hwframe_ctx->width = FFALIGN(cuinfo.ulTargetWidth, 16); > + hwframe_ctx->height = FFALIGN(cuinfo.ulTargetHeight, 16); > + > + if ((ctx->internal_error = av_hwframe_ctx_init(ctx->hwframe)) < 0) { > + av_log(avctx, AV_LOG_ERROR, "av_hwframe_ctx_init failed\n"); > + return 0; > + } > + > + return 1; > +} > + > +static int CUDAAPI 
cuvid_handle_picture_decode(void *opaque, CUVIDPICPARAMS* > picparams) > +{ > + AVCodecContext *avctx = opaque; > + CuvidContext *ctx = avctx->priv_data; > + CUresult err; > + > + ctx->internal_error = 0; > + > + err = cuvidDecodePicture(ctx->cudecoder, picparams); > + if (err != CUDA_SUCCESS) { > + av_log(avctx, AV_LOG_ERROR, "cuvidDecodePicture failed\n"); > + ctx->internal_error = AVERROR_UNKNOWN; > + return 0; > + } > + > + return 1; > +} > + > +static int CUDAAPI cuvid_handle_picture_display(void *opaque, > CUVIDPARSERDISPINFO* dispinfo) > +{ > + AVCodecContext *avctx = opaque; > + CuvidContext *ctx = avctx->priv_data; > + > + ctx->internal_error = 0; > + > + av_fifo_generic_write(ctx->frame_queue, dispinfo, > sizeof(CUVIDPARSERDISPINFO), NULL); > + > + return 1; > +} > + > +static int cuvid_decode_frame(AVCodecContext *avctx, void *data, int > *got_frame, AVPacket *avpkt) > +{ > + CuvidContext *ctx = avctx->priv_data; > + AVHWDeviceContext *device_ctx = (AVHWDeviceContext*)ctx->hwdevice->data; > + AVCUDADeviceContext *device_hwctx = device_ctx->hwctx; > + CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx; > + AVFrame *frame = data; > + CUVIDSOURCEDATAPACKET cupkt; > + CUresult err; > + AVPacket filter_packet = { 0 }; > + AVPacket filtered_packet = { 0 }; > + int ret = 0; > + > + if (avpkt->size) { > + if ((ret = av_packet_ref(&filter_packet, avpkt)) < 0) { > + av_log(avctx, AV_LOG_ERROR, "av_packet_ref failed\n"); > + return ret; > + } > + > + if ((ret = av_bsf_send_packet(ctx->bsf, &filter_packet)) < 0) { > + av_log(avctx, AV_LOG_ERROR, "av_bsf_send_packet failed\n"); > + av_packet_unref(&filter_packet); > + return ret; > + } > + > + if ((ret = av_bsf_receive_packet(ctx->bsf, &filtered_packet)) < 0) { > + av_log(avctx, AV_LOG_ERROR, "av_bsf_receive_packet failed\n"); > + return ret; > + } > + } > + > + err = cuCtxPushCurrent(cuda_ctx); > + if (err != CUDA_SUCCESS) { > + av_log(avctx, AV_LOG_ERROR, "cuCtxPushCurrent failed\n"); > + 
av_packet_unref(&filtered_packet); > + return AVERROR_UNKNOWN; > + } > + > + memset(&cupkt, 0, sizeof(cupkt)); > + > + if (avpkt->size && filtered_packet.size) { > + cupkt.payload_size = filtered_packet.size; > + cupkt.payload = filtered_packet.data; > + > + if (filtered_packet.pts != AV_NOPTS_VALUE) { > + cupkt.flags = CUVID_PKT_TIMESTAMP; > + cupkt.timestamp = av_rescale_q(filtered_packet.pts, > avctx->time_base, (AVRational){1, 10000000}); > + } > + } else { > + cupkt.flags = CUVID_PKT_ENDOFSTREAM; > + } > + > + err = cuvidParseVideoData(ctx->cuparser, &cupkt); > + > + av_packet_unref(&filtered_packet); > + > + if (err != CUDA_SUCCESS) { > + av_log(avctx, AV_LOG_ERROR, "cuvidParseVideoData failed\n"); > + if (ctx->internal_error) > + ret = ctx->internal_error; > + else > + ret = AVERROR_UNKNOWN; > + goto error; > + } > + > + if (av_fifo_size(ctx->frame_queue) >= FRAME_DELAY * > sizeof(CUVIDPARSERDISPINFO)) { > + CUVIDPARSERDISPINFO dispinfo; > + CUVIDPROCPARAMS params; > + unsigned int pitch = 0; > + CUdeviceptr mapped_frame = 0; > + int offset = 0; > + int i; > + > + av_fifo_generic_read(ctx->frame_queue, &dispinfo, > sizeof(CUVIDPARSERDISPINFO), NULL); > + > + memset(&params, 0, sizeof(params)); > + params.progressive_frame = dispinfo.progressive_frame; > + params.second_field = 0; > + params.top_field_first = dispinfo.top_field_first; > + params.unpaired_field = (dispinfo.progressive_frame == 1 || > dispinfo.repeat_first_field <= 1); > + > + err = cuvidMapVideoFrame(ctx->cudecoder, dispinfo.picture_index, > &mapped_frame, &pitch, &params); > + if (err != CUDA_SUCCESS) { > + av_log(avctx, AV_LOG_ERROR, "cuvidMapVideoFrame failed on index > %d: 0x%x\n", dispinfo.picture_index, (int)err); > + ret = AVERROR_EXTERNAL; > + goto error; > + } > + > + if (avctx->pix_fmt == AV_PIX_FMT_CUDA) { > + ret = av_hwframe_get_buffer(ctx->hwframe, frame, 0); > + if (ret < 0) { > + av_log(avctx, AV_LOG_ERROR, "av_hwframe_get_buffer > failed\n"); > + goto error; > + }
You should probably call ff_decode_frame_props on the AVFrame here to fill in the various frame properties, so that the AVFrames behave like those of any other decoder. > + > + for (i = 0; i < 2; i++) { > + CUDA_MEMCPY2D cpy = { > + .srcMemoryType = CU_MEMORYTYPE_DEVICE, > + .dstMemoryType = CU_MEMORYTYPE_DEVICE, > + .srcDevice = mapped_frame, > + .dstDevice = (CUdeviceptr)frame->data[i], > + .srcPitch = pitch, > + .dstPitch = frame->linesize[i], > + .srcY = offset, > + .WidthInBytes = FFMIN(pitch, frame->linesize[i]), > + .Height = avctx->height >> (i ? 1 : 0), > + }; > + > + err = cuMemcpy2D(&cpy); > + if (err != CUDA_SUCCESS) { > + av_log(avctx, AV_LOG_ERROR, "cuMemcpy2D failed\n"); > + ret = AVERROR_EXTERNAL; > + goto error; > + } > + > + offset += avctx->height; > + } > + } else if (avctx->pix_fmt == AV_PIX_FMT_NV12) { > + AVFrame *tmp_frame = av_frame_alloc(); > + if (!tmp_frame) { > + av_log(avctx, AV_LOG_ERROR, "av_frame_alloc failed\n"); > + cuvidUnmapVideoFrame(ctx->cudecoder, mapped_frame); > + ret = AVERROR(ENOMEM); > + goto error; > + } > + > + tmp_frame->format = AV_PIX_FMT_CUDA; > + tmp_frame->hw_frames_ctx = av_buffer_ref(ctx->hwframe); > + tmp_frame->data[0] = (uint8_t*)mapped_frame; > + tmp_frame->linesize[0] = pitch; > + tmp_frame->data[1] = (uint8_t*)(mapped_frame + > avctx->height * pitch); > + tmp_frame->linesize[1] = pitch; > + tmp_frame->width = avctx->width; > + tmp_frame->height = avctx->height; I would recommend using ff_get_buffer to allocate the frame here for the software output case, as that will automatically allocate your frame and fill it with a bunch of required info. av_hwframe_transfer_data can take a pre-allocated frame as well. 
> + > + ret = av_hwframe_transfer_data(frame, tmp_frame, 0); > + if (ret) { > + av_log(avctx, AV_LOG_ERROR, "av_hwframe_transfer_data > failed\n"); > + cuvidUnmapVideoFrame(ctx->cudecoder, mapped_frame); > + av_frame_free(&tmp_frame); > + goto error; > + } > + > + av_frame_free(&tmp_frame); > + } else { > + ret = AVERROR_BUG; > + goto error; > + } > + > + err = cuvidUnmapVideoFrame(ctx->cudecoder, mapped_frame); > + if (err != CUDA_SUCCESS) { > + av_log(avctx, AV_LOG_ERROR, "cuvidUnmapVideoFrame failed\n"); > + ret = AVERROR_EXTERNAL; > + goto error; > + } > + > + frame->width = avctx->width; > + frame->height = avctx->height; > + frame->pts = frame->pkt_pts = av_rescale_q(dispinfo.timestamp, > (AVRational){1, 10000000}, avctx->time_base); > + > + *got_frame = 1; > + } else { > + *got_frame = 0; > + } > + > +error: > + err = cuCtxPopCurrent(&dummy); > + if (err != CUDA_SUCCESS) { > + av_log(avctx, AV_LOG_ERROR, "cuvidCtxUnlock failed\n"); > + ret = AVERROR_EXTERNAL; > + } > + > + return ret; > +} > + > +static av_cold int cuvid_decode_end(AVCodecContext *avctx) > +{ > + CuvidContext *ctx = avctx->priv_data; > + > + av_fifo_freep(&ctx->frame_queue); > + > + if (ctx->bsf) > + av_bsf_free(&ctx->bsf); > + > + if (ctx->cuparser) > + cuvidDestroyVideoParser(ctx->cuparser); > + > + if (ctx->cudecoder) > + cuvidDestroyDecoder(ctx->cudecoder); > + > + av_buffer_unref(&ctx->hwframe); > + av_buffer_unref(&ctx->hwdevice); > + > + return 0; > +} > + > +static void cuvid_ctx_free(AVHWDeviceContext *ctx) > +{ > + AVCUDADeviceContext *hwctx = ctx->hwctx; > + cuCtxDestroy(hwctx->cuda_ctx); > +} > + > +static av_cold int cuvid_decode_init(AVCodecContext *avctx) > +{ > + CuvidContext *ctx = avctx->priv_data; > + AVCUDADeviceContext *device_hwctx; > + AVHWDeviceContext *device_ctx; > + AVHWFramesContext *hwframe_ctx; > + CUVIDPARSERPARAMS cuparseinfo; > + CUdevice device; > + CUcontext cuda_ctx = NULL; > + CUcontext dummy; > + CUresult err; > + const AVBitStreamFilter *bsf; > + int 
ret = 0; > + > + enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_CUDA, > + AV_PIX_FMT_NV12, > + AV_PIX_FMT_NONE }; > + > + ret = ff_get_format(avctx, pix_fmts); > + if (ret < 0) { > + av_log(avctx, AV_LOG_ERROR, "ff_get_format failed: %d\n", ret); > + return ret; > + } > + > + avctx->pix_fmt = ret; > + > + if (avctx->hw_frames_ctx) { > + ctx->hwframe = av_buffer_ref(avctx->hw_frames_ctx); > + if (!ctx->hwframe) { > + ret = AVERROR(ENOMEM); > + goto error; > + } > + > + hwframe_ctx = (AVHWFramesContext*)ctx->hwframe->data; > + > + ctx->hwdevice = av_buffer_ref(hwframe_ctx->device_ref); > + if (!ctx->hwdevice) { > + ret = AVERROR(ENOMEM); > + goto error; > + } > + > + device_ctx = hwframe_ctx->device_ctx; > + device_hwctx = device_ctx->hwctx; > + cuda_ctx = device_hwctx->cuda_ctx; > + } else { > + ctx->hwdevice = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_CUDA); > + if (!ctx->hwdevice) { > + av_log(avctx, AV_LOG_ERROR, "Error allocating hwdevice\n"); > + ret = AVERROR(ENOMEM); > + goto error; > + } > + > + err = cuInit(0); > + if (err != CUDA_SUCCESS) { > + av_log(avctx, AV_LOG_ERROR, "Could not initialize the CUDA > driver API\n"); > + ret = AVERROR_UNKNOWN; > + goto error; > + } > + > + err = cuDeviceGet(&device, 0); ///TODO: Make device index > configurable > + if (err != CUDA_SUCCESS) { > + av_log(avctx, AV_LOG_ERROR, "Could not get the device number > %d\n", 0); > + ret = AVERROR_UNKNOWN; > + goto error; > + } > + > + err = cuCtxCreate(&cuda_ctx, CU_CTX_SCHED_BLOCKING_SYNC, device); > + if (err != CUDA_SUCCESS) { > + av_log(avctx, AV_LOG_ERROR, "Error creating a CUDA context\n"); > + ret = AVERROR_UNKNOWN; > + goto error; > + } > + > + device_ctx = (AVHWDeviceContext*)ctx->hwdevice->data; > + device_ctx->free = cuvid_ctx_free; > + > + device_hwctx = device_ctx->hwctx; > + device_hwctx->cuda_ctx = cuda_ctx; > + > + err = cuCtxPopCurrent(&dummy); > + if (err != CUDA_SUCCESS) { > + av_log(avctx, AV_LOG_ERROR, "cuCtxPopCurrent failed\n"); > + ret = AVERROR_UNKNOWN; > + 
goto error; > + } > + > + ret = av_hwdevice_ctx_init(ctx->hwdevice); > + if (ret < 0) { > + av_log(avctx, AV_LOG_ERROR, "av_hwdevice_ctx_init failed\n"); > + goto error; > + } > + > + ctx->hwframe = av_hwframe_ctx_alloc(ctx->hwdevice); > + if (!ctx->hwframe) { > + av_log(avctx, AV_LOG_ERROR, "av_hwframe_ctx_alloc failed\n"); > + ret = AVERROR(ENOMEM); > + goto error; > + } > + } > + > + memset(&cuparseinfo, 0, sizeof(cuparseinfo)); > + > + if (avctx->codec->id == AV_CODEC_ID_H264) > + cuparseinfo.CodecType = cudaVideoCodec_H264; > + else if (avctx->codec->id == AV_CODEC_ID_HEVC) > + cuparseinfo.CodecType = cudaVideoCodec_HEVC; > + else > + return AVERROR_BUG; > + > + cuparseinfo.ulMaxNumDecodeSurfaces = MAX_FRAME_COUNT; > + cuparseinfo.ulMaxDisplayDelay = FRAME_DELAY; > + cuparseinfo.pUserData = avctx; > + cuparseinfo.pfnSequenceCallback = cuvid_handle_video_sequence; > + cuparseinfo.pfnDecodePicture = cuvid_handle_picture_decode; > + cuparseinfo.pfnDisplayPicture = cuvid_handle_picture_display; > + > + err = cuvidCreateVideoParser(&ctx->cuparser, &cuparseinfo); > + if (err != CUDA_SUCCESS) { > + av_log(avctx, AV_LOG_ERROR, "Error creating a CUVID parser\n"); > + ret = AVERROR_UNKNOWN; > + goto error; > + } > + > + if (avctx->codec->id == AV_CODEC_ID_H264) > + bsf = av_bsf_get_by_name("h264_mp4toannexb"); > + else if (avctx->codec->id == AV_CODEC_ID_HEVC) > + bsf = av_bsf_get_by_name("hevc_mp4toannexb"); > + else > + return AVERROR_BUG; > + > + if (!bsf) { > + ret = AVERROR_BSF_NOT_FOUND; > + goto error; > + } > + if (ret = av_bsf_alloc(bsf, &ctx->bsf)) { > + goto error; > + } > + if (((ret = avcodec_parameters_from_context(ctx->bsf->par_in, avctx)) < > 0) || ((ret = av_bsf_init(ctx->bsf)) < 0)) { > + av_bsf_free(&ctx->bsf); > + goto error; > + } > + > + ctx->frame_queue = av_fifo_alloc(MAX_FRAME_COUNT * > sizeof(CUVIDPARSERDISPINFO)); > + if (!ctx->frame_queue) { > + ret = AVERROR(ENOMEM); > + goto error; > + } > + > + return 0; > + > +error: > + 
cuvid_decode_end(avctx); > + return ret; > +} > + > +#if CONFIG_HEVC_CUVID_DECODER > +AVHWAccel ff_hevc_cuvid_hwaccel = { > + .name = "hevc_cuvid", > + .type = AVMEDIA_TYPE_VIDEO, > + .id = AV_CODEC_ID_HEVC, > + .pix_fmt = AV_PIX_FMT_CUDA, > +}; > + > +AVCodec ff_hevc_cuvid_decoder = { > + .name = "hevc_cuvid", > + .long_name = NULL_IF_CONFIG_SMALL("Nvidia CUVID HEVC Decoder"), > + .type = AVMEDIA_TYPE_VIDEO, > + .id = AV_CODEC_ID_HEVC, > + .priv_data_size = sizeof(CuvidContext), > + .init = cuvid_decode_init, > + .close = cuvid_decode_end, > + .decode = cuvid_decode_frame, > + .capabilities = AV_CODEC_CAP_DELAY, > + .pix_fmts = (const enum AVPixelFormat[]){ AV_PIX_FMT_CUDA, > + AV_PIX_FMT_NV12, > + AV_PIX_FMT_NONE }, > +}; > +#endif > + > +#if CONFIG_H264_CUVID_DECODER > +AVHWAccel ff_h264_cuvid_hwaccel = { > + .name = "h264_cuvid", > + .type = AVMEDIA_TYPE_VIDEO, > + .id = AV_CODEC_ID_H264, > + .pix_fmt = AV_PIX_FMT_CUDA, > +}; > + > +AVCodec ff_h264_cuvid_decoder = { > + .name = "h264_cuvid", > + .long_name = NULL_IF_CONFIG_SMALL("Nvidia CUVID H264 Decoder"), > + .type = AVMEDIA_TYPE_VIDEO, > + .id = AV_CODEC_ID_H264, > + .priv_data_size = sizeof(CuvidContext), > + .init = cuvid_decode_init, > + .close = cuvid_decode_end, > + .decode = cuvid_decode_frame, > + .capabilities = AV_CODEC_CAP_DELAY, > + .pix_fmts = (const enum AVPixelFormat[]){ AV_PIX_FMT_CUDA, > + AV_PIX_FMT_NV12, > + AV_PIX_FMT_NONE }, > +}; > +#endif > diff --git a/libavcodec/version.h b/libavcodec/version.h > index cf7f231..d30d3e2 100644 > --- a/libavcodec/version.h > +++ b/libavcodec/version.h > @@ -28,8 +28,8 @@ > #include "libavutil/version.h" > > #define LIBAVCODEC_VERSION_MAJOR 57 > -#define LIBAVCODEC_VERSION_MINOR 44 > -#define LIBAVCODEC_VERSION_MICRO 101 > +#define LIBAVCODEC_VERSION_MINOR 45 > +#define LIBAVCODEC_VERSION_MICRO 100 > > #define LIBAVCODEC_VERSION_INT AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \ > LIBAVCODEC_VERSION_MINOR, \ > -- > 2.8.3 > > 
_______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > http://ffmpeg.org/mailman/listinfo/ffmpeg-devel _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel