On Wed, Oct 19, 2016 at 2:00 PM, Timo Rothenpieler <t...@rothenpieler.org> wrote: > --- > compat/cuda/dynlink_cuda.h | 88 +++++ > compat/cuda/dynlink_cuviddec.h | 808 > +++++++++++++++++++++++++++++++++++++++++ > compat/cuda/dynlink_loader.h | 254 +++++++++++++ > compat/cuda/dynlink_nvcuvid.h | 316 ++++++++++++++++ > 4 files changed, 1466 insertions(+) > create mode 100644 compat/cuda/dynlink_cuda.h > create mode 100644 compat/cuda/dynlink_cuviddec.h > create mode 100644 compat/cuda/dynlink_loader.h > create mode 100644 compat/cuda/dynlink_nvcuvid.h > > diff --git a/compat/cuda/dynlink_cuda.h b/compat/cuda/dynlink_cuda.h > new file mode 100644 > index 0000000..908f12d > --- /dev/null > +++ b/compat/cuda/dynlink_cuda.h > @@ -0,0 +1,88 @@ > +/* > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. > + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 > USA > + */
So did you write this without once looking at the NVIDIA header file? Because if you did even read it, then you can't realistically claim this is LGPL. > + > +#if !defined(AV_COMPAT_DYNLINK_CUDA_H) && !defined(CUDA_VERSION) > +#define AV_COMPAT_DYNLINK_CUDA_H > + > +#include <stddef.h> > + > +#define CUDA_VERSION 7050 > + > +#if defined(_WIN32) || defined(__CYGWIN__) > +#define CUDAAPI __stdcall > +#else > +#define CUDAAPI > +#endif > + > +#define CU_CTX_SCHED_BLOCKING_SYNC 4 > + > +typedef int CUdevice; > +typedef void* CUarray; > +typedef void* CUcontext; > +#if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64) > +typedef unsigned long long CUdeviceptr; > +#else > +typedef unsigned int CUdeviceptr; > +#endif > + > +typedef enum cudaError_enum { > + CUDA_SUCCESS = 0 > +} CUresult; > + > +typedef enum CUmemorytype_enum { > + CU_MEMORYTYPE_HOST = 1, > + CU_MEMORYTYPE_DEVICE = 2 > +} CUmemorytype; > + > +typedef struct CUDA_MEMCPY2D_st { > + size_t srcXInBytes; > + size_t srcY; > + CUmemorytype srcMemoryType; > + const void *srcHost; > + CUdeviceptr srcDevice; > + CUarray srcArray; > + size_t srcPitch; > + > + size_t dstXInBytes; > + size_t dstY; > + CUmemorytype dstMemoryType; > + void *dstHost; > + CUdeviceptr dstDevice; > + CUarray dstArray; > + size_t dstPitch; > + > + size_t WidthInBytes; > + size_t Height; > +} CUDA_MEMCPY2D; > + > +typedef CUresult CUDAAPI tcuInit(unsigned int Flags); > +typedef CUresult CUDAAPI tcuDeviceGetCount(int *count); > +typedef CUresult CUDAAPI tcuDeviceGet(CUdevice *device, int ordinal); > +typedef CUresult CUDAAPI tcuDeviceGetName(char *name, int len, CUdevice dev); > +typedef CUresult CUDAAPI tcuDeviceComputeCapability(int *major, int *minor, > CUdevice dev); > +typedef CUresult CUDAAPI tcuCtxCreate_v2(CUcontext *pctx, unsigned int > flags, CUdevice dev); > +typedef CUresult CUDAAPI tcuCtxPushCurrent_v2(CUcontext *pctx); > +typedef CUresult CUDAAPI tcuCtxPopCurrent_v2(CUcontext *pctx); > +typedef CUresult CUDAAPI 
tcuCtxDestroy_v2(CUcontext ctx); > +typedef CUresult CUDAAPI tcuMemAlloc_v2(CUdeviceptr *dptr, size_t bytesize); > +typedef CUresult CUDAAPI tcuMemFree_v2(CUdeviceptr dptr); > +typedef CUresult CUDAAPI tcuMemcpy2D_v2(const CUDA_MEMCPY2D *pcopy); > +typedef CUresult CUDAAPI tcuGetErrorName(CUresult error, const char** pstr); > +typedef CUresult CUDAAPI tcuGetErrorString(CUresult error, const char** > pstr); > + > +#endif > diff --git a/compat/cuda/dynlink_cuviddec.h b/compat/cuda/dynlink_cuviddec.h > new file mode 100644 > index 0000000..17207bc > --- /dev/null > +++ b/compat/cuda/dynlink_cuviddec.h > @@ -0,0 +1,808 @@ > +/* > + * This copyright notice applies to this header file only: > + * > + * Copyright (c) 2010-2016 NVIDIA Corporation > + * > + * Permission is hereby granted, free of charge, to any person > + * obtaining a copy of this software and associated documentation > + * files (the "Software"), to deal in the Software without > + * restriction, including without limitation the rights to use, > + * copy, modify, merge, publish, distribute, sublicense, and/or sell > + * copies of the software, and to permit persons to whom the > + * software is furnished to do so, subject to the following > + * conditions: > + * > + * The above copyright notice and this permission notice shall be > + * included in all copies or substantial portions of the Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES > + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND > + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT > + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, > + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR > + * OTHER DEALINGS IN THE SOFTWARE. 
> + */ > + > +/** > + * \file cuviddec.h > + * NvCuvid API provides Video Decoding interface to NVIDIA GPU devices. > + * \date 2015-2016 > + * This file contains constants, structure definitions and function > prototypes used for decoding. > + */ > + > +#if !defined(__CUDA_VIDEO_H__) > +#define __CUDA_VIDEO_H__ > + > +#if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64) > +#if (CUDA_VERSION >= 3020) && (!defined(CUDA_FORCE_API_VERSION) || > (CUDA_FORCE_API_VERSION >= 3020)) > +#define __CUVID_DEVPTR64 > +#endif > +#endif > + > +#if defined(__cplusplus) > +extern "C" { > +#endif /* __cplusplus */ > + > +typedef void *CUvideodecoder; > +typedef struct _CUcontextlock_st *CUvideoctxlock; > + > +/** > + * \addtogroup VIDEO_DECODER Video Decoder > + * @{ > + */ > + > +/*! > + * \enum cudaVideoCodec > + * Video Codec Enums > + */ > +typedef enum cudaVideoCodec_enum { > + cudaVideoCodec_MPEG1=0, /**< MPEG1 */ > + cudaVideoCodec_MPEG2, /**< MPEG2 */ > + cudaVideoCodec_MPEG4, /**< MPEG4 */ > + cudaVideoCodec_VC1, /**< VC1 */ > + cudaVideoCodec_H264, /**< H264 */ > + cudaVideoCodec_JPEG, /**< JPEG */ > + cudaVideoCodec_H264_SVC, /**< H264-SVC */ > + cudaVideoCodec_H264_MVC, /**< H264-MVC */ > + cudaVideoCodec_HEVC, /**< HEVC */ > + cudaVideoCodec_VP8, /**< VP8 */ > + cudaVideoCodec_VP9, /**< VP9 */ > + cudaVideoCodec_NumCodecs, /**< Max codecs */ > + // Uncompressed YUV > + cudaVideoCodec_YUV420 = (('I'<<24)|('Y'<<16)|('U'<<8)|('V')), /**< > Y,U,V (4:2:0) */ > + cudaVideoCodec_YV12 = (('Y'<<24)|('V'<<16)|('1'<<8)|('2')), /**< > Y,V,U (4:2:0) */ > + cudaVideoCodec_NV12 = (('N'<<24)|('V'<<16)|('1'<<8)|('2')), /**< > Y,UV (4:2:0) */ > + cudaVideoCodec_YUYV = (('Y'<<24)|('U'<<16)|('Y'<<8)|('V')), /**< > YUYV/YUY2 (4:2:2) */ > + cudaVideoCodec_UYVY = (('U'<<24)|('Y'<<16)|('V'<<8)|('Y')) /**< > UYVY (4:2:2) */ > +} cudaVideoCodec; > + > +/*! 
> + * \enum cudaVideoSurfaceFormat > + * Video Surface Formats Enums > + */ > +typedef enum cudaVideoSurfaceFormat_enum { > + cudaVideoSurfaceFormat_NV12=0 /**< NV12 (currently the only > supported output format) */ > +} cudaVideoSurfaceFormat; > + > +/*! > + * \enum cudaVideoDeinterlaceMode > + * Deinterlacing Modes Enums > + */ > +typedef enum cudaVideoDeinterlaceMode_enum { > + cudaVideoDeinterlaceMode_Weave=0, /**< Weave both fields (no > deinterlacing) */ > + cudaVideoDeinterlaceMode_Bob, /**< Drop one field */ > + cudaVideoDeinterlaceMode_Adaptive /**< Adaptive deinterlacing */ > +} cudaVideoDeinterlaceMode; > + > +/*! > + * \enum cudaVideoChromaFormat > + * Chroma Formats Enums > + */ > +typedef enum cudaVideoChromaFormat_enum { > + cudaVideoChromaFormat_Monochrome=0, /**< MonoChrome */ > + cudaVideoChromaFormat_420, /**< 4:2:0 */ > + cudaVideoChromaFormat_422, /**< 4:2:2 */ > + cudaVideoChromaFormat_444 /**< 4:4:4 */ > +} cudaVideoChromaFormat; > + > +/*! > + * \enum cudaVideoCreateFlags > + * Decoder Flags Enums > + */ > +typedef enum cudaVideoCreateFlags_enum { > + cudaVideoCreate_Default = 0x00, /**< Default operation mode: use > dedicated video engines */ > + cudaVideoCreate_PreferCUDA = 0x01, /**< Use a CUDA-based decoder if > faster than dedicated engines (requires a valid vidLock object for > multi-threading) */ > + cudaVideoCreate_PreferDXVA = 0x02, /**< Go through DXVA internally if > possible (requires D3D9 interop) */ > + cudaVideoCreate_PreferCUVID = 0x04 /**< Use dedicated video engines > directly */ > +} cudaVideoCreateFlags; > + > +/*! 
> + * \struct CUVIDDECODECREATEINFO > + * Struct used in create decoder > + */ > +typedef struct _CUVIDDECODECREATEINFO > +{ > + unsigned long ulWidth; /**< Coded Sequence Width */ > + unsigned long ulHeight; /**< Coded Sequence Height */ > + unsigned long ulNumDecodeSurfaces; /**< Maximum number of internal > decode surfaces */ > + cudaVideoCodec CodecType; /**< cudaVideoCodec_XXX */ > + cudaVideoChromaFormat ChromaFormat; /**< cudaVideoChromaFormat_XXX (only > 4:2:0 is currently supported) */ > + unsigned long ulCreationFlags; /**< Decoder creation flags > (cudaVideoCreateFlags_XXX) */ > + unsigned long bitDepthMinus8; > + unsigned long Reserved1[4]; /**< Reserved for future use - set > to zero */ > + /** > + * area of the frame that should be displayed > + */ > + struct { > + short left; > + short top; > + short right; > + short bottom; > + } display_area; > + > + cudaVideoSurfaceFormat OutputFormat; /**< > cudaVideoSurfaceFormat_XXX */ > + cudaVideoDeinterlaceMode DeinterlaceMode; /**< > cudaVideoDeinterlaceMode_XXX */ > + unsigned long ulTargetWidth; /**< Post-processed Output > Width (Should be aligned to 2) */ > + unsigned long ulTargetHeight; /**< Post-processed Output > Height (Should be aligned to 2) */ > + unsigned long ulNumOutputSurfaces; /**< Maximum number of output > surfaces simultaneously mapped */ > + CUvideoctxlock vidLock; /**< If non-NULL, context > lock used for synchronizing ownership of the cuda context */ > + /** > + * target rectangle in the output frame (for aspect ratio conversion) > + * if a null rectangle is specified, {0,0,ulTargetWidth,ulTargetHeight} > will be used > + */ > + struct { > + short left; > + short top; > + short right; > + short bottom; > + } target_rect; > + unsigned long Reserved2[5]; /**< Reserved for future use > - set to zero */ > +} CUVIDDECODECREATEINFO; > + > +/*! 
> + * \struct CUVIDH264DPBENTRY > + * H.264 DPB Entry > + */ > +typedef struct _CUVIDH264DPBENTRY > +{ > + int PicIdx; /**< picture index of reference frame */ > + int FrameIdx; /**< frame_num(short-term) or > LongTermFrameIdx(long-term) */ > + int is_long_term; /**< 0=short term reference, 1=long term > reference */ > + int not_existing; /**< non-existing reference frame > (corresponding PicIdx should be set to -1) */ > + int used_for_reference; /**< 0=unused, 1=top_field, 2=bottom_field, > 3=both_fields */ > + int FieldOrderCnt[2]; /**< field order count of top and bottom > fields */ > +} CUVIDH264DPBENTRY; > + > +/*! > + * \struct CUVIDH264MVCEXT > + * H.264 MVC Picture Parameters Ext > + */ > +typedef struct _CUVIDH264MVCEXT > +{ > + int num_views_minus1; > + int view_id; > + unsigned char inter_view_flag; > + unsigned char num_inter_view_refs_l0; > + unsigned char num_inter_view_refs_l1; > + unsigned char MVCReserved8Bits; > + int InterViewRefsL0[16]; > + int InterViewRefsL1[16]; > +} CUVIDH264MVCEXT; > + > +/*! 
> + * \struct CUVIDH264SVCEXT > + * H.264 SVC Picture Parameters Ext > + */ > +typedef struct _CUVIDH264SVCEXT > +{ > + unsigned char profile_idc; > + unsigned char level_idc; > + unsigned char DQId; > + unsigned char DQIdMax; > + unsigned char disable_inter_layer_deblocking_filter_idc; > + unsigned char ref_layer_chroma_phase_y_plus1; > + signed char inter_layer_slice_alpha_c0_offset_div2; > + signed char inter_layer_slice_beta_offset_div2; > + > + unsigned short DPBEntryValidFlag; > + unsigned char inter_layer_deblocking_filter_control_present_flag; > + unsigned char extended_spatial_scalability_idc; > + unsigned char adaptive_tcoeff_level_prediction_flag; > + unsigned char slice_header_restriction_flag; > + unsigned char chroma_phase_x_plus1_flag; > + unsigned char chroma_phase_y_plus1; > + > + unsigned char tcoeff_level_prediction_flag; > + unsigned char constrained_intra_resampling_flag; > + unsigned char ref_layer_chroma_phase_x_plus1_flag; > + unsigned char store_ref_base_pic_flag; > + unsigned char Reserved8BitsA; > + unsigned char Reserved8BitsB; > + // For the 4 scaled_ref_layer_XX fields below, > + // if (extended_spatial_scalability_idc == 1), SPS field, G.7.3.2.1.4, > add prefix "seq_" > + // if (extended_spatial_scalability_idc == 2), SLH field, G.7.3.3.4, > + short scaled_ref_layer_left_offset; > + short scaled_ref_layer_top_offset; > + short scaled_ref_layer_right_offset; > + short scaled_ref_layer_bottom_offset; > + unsigned short Reserved16Bits; > + struct _CUVIDPICPARAMS *pNextLayer; /**< Points to the picparams for the > next layer to be decoded. Linked list ends at the target layer. */ > + int bRefBaseLayer; /**< whether to store ref base pic */ > +} CUVIDH264SVCEXT; > + > +/*! 
> + * \struct CUVIDH264PICPARAMS > + * H.264 Picture Parameters > + */ > +typedef struct _CUVIDH264PICPARAMS > +{ > + // SPS > + int log2_max_frame_num_minus4; > + int pic_order_cnt_type; > + int log2_max_pic_order_cnt_lsb_minus4; > + int delta_pic_order_always_zero_flag; > + int frame_mbs_only_flag; > + int direct_8x8_inference_flag; > + int num_ref_frames; // NOTE: shall meet level 4.1 > restrictions > + unsigned char residual_colour_transform_flag; > + unsigned char bit_depth_luma_minus8; // Must be 0 (only 8-bit > supported) > + unsigned char bit_depth_chroma_minus8; // Must be 0 (only 8-bit > supported) > + unsigned char qpprime_y_zero_transform_bypass_flag; > + // PPS > + int entropy_coding_mode_flag; > + int pic_order_present_flag; > + int num_ref_idx_l0_active_minus1; > + int num_ref_idx_l1_active_minus1; > + int weighted_pred_flag; > + int weighted_bipred_idc; > + int pic_init_qp_minus26; > + int deblocking_filter_control_present_flag; > + int redundant_pic_cnt_present_flag; > + int transform_8x8_mode_flag; > + int MbaffFrameFlag; > + int constrained_intra_pred_flag; > + int chroma_qp_index_offset; > + int second_chroma_qp_index_offset; > + int ref_pic_flag; > + int frame_num; > + int CurrFieldOrderCnt[2]; > + // DPB > + CUVIDH264DPBENTRY dpb[16]; // List of reference frames within > the DPB > + // Quantization Matrices (raster-order) > + unsigned char WeightScale4x4[6][16]; > + unsigned char WeightScale8x8[2][64]; > + // FMO/ASO > + unsigned char fmo_aso_enable; > + unsigned char num_slice_groups_minus1; > + unsigned char slice_group_map_type; > + signed char pic_init_qs_minus26; > + unsigned int slice_group_change_rate_minus1; > + union > + { > + unsigned long long slice_group_map_addr; > + const unsigned char *pMb2SliceGroupMap; > + } fmo; > + unsigned int Reserved[12]; > + // SVC/MVC > + union > + { > + CUVIDH264MVCEXT mvcext; > + CUVIDH264SVCEXT svcext; > + } svcmvc; > +} CUVIDH264PICPARAMS; > + > + > +/*! 
> + * \struct CUVIDMPEG2PICPARAMS > + * MPEG-2 Picture Parameters > + */ > +typedef struct _CUVIDMPEG2PICPARAMS > +{ > + int ForwardRefIdx; // Picture index of forward reference > (P/B-frames) > + int BackwardRefIdx; // Picture index of backward reference > (B-frames) > + int picture_coding_type; > + int full_pel_forward_vector; > + int full_pel_backward_vector; > + int f_code[2][2]; > + int intra_dc_precision; > + int frame_pred_frame_dct; > + int concealment_motion_vectors; > + int q_scale_type; > + int intra_vlc_format; > + int alternate_scan; > + int top_field_first; > + // Quantization matrices (raster order) > + unsigned char QuantMatrixIntra[64]; > + unsigned char QuantMatrixInter[64]; > +} CUVIDMPEG2PICPARAMS; > + > +//////////////////////////////////////////////////////////////////////////////////////////////// > +// > +// MPEG-4 Picture Parameters > +// > + > +// MPEG-4 has VOP types instead of Picture types > +#define I_VOP 0 > +#define P_VOP 1 > +#define B_VOP 2 > +#define S_VOP 3 > + > +/*! > + * \struct CUVIDMPEG4PICPARAMS > + * MPEG-4 Picture Parameters > + */ > +typedef struct _CUVIDMPEG4PICPARAMS > +{ > + int ForwardRefIdx; // Picture index of forward reference > (P/B-frames) > + int BackwardRefIdx; // Picture index of backward reference > (B-frames) > + // VOL > + int video_object_layer_width; > + int video_object_layer_height; > + int vop_time_increment_bitcount; > + int top_field_first; > + int resync_marker_disable; > + int quant_type; > + int quarter_sample; > + int short_video_header; > + int divx_flags; > + // VOP > + int vop_coding_type; > + int vop_coded; > + int vop_rounding_type; > + int alternate_vertical_scan_flag; > + int interlaced; > + int vop_fcode_forward; > + int vop_fcode_backward; > + int trd[2]; > + int trb[2]; > + // Quantization matrices (raster order) > + unsigned char QuantMatrixIntra[64]; > + unsigned char QuantMatrixInter[64]; > + int gmc_enabled; > +} CUVIDMPEG4PICPARAMS; > + > +/*! 
> + * \struct CUVIDVC1PICPARAMS > + * VC1 Picture Parameters > + */ > +typedef struct _CUVIDVC1PICPARAMS > +{ > + int ForwardRefIdx; /**< Picture index of forward reference > (P/B-frames) */ > + int BackwardRefIdx; /**< Picture index of backward reference > (B-frames) */ > + int FrameWidth; /**< Actual frame width */ > + int FrameHeight; /**< Actual frame height */ > + // PICTURE > + int intra_pic_flag; /**< Set to 1 for I,BI frames */ > + int ref_pic_flag; /**< Set to 1 for I,P frames */ > + int progressive_fcm; /**< Progressive frame */ > + // SEQUENCE > + int profile; > + int postprocflag; > + int pulldown; > + int interlace; > + int tfcntrflag; > + int finterpflag; > + int psf; > + int multires; > + int syncmarker; > + int rangered; > + int maxbframes; > + // ENTRYPOINT > + int panscan_flag; > + int refdist_flag; > + int extended_mv; > + int dquant; > + int vstransform; > + int loopfilter; > + int fastuvmc; > + int overlap; > + int quantizer; > + int extended_dmv; > + int range_mapy_flag; > + int range_mapy; > + int range_mapuv_flag; > + int range_mapuv; > + int rangeredfrm; // range reduction state > +} CUVIDVC1PICPARAMS; > + > +/*! > + * \struct CUVIDJPEGPICPARAMS > + * JPEG Picture Parameters > + */ > +typedef struct _CUVIDJPEGPICPARAMS > +{ > + int Reserved; > +} CUVIDJPEGPICPARAMS; > + > + > + /*! 
> + * \struct CUVIDHEVCPICPARAMS > + * HEVC Picture Parameters > + */ > +typedef struct _CUVIDHEVCPICPARAMS > +{ > + // sps > + int pic_width_in_luma_samples; > + int pic_height_in_luma_samples; > + unsigned char log2_min_luma_coding_block_size_minus3; > + unsigned char log2_diff_max_min_luma_coding_block_size; > + unsigned char log2_min_transform_block_size_minus2; > + unsigned char log2_diff_max_min_transform_block_size; > + unsigned char pcm_enabled_flag; > + unsigned char log2_min_pcm_luma_coding_block_size_minus3; > + unsigned char log2_diff_max_min_pcm_luma_coding_block_size; > + unsigned char pcm_sample_bit_depth_luma_minus1; > + > + unsigned char pcm_sample_bit_depth_chroma_minus1; > + unsigned char pcm_loop_filter_disabled_flag; > + unsigned char strong_intra_smoothing_enabled_flag; > + unsigned char max_transform_hierarchy_depth_intra; > + unsigned char max_transform_hierarchy_depth_inter; > + unsigned char amp_enabled_flag; > + unsigned char separate_colour_plane_flag; > + unsigned char log2_max_pic_order_cnt_lsb_minus4; > + > + unsigned char num_short_term_ref_pic_sets; > + unsigned char long_term_ref_pics_present_flag; > + unsigned char num_long_term_ref_pics_sps; > + unsigned char sps_temporal_mvp_enabled_flag; > + unsigned char sample_adaptive_offset_enabled_flag; > + unsigned char scaling_list_enable_flag; > + unsigned char IrapPicFlag; > + unsigned char IdrPicFlag; > + > + unsigned char bit_depth_luma_minus8; > + unsigned char bit_depth_chroma_minus8; > + unsigned char reserved1[14]; > + > + // pps > + unsigned char dependent_slice_segments_enabled_flag; > + unsigned char slice_segment_header_extension_present_flag; > + unsigned char sign_data_hiding_enabled_flag; > + unsigned char cu_qp_delta_enabled_flag; > + unsigned char diff_cu_qp_delta_depth; > + signed char init_qp_minus26; > + signed char pps_cb_qp_offset; > + signed char pps_cr_qp_offset; > + > + unsigned char constrained_intra_pred_flag; > + unsigned char weighted_pred_flag; > + unsigned 
char weighted_bipred_flag; > + unsigned char transform_skip_enabled_flag; > + unsigned char transquant_bypass_enabled_flag; > + unsigned char entropy_coding_sync_enabled_flag; > + unsigned char log2_parallel_merge_level_minus2; > + unsigned char num_extra_slice_header_bits; > + > + unsigned char loop_filter_across_tiles_enabled_flag; > + unsigned char loop_filter_across_slices_enabled_flag; > + unsigned char output_flag_present_flag; > + unsigned char num_ref_idx_l0_default_active_minus1; > + unsigned char num_ref_idx_l1_default_active_minus1; > + unsigned char lists_modification_present_flag; > + unsigned char cabac_init_present_flag; > + unsigned char pps_slice_chroma_qp_offsets_present_flag; > + > + unsigned char deblocking_filter_override_enabled_flag; > + unsigned char pps_deblocking_filter_disabled_flag; > + signed char pps_beta_offset_div2; > + signed char pps_tc_offset_div2; > + unsigned char tiles_enabled_flag; > + unsigned char uniform_spacing_flag; > + unsigned char num_tile_columns_minus1; > + unsigned char num_tile_rows_minus1; > + > + unsigned short column_width_minus1[21]; > + unsigned short row_height_minus1[21]; > + unsigned int reserved3[15]; > + > + // RefPicSets > + int NumBitsForShortTermRPSInSlice; > + int NumDeltaPocsOfRefRpsIdx; > + int NumPocTotalCurr; > + int NumPocStCurrBefore; > + int NumPocStCurrAfter; > + int NumPocLtCurr; > + int CurrPicOrderCntVal; > + int RefPicIdx[16]; // [refpic] Indices of valid > reference pictures (-1 if unused for reference) > + int PicOrderCntVal[16]; // [refpic] > + unsigned char IsLongTerm[16]; // [refpic] 0=not a long-term > reference, 1=long-term reference > + unsigned char RefPicSetStCurrBefore[8]; // [0..NumPocStCurrBefore-1] -> > refpic (0..15) > + unsigned char RefPicSetStCurrAfter[8]; // [0..NumPocStCurrAfter-1] -> > refpic (0..15) > + unsigned char RefPicSetLtCurr[8]; // [0..NumPocLtCurr-1] -> refpic > (0..15) > + unsigned char RefPicSetInterLayer0[8]; > + unsigned char RefPicSetInterLayer1[8]; > + 
unsigned int reserved4[12]; > + > + // scaling lists (diag order) > + unsigned char ScalingList4x4[6][16]; // [matrixId][i] > + unsigned char ScalingList8x8[6][64]; // [matrixId][i] > + unsigned char ScalingList16x16[6][64]; // [matrixId][i] > + unsigned char ScalingList32x32[2][64]; // [matrixId][i] > + unsigned char ScalingListDCCoeff16x16[6]; // [matrixId] > + unsigned char ScalingListDCCoeff32x32[2]; // [matrixId] > +} CUVIDHEVCPICPARAMS; > + > + > +/*! > + * \struct CUVIDVP8PICPARAMS > + * VP8 Picture Parameters > + */ > +typedef struct _CUVIDVP8PICPARAMS > +{ > + int width; > + int height; > + unsigned int first_partition_size; > + //Frame Indexes > + unsigned char LastRefIdx; > + unsigned char GoldenRefIdx; > + unsigned char AltRefIdx; > + union { > + struct { > + unsigned char frame_type : 1; /**< 0 = KEYFRAME, 1 = > INTERFRAME */ > + unsigned char version : 3; > + unsigned char show_frame : 1; > + unsigned char update_mb_segmentation_data : 1; /**< Must be 0 > if segmentation is not enabled */ > + unsigned char Reserved2Bits : 2; > + }; > + unsigned char wFrameTagFlags; > + } tagflags; > + unsigned char Reserved1[4]; > + unsigned int Reserved2[3]; > +} CUVIDVP8PICPARAMS; > + > +/*! 
> + * \struct CUVIDVP9PICPARAMS > + * VP9 Picture Parameters > + */ > +typedef struct _CUVIDVP9PICPARAMS > +{ > + unsigned int width; > + unsigned int height; > + > + //Frame Indices > + unsigned char LastRefIdx; > + unsigned char GoldenRefIdx; > + unsigned char AltRefIdx; > + unsigned char colorSpace; > + > + unsigned short profile : 3; > + unsigned short frameContextIdx : 2; > + unsigned short frameType : 1; > + unsigned short showFrame : 1; > + unsigned short errorResilient : 1; > + unsigned short frameParallelDecoding : 1; > + unsigned short subSamplingX : 1; > + unsigned short subSamplingY : 1; > + unsigned short intraOnly : 1; > + unsigned short allow_high_precision_mv : 1; > + unsigned short refreshEntropyProbs : 1; > + unsigned short reserved2Bits : 2; > + > + unsigned short reserved16Bits; > + > + unsigned char refFrameSignBias[4]; > + > + unsigned char bitDepthMinus8Luma; > + unsigned char bitDepthMinus8Chroma; > + unsigned char loopFilterLevel; > + unsigned char loopFilterSharpness; > + > + unsigned char modeRefLfEnabled; > + unsigned char log2_tile_columns; > + unsigned char log2_tile_rows; > + > + unsigned char segmentEnabled : 1; > + unsigned char segmentMapUpdate : 1; > + unsigned char segmentMapTemporalUpdate : 1; > + unsigned char segmentFeatureMode : 1; > + unsigned char reserved4Bits : 4; > + > + > + unsigned char segmentFeatureEnable[8][4]; > + short segmentFeatureData[8][4]; > + unsigned char mb_segment_tree_probs[7]; > + unsigned char segment_pred_probs[3]; > + unsigned char reservedSegment16Bits[2]; > + > + int qpYAc; > + int qpYDc; > + int qpChDc; > + int qpChAc; > + > + unsigned int activeRefIdx[3]; > + unsigned int resetFrameContext; > + unsigned int mcomp_filter_type; > + unsigned int mbRefLfDelta[4]; > + unsigned int mbModeLfDelta[2]; > + unsigned int frameTagSize; > + unsigned int offsetToDctParts; > + unsigned int reserved128Bits[4]; > + > +} CUVIDVP9PICPARAMS; > + > + > +/*! 
> + * \struct CUVIDPICPARAMS > + * Picture Parameters for Decoding > + */ > +typedef struct _CUVIDPICPARAMS > +{ > + int PicWidthInMbs; /**< Coded Frame Size */ > + int FrameHeightInMbs; /**< Coded Frame Height */ > + int CurrPicIdx; /**< Output index of the current > picture */ > + int field_pic_flag; /**< 0=frame picture, 1=field > picture */ > + int bottom_field_flag; /**< 0=top field, 1=bottom field > (ignored if field_pic_flag=0) */ > + int second_field; /**< Second field of a > complementary field pair */ > + // Bitstream data > + unsigned int nBitstreamDataLen; /**< Number of bytes in bitstream > data buffer */ > + const unsigned char *pBitstreamData; /**< Ptr to bitstream data for > this picture (slice-layer) */ > + unsigned int nNumSlices; /**< Number of slices in this > picture */ > + const unsigned int *pSliceDataOffsets; /**< nNumSlices entries, contains > offset of each slice within the bitstream data buffer */ > + int ref_pic_flag; /**< This picture is a reference > picture */ > + int intra_pic_flag; /**< This picture is entirely > intra coded */ > + unsigned int Reserved[30]; /**< Reserved for future use */ > + // Codec-specific data > + union { > + CUVIDMPEG2PICPARAMS mpeg2; /**< Also used for MPEG-1 */ > + CUVIDH264PICPARAMS h264; > + CUVIDVC1PICPARAMS vc1; > + CUVIDMPEG4PICPARAMS mpeg4; > + CUVIDJPEGPICPARAMS jpeg; > + CUVIDHEVCPICPARAMS hevc; > + CUVIDVP8PICPARAMS vp8; > + CUVIDVP9PICPARAMS vp9; > + unsigned int CodecReserved[1024]; > + } CodecSpecific; > +} CUVIDPICPARAMS; > + > + > +/*! 
> + * \struct CUVIDPROCPARAMS > + * Picture Parameters for Postprocessing > + */ > +typedef struct _CUVIDPROCPARAMS > +{ > + int progressive_frame; /**< Input is progressive (deinterlace_mode will > be ignored) */ > + int second_field; /**< Output the second field (ignored if > deinterlace mode is Weave) */ > + int top_field_first; /**< Input frame is top field first (1st field > is top, 2nd field is bottom) */ > + int unpaired_field; /**< Input only contains one field (2nd field is > invalid) */ > + // The fields below are used for raw YUV input > + unsigned int reserved_flags; /**< Reserved for future use (set to > zero) */ > + unsigned int reserved_zero; /**< Reserved (set to zero) */ > + unsigned long long raw_input_dptr; /**< Input CUdeviceptr for raw YUV > extensions */ > + unsigned int raw_input_pitch; /**< pitch in bytes of raw YUV input > (should be aligned appropriately) */ > + unsigned int raw_input_format; /**< Reserved for future use (set to > zero) */ > + unsigned long long raw_output_dptr; /**< Reserved for future use (set to > zero) */ > + unsigned int raw_output_pitch; /**< Reserved for future use (set to > zero) */ > + unsigned int Reserved[48]; > + void *Reserved3[3]; > +} CUVIDPROCPARAMS; > + > + > +/** > + * > + * In order to minimize decode latencies, there should be always at least 2 > pictures in the decode > + * queue at any time, in order to make sure that all decode engines are > always busy. > + * > + * Overall data flow: > + * - cuvidCreateDecoder(...) > + * For each picture: > + * - cuvidDecodePicture(N) > + * - cuvidMapVideoFrame(N-4) > + * - do some processing in cuda > + * - cuvidUnmapVideoFrame(N-4) > + * - cuvidDecodePicture(N+1) > + * - cuvidMapVideoFrame(N-3) > + * ... > + * - cuvidDestroyDecoder(...) > + * > + * NOTE: > + * - When the cuda context is created from a D3D device, the D3D device must > also be created > + * with the D3DCREATE_MULTITHREADED flag. 
> + * - There is a limit to how many pictures can be mapped simultaneously > (ulNumOutputSurfaces) > + * - cuvidDecodePicture may block the calling thread if there are too many > pictures pending > + * in the decode queue > + */ > + > +/** > + * \fn CUresult CUDAAPI cuvidCreateDecoder(CUvideodecoder *phDecoder, > CUVIDDECODECREATEINFO *pdci) > + * Create the decoder object > + */ > +typedef CUresult CUDAAPI tcuvidCreateDecoder(CUvideodecoder *phDecoder, > CUVIDDECODECREATEINFO *pdci); > + > +/** > + * \fn CUresult CUDAAPI cuvidDestroyDecoder(CUvideodecoder hDecoder) > + * Destroy the decoder object > + */ > +typedef CUresult CUDAAPI tcuvidDestroyDecoder(CUvideodecoder hDecoder); > + > +/** > + * \fn CUresult CUDAAPI cuvidDecodePicture(CUvideodecoder hDecoder, > CUVIDPICPARAMS *pPicParams) > + * Decode a single picture (field or frame) > + */ > +typedef CUresult CUDAAPI tcuvidDecodePicture(CUvideodecoder hDecoder, > CUVIDPICPARAMS *pPicParams); > + > + > +#if !defined(__CUVID_DEVPTR64) || defined(__CUVID_INTERNAL) > +/** > + * \fn CUresult CUDAAPI cuvidMapVideoFrame(CUvideodecoder hDecoder, int > nPicIdx, unsigned int *pDevPtr, unsigned int *pPitch, CUVIDPROCPARAMS *pVPP); > + * Post-process and map a video frame for use in cuda > + */ > +typedef CUresult CUDAAPI tcuvidMapVideoFrame(CUvideodecoder hDecoder, int > nPicIdx, > + unsigned int *pDevPtr, unsigned > int *pPitch, > + CUVIDPROCPARAMS *pVPP); > + > +/** > + * \fn CUresult CUDAAPI cuvidUnmapVideoFrame(CUvideodecoder hDecoder, > unsigned int DevPtr) > + * Unmap a previously mapped video frame > + */ > +typedef CUresult CUDAAPI tcuvidUnmapVideoFrame(CUvideodecoder hDecoder, > unsigned int DevPtr); > +#endif > + > +#if defined(WIN64) || defined(_WIN64) || defined(__x86_64) || defined(AMD64) > || defined(_M_AMD64) > +/** > + * \fn CUresult CUDAAPI cuvidMapVideoFrame64(CUvideodecoder hDecoder, int > nPicIdx, unsigned long long *pDevPtr, unsigned int *pPitch, CUVIDPROCPARAMS > *pVPP); > + * map a video frame > + */ 
> +typedef CUresult CUDAAPI tcuvidMapVideoFrame64(CUvideodecoder hDecoder, int > nPicIdx, unsigned long long *pDevPtr, > + unsigned int *pPitch, > CUVIDPROCPARAMS *pVPP); > + > +/** > + * \fn CUresult CUDAAPI cuvidUnmapVideoFrame64(CUvideodecoder hDecoder, > unsigned long long DevPtr); > + * Unmap a previously mapped video frame > + */ > +typedef CUresult CUDAAPI tcuvidUnmapVideoFrame64(CUvideodecoder hDecoder, > unsigned long long DevPtr); > + > +#if defined(__CUVID_DEVPTR64) && !defined(__CUVID_INTERNAL) > +#define tcuvidMapVideoFrame tcuvidMapVideoFrame64 > +#define tcuvidUnmapVideoFrame tcuvidUnmapVideoFrame64 > +#endif > +#endif > + > + > +/** > + * > + * Context-locking: to facilitate multi-threaded implementations, the > following 4 functions > + * provide a simple mutex-style host synchronization. If a non-NULL context > is specified > + * in CUVIDDECODECREATEINFO, the codec library will acquire the mutex > associated with the given > + * context before making any cuda calls. > + * A multi-threaded application could create a lock associated with a > context handle so that > + * multiple threads can safely share the same cuda context: > + * - use cuCtxPopCurrent immediately after context creation in order to > create a 'floating' context > + * that can be passed to cuvidCtxLockCreate. > + * - When using a floating context, all cuda calls should only be made > within a cuvidCtxLock/cuvidCtxUnlock section. > + * > + * NOTE: This is a safer alternative to cuCtxPushCurrent and > cuCtxPopCurrent, and is not related to video > + * decoder in any way (implemented as a critical section associated with > cuCtx{Push|Pop}Current calls). 
> +*/ > + > +/** > + * \fn CUresult CUDAAPI cuvidCtxLockCreate(CUvideoctxlock *pLock, CUcontext > ctx) > + */ > +typedef CUresult CUDAAPI tcuvidCtxLockCreate(CUvideoctxlock *pLock, > CUcontext ctx); > + > +/** > + * \fn CUresult CUDAAPI cuvidCtxLockDestroy(CUvideoctxlock lck) > + */ > +typedef CUresult CUDAAPI tcuvidCtxLockDestroy(CUvideoctxlock lck); > + > +/** > + * \fn CUresult CUDAAPI cuvidCtxLock(CUvideoctxlock lck, unsigned int > reserved_flags) > + */ > +typedef CUresult CUDAAPI tcuvidCtxLock(CUvideoctxlock lck, unsigned int > reserved_flags); > + > +/** > + * \fn CUresult CUDAAPI cuvidCtxUnlock(CUvideoctxlock lck, unsigned int > reserved_flags) > + */ > +typedef CUresult CUDAAPI tcuvidCtxUnlock(CUvideoctxlock lck, unsigned int > reserved_flags); > + > +/** @} */ /* End VIDEO_DECODER */ > + > +#if defined(__cplusplus) > +} > +#endif /* __cplusplus */ > + > +#endif // __CUDA_VIDEO_H__ > diff --git a/compat/cuda/dynlink_loader.h b/compat/cuda/dynlink_loader.h > new file mode 100644 > index 0000000..6275664 > --- /dev/null > +++ b/compat/cuda/dynlink_loader.h > @@ -0,0 +1,254 @@ > +/* > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. 
> + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 > USA > + */ > + > +#ifndef AV_COMPAT_CUDA_DYNLINK_LOADER_H > +#define AV_COMPAT_CUDA_DYNLINK_LOADER_H > + > +#include "compat/cuda/dynlink_cuda.h" > +#include "compat/cuda/dynlink_nvcuvid.h" > +#include "compat/nvenc/nvEncodeAPI.h" > + > +#include "libavutil/log.h" > +#include "libavutil/error.h" > + > +#if defined(_WIN32) > +# include <windows.h> > +# define dlopen(filename, flags) LoadLibrary(TEXT(filename)) > +# define dlsym(handle, symbol) GetProcAddress(handle, symbol) > +# define dlclose(handle) FreeLibrary(handle) > +# define LIB_HANDLE HMODULE > +#else > +# include <dlfcn.h> > +# define LIB_HANDLE void* > +#endif > + > +#if defined(_WIN32) || defined(__CYGWIN__) > +# define CUDA_LIBNAME "nvcuda.dll" > +# define NVCUVID_LIBNAME "nvcuvid.dll" > +# if ARCH_X86_64 > +# define NVENC_LIBNAME "nvEncodeAPI64.dll" > +# else > +# define NVENC_LIBNAME "nvEncodeAPI.dll" > +# endif > +#else > +# define CUDA_LIBNAME "libcuda.so.1" > +# define NVCUVID_LIBNAME "libnvcuvid.so.1" > +# define NVENC_LIBNAME "libnvidia-encode.so.1" > +#endif > + > +#define LOAD_LIBRARY(l, path) \ > + do { \ > + if (!((l) = dlopen(path, RTLD_LAZY))) { \ > + av_log(NULL, AV_LOG_ERROR, "Cannot load %s\n", path); \ > + ret = AVERROR_UNKNOWN; \ > + goto error; \ > + } \ > + av_log(NULL, AV_LOG_TRACE, "Loaded lib: %s\n", path); \ > + } while (0) > + > +#define LOAD_SYMBOL(fun, symbol) \ > + do { \ > + if (!((f->fun) = dlsym(f->lib, symbol))) { \ > + av_log(NULL, AV_LOG_ERROR, "Cannot load %s\n", symbol); \ > + ret = AVERROR_UNKNOWN; \ > + goto error; \ > + } \ > + av_log(NULL, AV_LOG_TRACE, "Loaded sym: %s\n", symbol); \ > + } while (0) > + > +#define GENERIC_LOAD_FUNC_PREAMBLE(T, n, N) \ > + T *f; \ > + int ret; \ > + \ > + n##_free_functions(functions); \ > + \ > + f = 
*functions = av_mallocz(sizeof(*f)); \ > + if (!f) \ > + return AVERROR(ENOMEM); \ > + \ > + LOAD_LIBRARY(f->lib, N); > + > +#define GENERIC_LOAD_FUNC_FINALE(n) \ > + return 0; \ > +error: \ > + n##_free_functions(functions); \ > + return ret; > + > +#define GENERIC_FREE_FUNC() \ > + if (!functions) \ > + return; \ > + if (*functions && (*functions)->lib) \ > + dlclose((*functions)->lib); \ > + av_freep(functions); > + > +#ifdef AV_COMPAT_DYNLINK_CUDA_H > +typedef struct CudaFunctions { > + tcuInit *cuInit; > + tcuDeviceGetCount *cuDeviceGetCount; > + tcuDeviceGet *cuDeviceGet; > + tcuDeviceGetName *cuDeviceGetName; > + tcuDeviceComputeCapability *cuDeviceComputeCapability; > + tcuCtxCreate_v2 *cuCtxCreate; > + tcuCtxPushCurrent_v2 *cuCtxPushCurrent; > + tcuCtxPopCurrent_v2 *cuCtxPopCurrent; > + tcuCtxDestroy_v2 *cuCtxDestroy; > + tcuMemAlloc_v2 *cuMemAlloc; > + tcuMemFree_v2 *cuMemFree; > + tcuMemcpy2D_v2 *cuMemcpy2D; > + tcuGetErrorName *cuGetErrorName; > + tcuGetErrorString *cuGetErrorString; > + > + LIB_HANDLE lib; > +} CudaFunctions; > +#else > +typedef struct CudaFunctions CudaFunctions; > +#endif > + > +typedef struct CuvidFunctions { > + tcuvidCreateDecoder *cuvidCreateDecoder; > + tcuvidDestroyDecoder *cuvidDestroyDecoder; > + tcuvidDecodePicture *cuvidDecodePicture; > + tcuvidMapVideoFrame *cuvidMapVideoFrame; > + tcuvidUnmapVideoFrame *cuvidUnmapVideoFrame; > + tcuvidCtxLockCreate *cuvidCtxLockCreate; > + tcuvidCtxLockDestroy *cuvidCtxLockDestroy; > + tcuvidCtxLock *cuvidCtxLock; > + tcuvidCtxUnlock *cuvidCtxUnlock; > + > + tcuvidCreateVideoSource *cuvidCreateVideoSource; > + tcuvidCreateVideoSourceW *cuvidCreateVideoSourceW; > + tcuvidDestroyVideoSource *cuvidDestroyVideoSource; > + tcuvidSetVideoSourceState *cuvidSetVideoSourceState; > + tcuvidGetVideoSourceState *cuvidGetVideoSourceState; > + tcuvidGetSourceVideoFormat *cuvidGetSourceVideoFormat; > + tcuvidGetSourceAudioFormat *cuvidGetSourceAudioFormat; > + tcuvidCreateVideoParser 
*cuvidCreateVideoParser; > + tcuvidParseVideoData *cuvidParseVideoData; > + tcuvidDestroyVideoParser *cuvidDestroyVideoParser; > + > + LIB_HANDLE lib; > +} CuvidFunctions; > + > +typedef struct NvencFunctions { > + NVENCSTATUS (NVENCAPI > *NvEncodeAPICreateInstance)(NV_ENCODE_API_FUNCTION_LIST *functionList); > + NVENCSTATUS (NVENCAPI *NvEncodeAPIGetMaxSupportedVersion)(uint32_t* > version); > + > + LIB_HANDLE lib; > +} NvencFunctions; > + > +#ifdef AV_COMPAT_DYNLINK_CUDA_H > +static inline void cuda_free_functions(CudaFunctions **functions) > +{ > + GENERIC_FREE_FUNC(); > +} > +#endif > + > +static inline void cuvid_free_functions(CuvidFunctions **functions) > +{ > + GENERIC_FREE_FUNC(); > +} > + > +static inline void nvenc_free_functions(NvencFunctions **functions) > +{ > + GENERIC_FREE_FUNC(); > +} > + > +#ifdef AV_COMPAT_DYNLINK_CUDA_H > +static inline int cuda_load_functions(CudaFunctions **functions) > +{ > + GENERIC_LOAD_FUNC_PREAMBLE(CudaFunctions, cuda, CUDA_LIBNAME); > + > + LOAD_SYMBOL(cuInit, "cuInit"); > + LOAD_SYMBOL(cuDeviceGetCount, "cuDeviceGetCount"); > + LOAD_SYMBOL(cuDeviceGet, "cuDeviceGet"); > + LOAD_SYMBOL(cuDeviceGetName, "cuDeviceGetName"); > + LOAD_SYMBOL(cuDeviceComputeCapability, "cuDeviceComputeCapability"); > + LOAD_SYMBOL(cuCtxCreate, "cuCtxCreate_v2"); > + LOAD_SYMBOL(cuCtxPushCurrent, "cuCtxPushCurrent_v2"); > + LOAD_SYMBOL(cuCtxPopCurrent, "cuCtxPopCurrent_v2"); > + LOAD_SYMBOL(cuCtxDestroy, "cuCtxDestroy_v2"); > + LOAD_SYMBOL(cuMemAlloc, "cuMemAlloc_v2"); > + LOAD_SYMBOL(cuMemFree, "cuMemFree_v2"); > + LOAD_SYMBOL(cuMemcpy2D, "cuMemcpy2D_v2"); > + LOAD_SYMBOL(cuGetErrorName, "cuGetErrorName"); > + LOAD_SYMBOL(cuGetErrorString, "cuGetErrorString"); > + > + GENERIC_LOAD_FUNC_FINALE(cuda); > +} > +#endif > + > +static inline int cuvid_load_functions(CuvidFunctions **functions) > +{ > + GENERIC_LOAD_FUNC_PREAMBLE(CuvidFunctions, cuvid, NVCUVID_LIBNAME); > + > + LOAD_SYMBOL(cuvidCreateDecoder, "cuvidCreateDecoder"); > + 
LOAD_SYMBOL(cuvidDestroyDecoder, "cuvidDestroyDecoder"); > + LOAD_SYMBOL(cuvidDecodePicture, "cuvidDecodePicture"); > +#ifdef __CUVID_DEVPTR64 > + LOAD_SYMBOL(cuvidMapVideoFrame, "cuvidMapVideoFrame64"); > + LOAD_SYMBOL(cuvidUnmapVideoFrame, "cuvidUnmapVideoFrame64"); > +#else > + LOAD_SYMBOL(cuvidMapVideoFrame, "cuvidMapVideoFrame"); > + LOAD_SYMBOL(cuvidUnmapVideoFrame, "cuvidUnmapVideoFrame"); > +#endif > + LOAD_SYMBOL(cuvidCtxLockCreate, "cuvidCtxLockCreate"); > + LOAD_SYMBOL(cuvidCtxLockDestroy, "cuvidCtxLockDestroy"); > + LOAD_SYMBOL(cuvidCtxLock, "cuvidCtxLock"); > + LOAD_SYMBOL(cuvidCtxUnlock, "cuvidCtxUnlock"); > + > + LOAD_SYMBOL(cuvidCreateVideoSource, "cuvidCreateVideoSource"); > + LOAD_SYMBOL(cuvidCreateVideoSourceW, "cuvidCreateVideoSourceW"); > + LOAD_SYMBOL(cuvidDestroyVideoSource, "cuvidDestroyVideoSource"); > + LOAD_SYMBOL(cuvidSetVideoSourceState, "cuvidSetVideoSourceState"); > + LOAD_SYMBOL(cuvidGetVideoSourceState, "cuvidGetVideoSourceState"); > + LOAD_SYMBOL(cuvidGetSourceVideoFormat, "cuvidGetSourceVideoFormat"); > + LOAD_SYMBOL(cuvidGetSourceAudioFormat, "cuvidGetSourceAudioFormat"); > + LOAD_SYMBOL(cuvidCreateVideoParser, "cuvidCreateVideoParser"); > + LOAD_SYMBOL(cuvidParseVideoData, "cuvidParseVideoData"); > + LOAD_SYMBOL(cuvidDestroyVideoParser, "cuvidDestroyVideoParser"); > + > + GENERIC_LOAD_FUNC_FINALE(cuvid); > +} > + > +static inline int nvenc_load_functions(NvencFunctions **functions) > +{ > + GENERIC_LOAD_FUNC_PREAMBLE(NvencFunctions, nvenc, NVENC_LIBNAME); > + > + LOAD_SYMBOL(NvEncodeAPICreateInstance, "NvEncodeAPICreateInstance"); > + LOAD_SYMBOL(NvEncodeAPIGetMaxSupportedVersion, > "NvEncodeAPIGetMaxSupportedVersion"); > + > + GENERIC_LOAD_FUNC_FINALE(nvenc); > +} > + > +#undef GENERIC_LOAD_FUNC_PREAMBLE > +#undef LOAD_LIBRARY > +#undef LOAD_SYMBOL > +#undef GENERIC_LOAD_FUNC_FINALE > +#undef GENERIC_FREE_FUNC > +#undef CUDA_LIBNAME > +#undef NVCUVID_LIBNAME > +#undef NVENC_LIBNAME > +#undef LIB_HANDLE > + > +#if 
defined(_WIN32) > +#undef dlopen > +#undef dlsym > +#undef dlclose > +#endif > + > +#endif > \ No newline at end of file > diff --git a/compat/cuda/dynlink_nvcuvid.h b/compat/cuda/dynlink_nvcuvid.h > new file mode 100644 > index 0000000..6c197e0 > --- /dev/null > +++ b/compat/cuda/dynlink_nvcuvid.h > @@ -0,0 +1,316 @@ > +/* > + * This copyright notice applies to this header file only: > + * > + * Copyright (c) 2010-2016 NVIDIA Corporation > + * > + * Permission is hereby granted, free of charge, to any person > + * obtaining a copy of this software and associated documentation > + * files (the "Software"), to deal in the Software without > + * restriction, including without limitation the rights to use, > + * copy, modify, merge, publish, distribute, sublicense, and/or sell > + * copies of the software, and to permit persons to whom the > + * software is furnished to do so, subject to the following > + * conditions: > + * > + * The above copyright notice and this permission notice shall be > + * included in all copies or substantial portions of the Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES > + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND > + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT > + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, > + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR > + * OTHER DEALINGS IN THE SOFTWARE. > + */ > + > +/** > + * \file nvcuvid.h > + * NvCuvid API provides Video Decoding interface to NVIDIA GPU devices. > + * \date 2015-2015 > + * This file contains the interface constants, structure definitions and > function prototypes. 
> + */ > + > +#if !defined(__NVCUVID_H__) > +#define __NVCUVID_H__ > + > +#include "compat/cuda/dynlink_cuviddec.h" > + > +#if defined(__cplusplus) > +extern "C" { > +#endif /* __cplusplus */ > + > +//////////////////////////////////////////////////////////////////////////////////////////////// > +// > +// High-level helper APIs for video sources > +// > + > +typedef void *CUvideosource; > +typedef void *CUvideoparser; > +typedef long long CUvideotimestamp; > + > +/** > + * \addtogroup VIDEO_PARSER Video Parser > + * @{ > + */ > + > +/*! > + * \enum cudaVideoState > + * Video Source State > + */ > +typedef enum { > + cudaVideoState_Error = -1, /**< Error state (invalid source) */ > + cudaVideoState_Stopped = 0, /**< Source is stopped (or reached > end-of-stream) */ > + cudaVideoState_Started = 1 /**< Source is running and delivering > data */ > +} cudaVideoState; > + > +/*! > + * \enum cudaAudioCodec > + * Audio compression > + */ > +typedef enum { > + cudaAudioCodec_MPEG1=0, /**< MPEG-1 Audio */ > + cudaAudioCodec_MPEG2, /**< MPEG-2 Audio */ > + cudaAudioCodec_MP3, /**< MPEG-1 Layer III Audio */ > + cudaAudioCodec_AC3, /**< Dolby Digital (AC3) Audio */ > + cudaAudioCodec_LPCM /**< PCM Audio */ > +} cudaAudioCodec; > + > +/*! 
> + * \struct CUVIDEOFORMAT > + * Video format > + */ > +typedef struct > +{ > + cudaVideoCodec codec; /**< Compression format */ > + /** > + * frame rate = numerator / denominator (for example: 30000/1001) > + */ > + struct { > + unsigned int numerator; /**< frame rate numerator (0 = > unspecified or variable frame rate) */ > + unsigned int denominator; /**< frame rate denominator (0 = > unspecified or variable frame rate) */ > + } frame_rate; > + unsigned char progressive_sequence; /**< 0=interlaced, 1=progressive > */ > + unsigned char bit_depth_luma_minus8; /**< high bit depth Luma */ > + unsigned char bit_depth_chroma_minus8; /**< high bit depth Chroma */ > + unsigned char reserved1; /**< Reserved for future use */ > + unsigned int coded_width; /**< coded frame width */ > + unsigned int coded_height; /**< coded frame height */ > + /** > + * area of the frame that should be displayed > + * typical example: > + * coded_width = 1920, coded_height = 1088 > + * display_area = { 0,0,1920,1080 } > + */ > + struct { > + int left; /**< left position of display > rect */ > + int top; /**< top position of display > rect */ > + int right; /**< right position of display > rect */ > + int bottom; /**< bottom position of display > rect */ > + } display_area; > + cudaVideoChromaFormat chroma_format; /**< Chroma format */ > + unsigned int bitrate; /**< video bitrate (bps, > 0=unknown) */ > + /** > + * Display Aspect Ratio = x:y (4:3, 16:9, etc) > + */ > + struct { > + int x; > + int y; > + } display_aspect_ratio; > + /** > + * Video Signal Description > + */ > + struct { > + unsigned char video_format : 3; > + unsigned char video_full_range_flag : 1; > + unsigned char reserved_zero_bits : 4; > + unsigned char color_primaries; > + unsigned char transfer_characteristics; > + unsigned char matrix_coefficients; > + } video_signal_description; > + unsigned int seqhdr_data_length; /**< Additional bytes > following (CUVIDEOFORMATEX) */ > +} CUVIDEOFORMAT; > + > +/*! 
> + * \struct CUVIDEOFORMATEX > + * Video format including raw sequence header information > + */ > +typedef struct > +{ > + CUVIDEOFORMAT format; > + unsigned char raw_seqhdr_data[1024]; > +} CUVIDEOFORMATEX; > + > +/*! > + * \struct CUAUDIOFORMAT > + * Audio Formats > + */ > +typedef struct > +{ > + cudaAudioCodec codec; /**< Compression format */ > + unsigned int channels; /**< number of audio channels */ > + unsigned int samplespersec; /**< sampling frequency */ > + unsigned int bitrate; /**< For uncompressed, can also be used to > determine bits per sample */ > + unsigned int reserved1; /**< Reserved for future use */ > + unsigned int reserved2; /**< Reserved for future use */ > +} CUAUDIOFORMAT; > + > + > +/*! > + * \enum CUvideopacketflags > + * Data packet flags > + */ > +typedef enum { > + CUVID_PKT_ENDOFSTREAM = 0x01, /**< Set when this is the last packet > for this stream */ > + CUVID_PKT_TIMESTAMP = 0x02, /**< Timestamp is valid */ > + CUVID_PKT_DISCONTINUITY = 0x04 /**< Set when a discontinuity has to > be signalled */ > +} CUvideopacketflags; > + > +/*! > + * \struct CUVIDSOURCEDATAPACKET > + * Data Packet > + */ > +typedef struct _CUVIDSOURCEDATAPACKET > +{ > + unsigned long flags; /**< Combination of CUVID_PKT_XXX flags > */ > + unsigned long payload_size; /**< number of bytes in the payload (may > be zero if EOS flag is set) */ > + const unsigned char *payload; /**< Pointer to packet payload data (may > be NULL if EOS flag is set) */ > + CUvideotimestamp timestamp; /**< Presentation timestamp (10MHz > clock), only valid if CUVID_PKT_TIMESTAMP flag is set */ > +} CUVIDSOURCEDATAPACKET; > + > +// Callback for packet delivery > +typedef int (CUDAAPI *PFNVIDSOURCECALLBACK)(void *, CUVIDSOURCEDATAPACKET *); > + > +/*! 
> + * \struct CUVIDSOURCEPARAMS > + * Source Params > + */ > +typedef struct _CUVIDSOURCEPARAMS > +{ > + unsigned int ulClockRate; /**< Timestamp units in Hz > (0=default=10000000Hz) */ > + unsigned int uReserved1[7]; /**< Reserved for future use > - set to zero */ > + void *pUserData; /**< Parameter passed in to > the data handlers */ > + PFNVIDSOURCECALLBACK pfnVideoDataHandler; /**< Called to deliver video > packets */ > + PFNVIDSOURCECALLBACK pfnAudioDataHandler; /**< Called to deliver audio > packets */ > + void *pvReserved2[8]; /**< Reserved for future use > - set to NULL */ > +} CUVIDSOURCEPARAMS; > + > +/*! > + * \enum CUvideosourceformat_flags > + * CUvideosourceformat_flags > + */ > +typedef enum { > + CUVID_FMT_EXTFORMATINFO = 0x100 /**< Return extended format > structure (CUVIDEOFORMATEX) */ > +} CUvideosourceformat_flags; > + > +#if !defined(__APPLE__) > +/** > + * \fn CUresult CUDAAPI cuvidCreateVideoSource(CUvideosource *pObj, const > char *pszFileName, CUVIDSOURCEPARAMS *pParams) > + * Create Video Source > + */ > +typedef CUresult CUDAAPI tcuvidCreateVideoSource(CUvideosource *pObj, const > char *pszFileName, CUVIDSOURCEPARAMS *pParams); > + > +/** > + * \fn CUresult CUDAAPI cuvidCreateVideoSourceW(CUvideosource *pObj, const > wchar_t *pwszFileName, CUVIDSOURCEPARAMS *pParams) > + * Create Video Source > + */ > +typedef CUresult CUDAAPI tcuvidCreateVideoSourceW(CUvideosource *pObj, const > wchar_t *pwszFileName, CUVIDSOURCEPARAMS *pParams); > + > +/** > + * \fn CUresult CUDAAPI cuvidDestroyVideoSource(CUvideosource obj) > + * Destroy Video Source > + */ > +typedef CUresult CUDAAPI tcuvidDestroyVideoSource(CUvideosource obj); > + > +/** > + * \fn CUresult CUDAAPI cuvidSetVideoSourceState(CUvideosource obj, > cudaVideoState state) > + * Set Video Source state > + */ > +typedef CUresult CUDAAPI tcuvidSetVideoSourceState(CUvideosource obj, > cudaVideoState state); > + > +/** > + * \fn cudaVideoState CUDAAPI cuvidGetVideoSourceState(CUvideosource obj) > 
+ * Get Video Source state > + */ > +typedef cudaVideoState CUDAAPI tcuvidGetVideoSourceState(CUvideosource obj); > + > +/** > + * \fn CUresult CUDAAPI cuvidGetSourceVideoFormat(CUvideosource obj, > CUVIDEOFORMAT *pvidfmt, unsigned int flags) > + * Get Video Source Format > + */ > +typedef CUresult CUDAAPI tcuvidGetSourceVideoFormat(CUvideosource obj, > CUVIDEOFORMAT *pvidfmt, unsigned int flags); > + > +/** > + * \fn CUresult CUDAAPI cuvidGetSourceAudioFormat(CUvideosource obj, > CUAUDIOFORMAT *paudfmt, unsigned int flags) > + * Get Audio Source Format > + */ > +typedef CUresult CUDAAPI tcuvidGetSourceAudioFormat(CUvideosource obj, > CUAUDIOFORMAT *paudfmt, unsigned int flags); > + > +#endif > + > +/** > + * \struct CUVIDPARSERDISPINFO > + */ > +typedef struct _CUVIDPARSERDISPINFO > +{ > + int picture_index; /**< */ > + int progressive_frame; /**< */ > + int top_field_first; /**< */ > + int repeat_first_field; /**< Number of additional fields (1=ivtc, > 2=frame doubling, 4=frame tripling, -1=unpaired field) */ > + CUvideotimestamp timestamp; /**< */ > +} CUVIDPARSERDISPINFO; > + > +// > +// Parser callbacks > +// The parser will call these synchronously from within > cuvidParseVideoData(), whenever a picture is ready to > +// be decoded and/or displayed. 
> +// > +typedef int (CUDAAPI *PFNVIDSEQUENCECALLBACK)(void *, CUVIDEOFORMAT *); > +typedef int (CUDAAPI *PFNVIDDECODECALLBACK)(void *, CUVIDPICPARAMS *); > +typedef int (CUDAAPI *PFNVIDDISPLAYCALLBACK)(void *, CUVIDPARSERDISPINFO *); > + > +/** > + * \struct CUVIDPARSERPARAMS > + */ > +typedef struct _CUVIDPARSERPARAMS > +{ > + cudaVideoCodec CodecType; /**< cudaVideoCodec_XXX */ > + unsigned int ulMaxNumDecodeSurfaces; /**< Max # of decode surfaces > (parser will cycle through these) */ > + unsigned int ulClockRate; /**< Timestamp units in Hz > (0=default=10000000Hz) */ > + unsigned int ulErrorThreshold; /**< % Error threshold (0-100) > for calling pfnDecodePicture (100=always call pfnDecodePicture even if > picture bitstream is fully corrupted) */ > + unsigned int ulMaxDisplayDelay; /**< Max display queue delay > (improves pipelining of decode with display) - 0=no delay (recommended > values: 2..4) */ > + unsigned int uReserved1[5]; /**< Reserved for future use - > set to 0 */ > + void *pUserData; /**< User data for callbacks */ > + PFNVIDSEQUENCECALLBACK pfnSequenceCallback; /**< Called before decoding > frames and/or whenever there is a format change */ > + PFNVIDDECODECALLBACK pfnDecodePicture; /**< Called when a picture > is ready to be decoded (decode order) */ > + PFNVIDDISPLAYCALLBACK pfnDisplayPicture; /**< Called whenever a > picture is ready to be displayed (display order) */ > + void *pvReserved2[7]; /**< Reserved for future use > - set to NULL */ > + CUVIDEOFORMATEX *pExtVideoInfo; /**< [Optional] sequence > header data from system layer */ > +} CUVIDPARSERPARAMS; > + > +/** > + * \fn CUresult CUDAAPI cuvidCreateVideoParser(CUvideoparser *pObj, > CUVIDPARSERPARAMS *pParams) > + */ > +typedef CUresult CUDAAPI tcuvidCreateVideoParser(CUvideoparser *pObj, > CUVIDPARSERPARAMS *pParams); > + > +/** > + * \fn CUresult CUDAAPI cuvidParseVideoData(CUvideoparser obj, > CUVIDSOURCEDATAPACKET *pPacket) > + */ > +typedef CUresult CUDAAPI 
tcuvidParseVideoData(CUvideoparser obj, > CUVIDSOURCEDATAPACKET *pPacket); > + > +/** > + * \fn CUresult CUDAAPI cuvidDestroyVideoParser(CUvideoparser obj) > + */ > +typedef CUresult CUDAAPI tcuvidDestroyVideoParser(CUvideoparser obj); > + > +/** @} */ /* END VIDEO_PARSER */ > +//////////////////////////////////////////////////////////////////////////////////////////////// > + > +#if defined(__cplusplus) > +} > +#endif /* __cplusplus */ > + > +#endif // __NVCUVID_H__ > + > + > -- > 2.10.1 > > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > http://ffmpeg.org/mailman/listinfo/ffmpeg-devel _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel