> -----Original Message----- > From: Fu, Linjie > Sent: Tuesday, March 26, 2019 13:38 > To: ffmpeg-devel@ffmpeg.org > Cc: Fu, Linjie <linjie...@intel.com>; ChaoX A Liu <chaox.a....@intel.com> > Subject: [PATCH 1/3] lavc/qsvdec: add support for gpu_copy > > Add support for GPU copy when QSV decoders works in system memory > mode. > However, memory must be sequent and aligned with 128x64 to enable this > feature.(first introduced in FFmpeg 3.3.1) > > GPUCopy = MFX_GPUCOPY_ON leads to performance improvement up to > x10. > > CMD: > ffmpeg -init_hw_device qsv=hw -filter_hw_device hw -c:v h264_qsv > -gpu_copy on -i input.h264 -pix_fmt yuv420p out.yuv > > > Signed-off-by: Linjie Fu <linjie...@intel.com> > Signed-off-by: ChaoX A Liu <chaox.a....@intel.com> > --- > libavcodec/qsv.c | 27 +++++++++++++------- > libavcodec/qsv_internal.h | 6 ++--- > libavcodec/qsvdec.c | 53 ++++++++++++++++++++++++++++++++++---- > - > libavcodec/qsvdec.h | 2 ++ > libavcodec/qsvdec_h2645.c | 10 ++++++++ > libavcodec/qsvdec_other.c | 5 ++++ > libavcodec/qsvenc.c | 7 +++--- > 7 files changed, 89 insertions(+), 21 deletions(-) > > diff --git a/libavcodec/qsv.c b/libavcodec/qsv.c > index bb0d79588c..40e6c677cb 100644 > --- a/libavcodec/qsv.c > +++ b/libavcodec/qsv.c > @@ -277,15 +277,19 @@ load_plugin_fail: > } > > int ff_qsv_init_internal_session(AVCodecContext *avctx, mfxSession > *session, > - const char *load_plugins) > + const char *load_plugins, int gpu_copy) > { > - mfxIMPL impl = MFX_IMPL_AUTO_ANY; > - mfxVersion ver = { { QSV_VERSION_MINOR, QSV_VERSION_MAJOR } }; > + mfxIMPL impl = MFX_IMPL_AUTO_ANY; > + mfxVersion ver = { { QSV_VERSION_MINOR, > QSV_VERSION_MAJOR } }; > + mfxInitParam init_par = { MFX_IMPL_AUTO_ANY }; > > const char *desc; > int ret; > > - ret = MFXInit(impl, &ver, session); > + init_par.GPUCopy = gpu_copy; > + init_par.Implementation = impl; > + init_par.Version = ver; > + ret = MFXInitEx(init_par, session); > if (ret < 0) > return ff_qsv_print_error(avctx, ret, > "Error 
initializing an internal MFX > session"); > @@ -571,7 +575,8 @@ static mfxStatus qsv_frame_get_hdl(mfxHDL pthis, > mfxMemId mid, mfxHDL *hdl) > } > > int ff_qsv_init_session_device(AVCodecContext *avctx, mfxSession > *psession, > - AVBufferRef *device_ref, const char > *load_plugins) > + AVBufferRef *device_ref, const char > *load_plugins, > + int gpu_copy) > { > static const mfxHandleType handle_types[] = { > MFX_HANDLE_VA_DISPLAY, > @@ -581,11 +586,12 @@ int ff_qsv_init_session_device(AVCodecContext > *avctx, mfxSession *psession, > AVHWDeviceContext *device_ctx = (AVHWDeviceContext*)device_ref- > >data; > AVQSVDeviceContext *device_hwctx = device_ctx->hwctx; > mfxSession parent_session = device_hwctx->session; > + mfxInitParam init_par = { MFX_IMPL_AUTO_ANY }; > + mfxHDL handle = NULL; > > mfxSession session; > mfxVersion ver; > mfxIMPL impl; > - mfxHDL handle = NULL; > mfxHandleType handle_type; > mfxStatus err; > > @@ -611,7 +617,10 @@ int ff_qsv_init_session_device(AVCodecContext > *avctx, mfxSession *psession, > "from the session\n"); > } > > - err = MFXInit(impl, &ver, &session); > + init_par.GPUCopy = gpu_copy; > + init_par.Implementation = impl; > + init_par.Version = ver; > + err = MFXInitEx(init_par, &session); > if (err != MFX_ERR_NONE) > return ff_qsv_print_error(avctx, err, > "Error initializing a child MFX session"); > @@ -642,7 +651,7 @@ int ff_qsv_init_session_device(AVCodecContext > *avctx, mfxSession *psession, > > int ff_qsv_init_session_frames(AVCodecContext *avctx, mfxSession > *psession, > QSVFramesContext *qsv_frames_ctx, > - const char *load_plugins, int opaque) > + const char *load_plugins, int opaque, int > gpu_copy) > { > mfxFrameAllocator frame_allocator = { > .pthis = qsv_frames_ctx, > @@ -662,7 +671,7 @@ int ff_qsv_init_session_frames(AVCodecContext > *avctx, mfxSession *psession, > int ret; > > ret = ff_qsv_init_session_device(avctx, &session, > - frames_ctx->device_ref, load_plugins); > + frames_ctx->device_ref, load_plugins, > 
gpu_copy); > if (ret < 0) > return ret; > > diff --git a/libavcodec/qsv_internal.h b/libavcodec/qsv_internal.h > index 394c558883..8be6c3757c 100644 > --- a/libavcodec/qsv_internal.h > +++ b/libavcodec/qsv_internal.h > @@ -95,14 +95,14 @@ int ff_qsv_map_pixfmt(enum AVPixelFormat format, > uint32_t *fourcc); > enum AVPictureType ff_qsv_map_pictype(int mfx_pic_type); > > int ff_qsv_init_internal_session(AVCodecContext *avctx, mfxSession > *session, > - const char *load_plugins); > + const char *load_plugins, int gpu_copy); > > int ff_qsv_init_session_device(AVCodecContext *avctx, mfxSession > *psession, > - AVBufferRef *device_ref, const char > *load_plugins); > + AVBufferRef *device_ref, const char > *load_plugins, int > gpu_copy); > > int ff_qsv_init_session_frames(AVCodecContext *avctx, mfxSession > *session, > QSVFramesContext *qsv_frames_ctx, > - const char *load_plugins, int opaque); > + const char *load_plugins, int opaque, int > gpu_copy); > > int ff_qsv_find_surface_idx(QSVFramesContext *ctx, QSVFrame *frame); > > diff --git a/libavcodec/qsvdec.c b/libavcodec/qsvdec.c > index 4a0be811fb..5dd2b3834b 100644 > --- a/libavcodec/qsvdec.c > +++ b/libavcodec/qsvdec.c > @@ -34,9 +34,11 @@ > #include "libavutil/pixdesc.h" > #include "libavutil/pixfmt.h" > #include "libavutil/time.h" > +#include "libavutil/imgutils.h" > > #include "avcodec.h" > #include "internal.h" > +#include "decode.h" > #include "qsv.h" > #include "qsv_internal.h" > #include "qsvdec.h" > @@ -54,6 +56,31 @@ const AVCodecHWConfigInternal *ff_qsv_hw_configs[] > = { > NULL > }; > > +static int ff_qsv_get_continuous_buffer(AVCodecContext *avctx, AVFrame > *frame, AVBufferPool *pool) > +{ > + int ret = 0; > + > + ff_decode_frame_props(avctx, frame); > + > + frame->width = avctx->width; > + frame->height = avctx->height; > + frame->linesize[0] = FFALIGN(avctx->width, 128); > + frame->linesize[1] = frame->linesize[0]; > + frame->buf[0] = av_buffer_pool_get(pool); > + if (!frame->buf[0]) > + return 
AVERROR(ENOMEM); > + > + frame->data[0] = frame->buf[0]->data; > + frame->data[1] = frame->data[0] + > + frame->linesize[0] * FFALIGN(avctx->height, 64); > + > + ret = ff_attach_decode_data(frame); > + if (ret < 0) > + return ret; > + > + return 0; > +} > + > static int qsv_init_session(AVCodecContext *avctx, QSVContext *q, > mfxSession session, > AVBufferRef *hw_frames_ref, AVBufferRef > *hw_device_ref) > { > @@ -74,7 +101,8 @@ static int qsv_init_session(AVCodecContext *avctx, > QSVContext *q, mfxSession ses > > ret = ff_qsv_init_session_frames(avctx, &q->internal_session, > &q->frames_ctx, q->load_plugins, > - q->iopattern == > MFX_IOPATTERN_OUT_OPAQUE_MEMORY); > + q->iopattern == > MFX_IOPATTERN_OUT_OPAQUE_MEMORY, > + q->gpu_copy); > if (ret < 0) { > av_buffer_unref(&q->frames_ctx.hw_frames_ctx); > return ret; > @@ -88,7 +116,7 @@ static int qsv_init_session(AVCodecContext *avctx, > QSVContext *q, mfxSession ses > } > > ret = ff_qsv_init_session_device(avctx, &q->internal_session, > - hw_device_ref, q->load_plugins); > + hw_device_ref, q->load_plugins, > q->gpu_copy); > if (ret < 0) > return ret; > > @@ -96,7 +124,7 @@ static int qsv_init_session(AVCodecContext *avctx, > QSVContext *q, mfxSession ses > } else { > if (!q->internal_session) { > ret = ff_qsv_init_internal_session(avctx, &q->internal_session, > - q->load_plugins); > + q->load_plugins, q->gpu_copy); > if (ret < 0) > return ret; > } > @@ -213,6 +241,12 @@ static int qsv_decode_init(AVCodecContext *avctx, > QSVContext *q) > > q->frame_info = param.mfx.FrameInfo; > > + if (avctx->pix_fmt != AV_PIX_FMT_QSV) > + q->pool = av_buffer_pool_init(av_image_get_buffer_size(avctx- > >pix_fmt, > + > FFALIGN(avctx->width, 128), > + > FFALIGN(avctx->height, 64), 1), > + av_buffer_allocz); > + > return 0; > } > > @@ -220,9 +254,15 @@ static int alloc_frame(AVCodecContext *avctx, > QSVContext *q, QSVFrame *frame) > { > int ret; > > - ret = ff_get_buffer(avctx, frame->frame, AV_GET_BUFFER_FLAG_REF); > - if (ret < 0) > 
- return ret; > + if (!q->pool) { > + ret = ff_get_buffer(avctx, frame->frame, AV_GET_BUFFER_FLAG_REF); > + if (ret < 0) > + return ret; > + } else { > + ret = ff_qsv_get_continuous_buffer(avctx, frame->frame, q->pool); > + if (ret < 0) > + return ret; > + } > > if (frame->frame->format == AV_PIX_FMT_QSV) { > frame->surface = *(mfxFrameSurface1*)frame->frame->data[3]; > @@ -484,6 +524,7 @@ int ff_qsv_decode_close(QSVContext *q) > > av_buffer_unref(&q->frames_ctx.hw_frames_ctx); > av_buffer_unref(&q->frames_ctx.mids_buf); > + av_buffer_pool_uninit(&q->pool); > > return 0; > } > diff --git a/libavcodec/qsvdec.h b/libavcodec/qsvdec.h > index 111536caba..43ea03867e 100644 > --- a/libavcodec/qsvdec.h > +++ b/libavcodec/qsvdec.h > @@ -62,10 +62,12 @@ typedef struct QSVContext { > enum AVPixelFormat orig_pix_fmt; > uint32_t fourcc; > mfxFrameInfo frame_info; > + AVBufferPool *pool; > > // options set by the caller > int async_depth; > int iopattern; > + int gpu_copy; > > char *load_plugins; > > diff --git a/libavcodec/qsvdec_h2645.c b/libavcodec/qsvdec_h2645.c > index 9b49f5506e..3d1f1cbfac 100644 > --- a/libavcodec/qsvdec_h2645.c > +++ b/libavcodec/qsvdec_h2645.c > @@ -192,6 +192,11 @@ static const AVOption hevc_options[] = { > > { "load_plugins", "A :-separate list of hexadecimal plugin UIDs to load > in an > internal session", > OFFSET(qsv.load_plugins), AV_OPT_TYPE_STRING, { .str = "" }, 0, 0, > VD }, > + > + { "gpu_copy", "A GPU-accelerated memory copy for non-QSV pipelines", > OFFSET(qsv.gpu_copy), AV_OPT_TYPE_INT, { .i64 = > MFX_GPUCOPY_DEFAULT }, MFX_GPUCOPY_DEFAULT, > MFX_GPUCOPY_OFF, VD, "gpu_copy"}, > + { "default", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = > MFX_GPUCOPY_DEFAULT }, 0, 0, VD, "gpu_copy"}, > + { "on", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_GPUCOPY_ON }, > 0, 0, VD, "gpu_copy"}, > + { "off", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_GPUCOPY_OFF }, > 0, 0, VD, "gpu_copy"}, > { NULL }, > }; > > @@ -227,6 +232,11 @@ AVCodec ff_hevc_qsv_decoder = { > #if 
CONFIG_H264_QSV_DECODER > static const AVOption options[] = { > { "async_depth", "Internal parallelization depth, the higher the value > the > higher the latency.", OFFSET(qsv.async_depth), AV_OPT_TYPE_INT, { .i64 = > ASYNC_DEPTH_DEFAULT }, 1, INT_MAX, VD }, > + > + { "gpu_copy", "A GPU-accelerated memory copy for non-QSV pipelines", > OFFSET(qsv.gpu_copy), AV_OPT_TYPE_INT, { .i64 = > MFX_GPUCOPY_DEFAULT }, MFX_GPUCOPY_DEFAULT, > MFX_GPUCOPY_OFF, VD, "gpu_copy"}, > + { "default", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = > MFX_GPUCOPY_DEFAULT }, 0, 0, VD, "gpu_copy"}, > + { "on", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_GPUCOPY_ON }, > 0, 0, VD, "gpu_copy"}, > + { "off", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_GPUCOPY_OFF }, > 0, 0, VD, "gpu_copy"}, > { NULL }, > }; > > diff --git a/libavcodec/qsvdec_other.c b/libavcodec/qsvdec_other.c > index 03251d2c85..37237180fb 100644 > --- a/libavcodec/qsvdec_other.c > +++ b/libavcodec/qsvdec_other.c > @@ -169,6 +169,11 @@ static void qsv_decode_flush(AVCodecContext > *avctx) > #define VD AV_OPT_FLAG_VIDEO_PARAM | > AV_OPT_FLAG_DECODING_PARAM > static const AVOption options[] = { > { "async_depth", "Internal parallelization depth, the higher the value > the > higher the latency.", OFFSET(qsv.async_depth), AV_OPT_TYPE_INT, { .i64 = > ASYNC_DEPTH_DEFAULT }, 1, INT_MAX, VD }, > + > + { "gpu_copy", "A GPU-accelerated memory copy for non-QSV pipelines", > OFFSET(qsv.gpu_copy), AV_OPT_TYPE_INT, { .i64 = > MFX_GPUCOPY_DEFAULT }, MFX_GPUCOPY_DEFAULT, > MFX_GPUCOPY_OFF, VD, "gpu_copy"}, > + { "default", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = > MFX_GPUCOPY_DEFAULT }, 0, 0, VD, "gpu_copy"}, > + { "on", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_GPUCOPY_ON }, > 0, 0, VD, "gpu_copy"}, > + { "off", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_GPUCOPY_OFF }, > 0, 0, VD, "gpu_copy"}, > { NULL }, > }; > > diff --git a/libavcodec/qsvenc.c b/libavcodec/qsvenc.c > index 5aa020d47b..3d008ed527 100644 > --- a/libavcodec/qsvenc.c > +++ b/libavcodec/qsvenc.c > 
@@ -909,7 +909,8 @@ static int qsvenc_init_session(AVCodecContext > *avctx, QSVEncContext *q) > > ret = ff_qsv_init_session_frames(avctx, &q->internal_session, > &q->frames_ctx, q->load_plugins, > - q->param.IOPattern == > MFX_IOPATTERN_IN_OPAQUE_MEMORY); > + q->param.IOPattern == > MFX_IOPATTERN_IN_OPAQUE_MEMORY, > + MFX_GPUCOPY_OFF); > if (ret < 0) { > av_buffer_unref(&q->frames_ctx.hw_frames_ctx); > return ret; > @@ -918,14 +919,14 @@ static int qsvenc_init_session(AVCodecContext > *avctx, QSVEncContext *q) > q->session = q->internal_session; > } else if (avctx->hw_device_ctx) { > ret = ff_qsv_init_session_device(avctx, &q->internal_session, > - avctx->hw_device_ctx, > q->load_plugins); > + avctx->hw_device_ctx, > q->load_plugins, > MFX_GPUCOPY_OFF); > if (ret < 0) > return ret; > > q->session = q->internal_session; > } else { > ret = ff_qsv_init_internal_session(avctx, &q->internal_session, > - q->load_plugins); > + q->load_plugins, MFX_GPUCOPY_OFF); > if (ret < 0) > return ret; > > -- > 2.17.1
Ping? Any comments on this patch set? Decode performance improves significantly on some platforms (6x, for example). _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".