On 2017/6/14 7:41, Mark Thompson wrote: > On 07/06/17 01:53, Jun Zhao wrote: >> From 5c88956e36e7318cf1d1b7c41a9d4108fcf9d0a5 Mon Sep 17 00:00:00 2001 >> From: Jun Zhao <jun.z...@intel.com> >> Date: Fri, 12 May 2017 08:30:43 +0800 >> Subject: [PATCH] lavc/vaapi_encode_h26[45]: respect "slices" in h26[45] vaapi >> encoder. >> >> Enable multi-slice support in AVC/HEVC vaapi encoder. >> >> Signed-off-by: Wang, Yi A <yi.a.w...@intel.com> >> Signed-off-by: Jun Zhao <jun.z...@intel.com> >> --- > > I think this should be three patches: > >> libavcodec/vaapi_encode.c | 36 ++++++++++++++++++++++++++++++++---- >> libavcodec/vaapi_encode.h | 9 +++++++-- > > (1) Change the slice/parameter buffers to be allocated dynamically. > >> libavcodec/vaapi_encode_h264.c | 24 ++++++++++++++++++------ > > (2) Respect slices option in H.264 encoder. > >> libavcodec/vaapi_encode_h265.c | 28 ++++++++++++++++++++++------ > > (3) Respect slices option in H.265 encoder. > > > I'm not entirely sure that the first one is worth it - do you have a use-case > for very large numbers of slices? Given that VAAPI lacks the ability to > limit slices by size (for H.323 / RTP packet limits), the other use I can > think of is for parallelism or related conformance requirements, both of > which tend to want relatively small numbers. > For the first one, I don't have a use case for a large number of slices, but I think dynamic allocation is a more general solution in case the driver/GPU HW changes the maximum number of slices supported in GEN10, GEN11, ...
And I will split the patch as suggested in the comments. > >> 4 files changed, 79 insertions(+), 18 deletions(-) >> >> diff --git a/libavcodec/vaapi_encode.c b/libavcodec/vaapi_encode.c >> index 7e9c00f51d..14a3fba7b1 100644 >> --- a/libavcodec/vaapi_encode.c >> +++ b/libavcodec/vaapi_encode.c >> @@ -36,13 +36,18 @@ static int >> vaapi_encode_make_packed_header(AVCodecContext *avctx, >> VAAPIEncodeContext *ctx = avctx->priv_data; >> VAStatus vas; >> VABufferID param_buffer, data_buffer; >> + VABufferID *tmp; >> VAEncPackedHeaderParameterBuffer params = { >> .type = type, >> .bit_length = bit_len, >> .has_emulation_bytes = 1, >> }; >> >> - av_assert0(pic->nb_param_buffers + 2 <= MAX_PARAM_BUFFERS); >> + tmp = av_realloc_array(pic->param_buffers, sizeof(*tmp), >> (pic->nb_param_buffers + 2)); >> + if (!tmp) { >> + return AVERROR(ENOMEM); > > This failure case leaks the already-allocated buffers. Unfortunately you > can't use av_realloc_array(). > >> + } >> + pic->param_buffers = tmp; >> >> vas = vaCreateBuffer(ctx->hwctx->display, ctx->va_context, >> VAEncPackedHeaderParameterBufferType, >> @@ -77,9 +82,14 @@ static int vaapi_encode_make_param_buffer(AVCodecContext >> *avctx, >> { >> VAAPIEncodeContext *ctx = avctx->priv_data; >> VAStatus vas; >> + VABufferID *tmp; >> VABufferID buffer; >> >> - av_assert0(pic->nb_param_buffers + 1 <= MAX_PARAM_BUFFERS); >> + tmp = av_realloc_array(pic->param_buffers, sizeof(*tmp), >> (pic->nb_param_buffers + 1)); >> + if (!tmp) { >> + return AVERROR(ENOMEM); > > Same. > >> + } >> + pic->param_buffers = tmp; >> >> vas = vaCreateBuffer(ctx->hwctx->display, ctx->va_context, >> type, len, 1, data, &buffer); >> @@ -122,6 +132,8 @@ static int vaapi_encode_wait(AVCodecContext *avctx, >> // Input is definitely finished with now. 
>> av_frame_free(&pic->input_image); >> >> + av_freep(&pic->param_buffers); >> + >> pic->encode_complete = 1; >> return 0; >> } >> @@ -313,7 +325,10 @@ static int vaapi_encode_issue(AVCodecContext *avctx, >> } >> } >> >> - av_assert0(pic->nb_slices <= MAX_PICTURE_SLICES); >> + pic->slices = (VAAPIEncodeSlice **)av_malloc(sizeof(VAAPIEncodeSlice *) >> * pic->nb_slices); >> + if (pic->slices == NULL) >> + goto fail; >> + >> for (i = 0; i < pic->nb_slices; i++) { >> slice = av_mallocz(sizeof(*slice)); >> if (!slice) { >> @@ -322,7 +337,6 @@ static int vaapi_encode_issue(AVCodecContext *avctx, >> } >> slice->index = i; >> pic->slices[i] = slice; >> - > > Stray change? Will clean > >> if (ctx->codec->slice_params_size > 0) { >> slice->codec_slice_params = >> av_mallocz(ctx->codec->slice_params_size); >> if (!slice->codec_slice_params) { >> @@ -427,6 +441,8 @@ fail: >> vaDestroyBuffer(ctx->hwctx->display, pic->param_buffers[i]); >> fail_at_end: >> av_freep(&pic->codec_picture_params); >> + av_freep(&pic->param_buffers); >> + av_freep(&pic->slices); >> av_frame_free(&pic->recon_image); >> return err; >> } >> @@ -542,6 +558,8 @@ static int vaapi_encode_free(AVCodecContext *avctx, >> av_frame_free(&pic->input_image); >> av_frame_free(&pic->recon_image); >> >> + av_freep(&pic->param_buffers); >> + av_freep(&pic->slices); >> // Output buffer should already be destroyed. 
>> av_assert0(pic->output_buffer == VA_INVALID_ID); >> >> @@ -949,6 +967,7 @@ static av_cold int >> vaapi_encode_config_attributes(AVCodecContext *avctx) >> { VAConfigAttribRTFormat }, >> { VAConfigAttribRateControl }, >> { VAConfigAttribEncMaxRefFrames }, >> + { VAConfigAttribEncMaxSlices }, >> { VAConfigAttribEncPackedHeaders }, >> }; >> >> @@ -1079,6 +1098,15 @@ static av_cold int >> vaapi_encode_config_attributes(AVCodecContext *avctx) >> } >> } >> break; >> + case VAConfigAttribEncMaxSlices: >> + if (avctx->slices > attr[i].value) { >> + av_log(avctx, AV_LOG_ERROR, "Slices per frame more than %#x >> " >> + "is not supported.\n", attr[i].value); >> + err = AVERROR(EINVAL); >> + goto fail; >> + } >> + ctx->multi_slices_available = 1; >> + break; > > This can also be VA_ATTRIB_NOT_SUPPORTED, which presumably implies one slice > per frame only? Unfortunately it is always that with the i965 driver for > MPEG-2 despite the need for slices, so I'm not sure what you actually want to > do there. > >> case VAConfigAttribEncPackedHeaders: >> if (ctx->va_packed_headers & ~attr[i].value) { >> // This isn't fatal, but packed headers are always >> diff --git a/libavcodec/vaapi_encode.h b/libavcodec/vaapi_encode.h >> index 0edf27e4cb..4afe4fa103 100644 >> --- a/libavcodec/vaapi_encode.h >> +++ b/libavcodec/vaapi_encode.h >> @@ -73,7 +73,7 @@ typedef struct VAAPIEncodePicture { >> VASurfaceID recon_surface; >> >> int nb_param_buffers; >> - VABufferID param_buffers[MAX_PARAM_BUFFERS]; >> + VABufferID *param_buffers; >> >> AVBufferRef *output_buffer_ref; >> VABufferID output_buffer; >> @@ -85,7 +85,10 @@ typedef struct VAAPIEncodePicture { >> struct VAAPIEncodePicture *refs[MAX_PICTURE_REFERENCES]; >> >> int nb_slices; >> - VAAPIEncodeSlice *slices[MAX_PICTURE_SLICES]; >> + VAAPIEncodeSlice **slices; >> + int slice_of_mbs; >> + int slice_mod_mbs; >> + int last_mb_index; > > Macroblocks are not a generic concept - H.265 does not have them. 
I think > I'd prefer this calculated stuff to go in the private data of the individual > codecs rather than in the generic header (the generic code never touches it, > after all). Will use a private struct for this. > >> } VAAPIEncodePicture; >> >> typedef struct VAAPIEncodeContext { >> @@ -105,6 +108,8 @@ typedef struct VAAPIEncodeContext { >> // Supported packed headers (initially the desired set, modified >> // later to what is actually supported). >> unsigned int va_packed_headers; >> + // Supported multi-slices per frame >> + int multi_slices_available; >> >> // The required size of surfaces. This is probably the input >> // size (AVCodecContext.width|height) aligned up to whatever >> diff --git a/libavcodec/vaapi_encode_h264.c b/libavcodec/vaapi_encode_h264.c >> index 92e29554ed..f325346433 100644 >> --- a/libavcodec/vaapi_encode_h264.c >> +++ b/libavcodec/vaapi_encode_h264.c >> @@ -1002,7 +1002,16 @@ static int >> vaapi_encode_h264_init_picture_params(AVCodecContext *avctx, >> vpic->pic_fields.bits.idr_pic_flag = (pic->type == PICTURE_TYPE_IDR); >> vpic->pic_fields.bits.reference_pic_flag = (pic->type != >> PICTURE_TYPE_B); >> >> - pic->nb_slices = 1; >> + if (ctx->multi_slices_available) >> + avctx->slices = av_clip(avctx->slices, 1, priv->mb_height); >> + else >> + avctx->slices = 1; > > If the user requests slices they probably have some clear reason for doing so > (e.g. to work with some old device which only accepts 1080p in four slices). > Do we really want to silently ignore that if we can't actually do it? Will give a warning message when the driver can't support multi-slices but the user sets "slices" > 1. 
> >> + >> + pic->nb_slices = avctx->slices; >> + >> + pic->slice_of_mbs = (priv->mb_width * priv->mb_height) / pic->nb_slices; >> + pic->slice_mod_mbs = (priv->mb_width * priv->mb_height) % >> pic->nb_slices; >> + pic->last_mb_index = 0; >> >> return 0; >> } >> @@ -1052,15 +1061,18 @@ static int >> vaapi_encode_h264_init_slice_params(AVCodecContext *avctx, >> av_assert0(0 && "invalid picture type"); >> } >> >> - // Only one slice per frame. >> - vslice->macroblock_address = 0; >> - vslice->num_macroblocks = priv->mb_width * priv->mb_height; >> + vslice->macroblock_address = pic->last_mb_index; >> + vslice->num_macroblocks = pic->slice_of_mbs + (pic->slice_mod_mbs > 0 ? >> 1 : 0); > > I think it would be cuter if rounded properly rather than top-loading the > rounding error. > > That is, for N in [0, slices) use: > macroblock_address = N * total_macroblocks / slices; > num_macroblocks = (N + 1) * total_macroblocks / slices - > macroblock_address; > > That makes it cut in half nicely on, for example, four slices in a 7x6 image, > rather than having a stray macroblock over the centre line. > > (That is: 1111111 vs. 1111111 > 1111222 1112222 > 2222222 2222222 > 2333333 3333333 > 3333444 3334444 > 4444444 4444444 ). 
> Will double-check this part >> + if (pic->slice_mod_mbs > 0) >> + pic->slice_mod_mbs --; >> + pic->last_mb_index += vslice->num_macroblocks; >> >> vslice->macroblock_info = VA_INVALID_ID; >> >> vslice->pic_parameter_set_id = vpic->pic_parameter_set_id; >> - vslice->idr_pic_id = priv->idr_pic_count++; >> - >> + vslice->idr_pic_id = priv->idr_pic_count; >> + if (pic->last_mb_index == priv->mb_width * priv->mb_height) >> + priv->idr_pic_count ++; >> vslice->pic_order_cnt_lsb = (pic->display_order - priv->last_idr_frame) >> & >> ((1 << (4 + >> vseq->seq_fields.bits.log2_max_pic_order_cnt_lsb_minus4)) - 1); >> >> diff --git a/libavcodec/vaapi_encode_h265.c b/libavcodec/vaapi_encode_h265.c >> index 6e008b7b9c..e930026184 100644 >> --- a/libavcodec/vaapi_encode_h265.c >> +++ b/libavcodec/vaapi_encode_h265.c >> @@ -1025,7 +1025,15 @@ static int >> vaapi_encode_h265_init_picture_params(AVCodecContext *avctx, >> av_assert0(0 && "invalid picture type"); >> } >> >> - pic->nb_slices = 1; >> + if (ctx->multi_slices_available) >> + avctx->slices = av_clip(avctx->slices, 1, priv->ctu_height); >> + else >> + avctx->slices = 1; >> + >> + pic->nb_slices = avctx->slices; >> + pic->slice_of_mbs = (priv->ctu_width * priv->ctu_height) / >> pic->nb_slices; >> + pic->slice_mod_mbs = (priv->ctu_width * priv->ctu_height) % >> pic->nb_slices; >> + pic->last_mb_index = 0; > > They are CTUs, don't call them macroblocks. > In split patch, will change this. >> >> return 0; >> } >> @@ -1048,9 +1056,13 @@ static int >> vaapi_encode_h265_init_slice_params(AVCodecContext *avctx, >> pslice = slice->priv_data; >> mslice = &pslice->misc_slice_params; >> >> - // Currently we only support one slice per frame. >> - vslice->slice_segment_address = 0; >> - vslice->num_ctu_in_slice = priv->ctu_width * priv->ctu_height; >> + vslice->slice_segment_address = pic->last_mb_index; >> + mslice->slice_segment_address = pic->last_mb_index; > > Duplication, oops. 
Just remove the one in VAAPIEncodeH265MiscSliceParams, I > think? Yes, I think we need to remove the slice_segment_address in VAAPIEncodeH265MiscSliceParams > >> + vslice->num_ctu_in_slice = pic->slice_of_mbs + (pic->slice_mod_mbs > 0 >> ? 1 : 0); > > Same point on rounding as above. > >> + >> + if (pic->slice_mod_mbs > 0) >> + pic->slice_mod_mbs --; >> + pic->last_mb_index += vslice->num_ctu_in_slice; >> >> switch (pic->type) { >> case PICTURE_TYPE_IDR: >> @@ -1104,9 +1116,13 @@ static int >> vaapi_encode_h265_init_slice_params(AVCodecContext *avctx, >> else >> vslice->slice_qp_delta = priv->fixed_qp_idr - vpic->pic_init_qp; >> >> - vslice->slice_fields.bits.last_slice_of_pic_flag = 1; >> + if (pic->last_mb_index == priv->ctu_width * priv->ctu_height) >> + vslice->slice_fields.bits.last_slice_of_pic_flag = 1; >> >> - mslice->first_slice_segment_in_pic_flag = 1; >> + if (vslice->slice_segment_address == 0) >> + mslice->first_slice_segment_in_pic_flag = 1; >> + else >> + mslice->first_slice_segment_in_pic_flag = 0; >> >> if (pic->type == PICTURE_TYPE_IDR) { >> // No reference pictures. >> -- >> 2.11.0 >> > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > http://ffmpeg.org/mailman/listinfo/ffmpeg-devel > _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel