On 2017/6/14 7:41, Mark Thompson wrote:
> On 07/06/17 01:53, Jun Zhao wrote:
>> From 5c88956e36e7318cf1d1b7c41a9d4108fcf9d0a5 Mon Sep 17 00:00:00 2001
>> From: Jun Zhao <jun.z...@intel.com>
>> Date: Fri, 12 May 2017 08:30:43 +0800
>> Subject: [PATCH] lavc/vaapi_encode_h26[45]: respect "slices" in h26[45] vaapi
>>  encoder.
>>
>> Enable multi-slice support in AVC/HEVC vaapi encoder.
>>
>> Signed-off-by: Wang, Yi A <yi.a.w...@intel.com>
>> Signed-off-by: Jun Zhao <jun.z...@intel.com>
>> ---
> 
> I think this should be three patches:
> 
>>  libavcodec/vaapi_encode.c      | 36 ++++++++++++++++++++++++++++++++----
>>  libavcodec/vaapi_encode.h      |  9 +++++++--
> 
> (1) Change the slice/parameter buffers to be allocated dynamically.
> 
>>  libavcodec/vaapi_encode_h264.c | 24 ++++++++++++++++++------
> 
> (2) Respect slices option in H.264 encoder.
> 
>>  libavcodec/vaapi_encode_h265.c | 28 ++++++++++++++++++++++------
> 
> (3) Respect slices option in H.265 encoder.
> 
> 
> I'm not entirely sure that the first one is worth it - do you have a use-case 
> for very large numbers of slices?  Given that VAAPI lacks the ability to 
> limit slices by size (for H.323 / RTP packet limits), the other use I can 
> think of is for parallelism or related conformance requirements, both of 
> which tend to want relatively small numbers.
> 
For the first one, I don't have a user case for large number of slices, but I 
think dynamic allocate is a general solution if the driver/GPU HW change the 
max slices number support in GEN10, GEN11, ...

And I will split the patch as the comments.

> 
>>  4 files changed, 79 insertions(+), 18 deletions(-)
>>
>> diff --git a/libavcodec/vaapi_encode.c b/libavcodec/vaapi_encode.c
>> index 7e9c00f51d..14a3fba7b1 100644
>> --- a/libavcodec/vaapi_encode.c
>> +++ b/libavcodec/vaapi_encode.c
>> @@ -36,13 +36,18 @@ static int 
>> vaapi_encode_make_packed_header(AVCodecContext *avctx,
>>      VAAPIEncodeContext *ctx = avctx->priv_data;
>>      VAStatus vas;
>>      VABufferID param_buffer, data_buffer;
>> +    VABufferID *tmp;
>>      VAEncPackedHeaderParameterBuffer params = {
>>          .type = type,
>>          .bit_length = bit_len,
>>          .has_emulation_bytes = 1,
>>      };
>>  
>> -    av_assert0(pic->nb_param_buffers + 2 <= MAX_PARAM_BUFFERS);
>> +    tmp = av_realloc_array(pic->param_buffers, sizeof(*tmp), 
>> (pic->nb_param_buffers + 2));
>> +    if (!tmp) {
>> +        return AVERROR(ENOMEM);
> 
> This failure case leaks the already-allocated buffers.  Unfortunately you 
> can't use av_realloc_array().
> 
>> +    }
>> +    pic->param_buffers = tmp;
>>  
>>      vas = vaCreateBuffer(ctx->hwctx->display, ctx->va_context,
>>                           VAEncPackedHeaderParameterBufferType,
>> @@ -77,9 +82,14 @@ static int vaapi_encode_make_param_buffer(AVCodecContext 
>> *avctx,
>>  {
>>      VAAPIEncodeContext *ctx = avctx->priv_data;
>>      VAStatus vas;
>> +    VABufferID *tmp;
>>      VABufferID buffer;
>>  
>> -    av_assert0(pic->nb_param_buffers + 1 <= MAX_PARAM_BUFFERS);
>> +    tmp = av_realloc_array(pic->param_buffers, sizeof(*tmp), 
>> (pic->nb_param_buffers + 1));
>> +    if (!tmp) {
>> +        return AVERROR(ENOMEM);
> 
> Same.
> 
>> +    }
>> +    pic->param_buffers = tmp;
>>  
>>      vas = vaCreateBuffer(ctx->hwctx->display, ctx->va_context,
>>                           type, len, 1, data, &buffer);
>> @@ -122,6 +132,8 @@ static int vaapi_encode_wait(AVCodecContext *avctx,
>>      // Input is definitely finished with now.
>>      av_frame_free(&pic->input_image);
>>  
>> +    av_freep(&pic->param_buffers);
>> +
>>      pic->encode_complete = 1;
>>      return 0;
>>  }
>> @@ -313,7 +325,10 @@ static int vaapi_encode_issue(AVCodecContext *avctx,
>>          }
>>      }
>>  
>> -    av_assert0(pic->nb_slices <= MAX_PICTURE_SLICES);
>> +    pic->slices = (VAAPIEncodeSlice **)av_malloc(sizeof(VAAPIEncodeSlice *) 
>> * pic->nb_slices);
>> +    if (pic->slices == NULL)
>> +        goto fail;
>> +
>>      for (i = 0; i < pic->nb_slices; i++) {
>>          slice = av_mallocz(sizeof(*slice));
>>          if (!slice) {
>> @@ -322,7 +337,6 @@ static int vaapi_encode_issue(AVCodecContext *avctx,
>>          }
>>          slice->index = i;
>>          pic->slices[i] = slice;
>> -
> 
> Stray change?

Will clean

> 
>>          if (ctx->codec->slice_params_size > 0) {
>>              slice->codec_slice_params = 
>> av_mallocz(ctx->codec->slice_params_size);
>>              if (!slice->codec_slice_params) {
>> @@ -427,6 +441,8 @@ fail:
>>          vaDestroyBuffer(ctx->hwctx->display, pic->param_buffers[i]);
>>  fail_at_end:
>>      av_freep(&pic->codec_picture_params);
>> +    av_freep(&pic->param_buffers);
>> +    av_freep(&pic->slices);
>>      av_frame_free(&pic->recon_image);
>>      return err;
>>  }
>> @@ -542,6 +558,8 @@ static int vaapi_encode_free(AVCodecContext *avctx,
>>      av_frame_free(&pic->input_image);
>>      av_frame_free(&pic->recon_image);
>>  
>> +    av_freep(&pic->param_buffers);
>> +    av_freep(&pic->slices);
>>      // Output buffer should already be destroyed.
>>      av_assert0(pic->output_buffer == VA_INVALID_ID);
>>  
>> @@ -949,6 +967,7 @@ static av_cold int 
>> vaapi_encode_config_attributes(AVCodecContext *avctx)
>>          { VAConfigAttribRTFormat         },
>>          { VAConfigAttribRateControl      },
>>          { VAConfigAttribEncMaxRefFrames  },
>> +        { VAConfigAttribEncMaxSlices     },
>>          { VAConfigAttribEncPackedHeaders },
>>      };
>>  
>> @@ -1079,6 +1098,15 @@ static av_cold int 
>> vaapi_encode_config_attributes(AVCodecContext *avctx)
>>              }
>>          }
>>          break;
>> +        case VAConfigAttribEncMaxSlices:
>> +            if (avctx->slices > attr[i].value) {
>> +                av_log(avctx, AV_LOG_ERROR, "Slices per frame more than %#x 
>> "
>> +                       "is not supported.\n", attr[i].value);
>> +                err = AVERROR(EINVAL);
>> +                goto fail;
>> +            }
>> +            ctx->multi_slices_available = 1;
>> +            break;
> 
> This can also be VA_ATTRIB_NOT_SUPPORTED, which presumably implies one slice 
> per frame only?  Unfortunately it is always that with the i965 driver for 
> MPEG-2 despite the need for slices, so I'm not sure what you actually want to 
> do there.
> 
>>          case VAConfigAttribEncPackedHeaders:
>>              if (ctx->va_packed_headers & ~attr[i].value) {
>>                  // This isn't fatal, but packed headers are always
>> diff --git a/libavcodec/vaapi_encode.h b/libavcodec/vaapi_encode.h
>> index 0edf27e4cb..4afe4fa103 100644
>> --- a/libavcodec/vaapi_encode.h
>> +++ b/libavcodec/vaapi_encode.h
>> @@ -73,7 +73,7 @@ typedef struct VAAPIEncodePicture {
>>      VASurfaceID     recon_surface;
>>  
>>      int          nb_param_buffers;
>> -    VABufferID      param_buffers[MAX_PARAM_BUFFERS];
>> +    VABufferID      *param_buffers;
>>  
>>      AVBufferRef    *output_buffer_ref;
>>      VABufferID      output_buffer;
>> @@ -85,7 +85,10 @@ typedef struct VAAPIEncodePicture {
>>      struct VAAPIEncodePicture *refs[MAX_PICTURE_REFERENCES];
>>  
>>      int          nb_slices;
>> -    VAAPIEncodeSlice *slices[MAX_PICTURE_SLICES];
>> +    VAAPIEncodeSlice **slices;
>> +    int          slice_of_mbs;
>> +    int          slice_mod_mbs;
>> +    int          last_mb_index;
> 
> Macroblocks are not a generic concept - H.265 does not have them.  I think 
> I'd prefer this calculated stuff to go in the private data of the individual 
> codes rather than in the generic header (the generic code never touches it, 
> after all).

Will use a private struct for this

> 
>>  } VAAPIEncodePicture;
>>  
>>  typedef struct VAAPIEncodeContext {
>> @@ -105,6 +108,8 @@ typedef struct VAAPIEncodeContext {
>>      // Supported packed headers (initially the desired set, modified
>>      // later to what is actually supported).
>>      unsigned int    va_packed_headers;
>> +    // Supported multi-slices per frame
>> +    int          multi_slices_available;
>>  
>>      // The required size of surfaces.  This is probably the input
>>      // size (AVCodecContext.width|height) aligned up to whatever
>> diff --git a/libavcodec/vaapi_encode_h264.c b/libavcodec/vaapi_encode_h264.c
>> index 92e29554ed..f325346433 100644
>> --- a/libavcodec/vaapi_encode_h264.c
>> +++ b/libavcodec/vaapi_encode_h264.c
>> @@ -1002,7 +1002,16 @@ static int 
>> vaapi_encode_h264_init_picture_params(AVCodecContext *avctx,
>>      vpic->pic_fields.bits.idr_pic_flag = (pic->type == PICTURE_TYPE_IDR);
>>      vpic->pic_fields.bits.reference_pic_flag = (pic->type != 
>> PICTURE_TYPE_B);
>>  
>> -    pic->nb_slices = 1;
>> +    if (ctx->multi_slices_available)
>> +        avctx->slices = av_clip(avctx->slices, 1, priv->mb_height);
>> +    else
>> +        avctx->slices = 1;
> 
> If the user requests slices they probably have some clear reason for doing so 
> (e.g. to work with some old device which only accepts 1080p in four slices).  
> Do we really want to silently ignore that if we can't actually do it?

Will give a warning message when driver can't support multi-slices but user 
setting the "slices" > 1.

> 
>> +
>> +    pic->nb_slices = avctx->slices;
>> +
>> +    pic->slice_of_mbs = (priv->mb_width * priv->mb_height) / pic->nb_slices;
>> +    pic->slice_mod_mbs = (priv->mb_width * priv->mb_height) % 
>> pic->nb_slices;
>> +    pic->last_mb_index = 0;
>>  
>>      return 0;
>>  }
>> @@ -1052,15 +1061,18 @@ static int 
>> vaapi_encode_h264_init_slice_params(AVCodecContext *avctx,
>>          av_assert0(0 && "invalid picture type");
>>      }
>>  
>> -    // Only one slice per frame.
>> -    vslice->macroblock_address = 0;
>> -    vslice->num_macroblocks = priv->mb_width * priv->mb_height;
>> +    vslice->macroblock_address = pic->last_mb_index;
>> +    vslice->num_macroblocks = pic->slice_of_mbs + (pic->slice_mod_mbs > 0 ? 
>> 1 : 0);
> 
> I think it would be cuter if rounded properly rather than top-loading the 
> rounding error.
> 
> That is, for N in [0, slices) use:
>     macroblock_address = N * total_macroblocks / slices;
>     num_macroblocks = (N + 1) * total_macroblocks / slices - 
> macroblock_address;
> 
> That makes it cut in half nicely on, for example, four slices in a 7x6 image, 
> rather than having a stray macroblock over the centre line.
> 
> (That is: 1111111  vs.  1111111
>           1111222       1112222
>           2222222       2222222
>           2333333       3333333
>           3333444       3334444
>           4444444       4444444 ).
> 

Will double-check this part

>> +    if (pic->slice_mod_mbs > 0)
>> +        pic->slice_mod_mbs --;
>> +    pic->last_mb_index += vslice->num_macroblocks;
>>  
>>      vslice->macroblock_info = VA_INVALID_ID;
>>  
>>      vslice->pic_parameter_set_id = vpic->pic_parameter_set_id;
>> -    vslice->idr_pic_id = priv->idr_pic_count++;
>> -
>> +    vslice->idr_pic_id = priv->idr_pic_count;
>> +    if (pic->last_mb_index == priv->mb_width * priv->mb_height)
>> +        priv->idr_pic_count ++;
>>      vslice->pic_order_cnt_lsb = (pic->display_order - priv->last_idr_frame) 
>> &
>>          ((1 << (4 + 
>> vseq->seq_fields.bits.log2_max_pic_order_cnt_lsb_minus4)) - 1);
>>  
>> diff --git a/libavcodec/vaapi_encode_h265.c b/libavcodec/vaapi_encode_h265.c
>> index 6e008b7b9c..e930026184 100644
>> --- a/libavcodec/vaapi_encode_h265.c
>> +++ b/libavcodec/vaapi_encode_h265.c
>> @@ -1025,7 +1025,15 @@ static int 
>> vaapi_encode_h265_init_picture_params(AVCodecContext *avctx,
>>          av_assert0(0 && "invalid picture type");
>>      }
>>  
>> -    pic->nb_slices = 1;
>> +    if (ctx->multi_slices_available)
>> +        avctx->slices = av_clip(avctx->slices, 1, priv->ctu_height);
>> +    else
>> +        avctx->slices = 1;
>> +
>> +    pic->nb_slices = avctx->slices;
>> +    pic->slice_of_mbs = (priv->ctu_width * priv->ctu_height) / 
>> pic->nb_slices;
>> +    pic->slice_mod_mbs = (priv->ctu_width * priv->ctu_height) % 
>> pic->nb_slices;
>> +    pic->last_mb_index = 0;
> 
> They are CTUs, don't call them macroblocks.
> 
In split patch, will change this.

>>  
>>      return 0;
>>  }
>> @@ -1048,9 +1056,13 @@ static int 
>> vaapi_encode_h265_init_slice_params(AVCodecContext *avctx,
>>      pslice = slice->priv_data;
>>      mslice = &pslice->misc_slice_params;
>>  
>> -    // Currently we only support one slice per frame.
>> -    vslice->slice_segment_address = 0;
>> -    vslice->num_ctu_in_slice = priv->ctu_width * priv->ctu_height;
>> +    vslice->slice_segment_address = pic->last_mb_index;
>> +    mslice->slice_segment_address = pic->last_mb_index;
> 
> Duplication, oops.  Just remove the one in VAAPIEncodeH265MiscSliceParams, I 
> think?

Yes, I think we need to remove the slice_segment_address in 
VAAPIEncodeH265MiscSliceParams

> 
>> +    vslice->num_ctu_in_slice = pic->slice_of_mbs + (pic->slice_mod_mbs > 0 
>> ? 1 : 0);
> 
> Same point on rounding as above.
> 
>> +
>> +    if (pic->slice_mod_mbs > 0)
>> +        pic->slice_mod_mbs --;
>> +    pic->last_mb_index += vslice->num_ctu_in_slice;
>>  
>>      switch (pic->type) {
>>      case PICTURE_TYPE_IDR:
>> @@ -1104,9 +1116,13 @@ static int 
>> vaapi_encode_h265_init_slice_params(AVCodecContext *avctx,
>>      else
>>          vslice->slice_qp_delta = priv->fixed_qp_idr - vpic->pic_init_qp;
>>  
>> -    vslice->slice_fields.bits.last_slice_of_pic_flag = 1;
>> +    if (pic->last_mb_index == priv->ctu_width * priv->ctu_height)
>> +        vslice->slice_fields.bits.last_slice_of_pic_flag = 1;
>>  
>> -    mslice->first_slice_segment_in_pic_flag = 1;
>> +    if (vslice->slice_segment_address == 0)
>> +        mslice->first_slice_segment_in_pic_flag = 1;
>> +    else
>> +        mslice->first_slice_segment_in_pic_flag = 0;
>>  
>>      if (pic->type == PICTURE_TYPE_IDR) {
>>          // No reference pictures.
>> -- 
>> 2.11.0
>>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel

Reply via email to