On 07/06/17 01:53, Jun Zhao wrote:
> From 5c88956e36e7318cf1d1b7c41a9d4108fcf9d0a5 Mon Sep 17 00:00:00 2001
> From: Jun Zhao <jun.z...@intel.com>
> Date: Fri, 12 May 2017 08:30:43 +0800
> Subject: [PATCH] lavc/vaapi_encode_h26[45]: respect "slices" in h26[45] vaapi
>  encoder.
> 
> Enable multi-slice support in AVC/HEVC vaapi encoder.
> 
> Signed-off-by: Wang, Yi A <yi.a.w...@intel.com>
> Signed-off-by: Jun Zhao <jun.z...@intel.com>
> ---

I think this should be three patches:

>  libavcodec/vaapi_encode.c      | 36 ++++++++++++++++++++++++++++++++----
>  libavcodec/vaapi_encode.h      |  9 +++++++--

(1) Change the slice/parameter buffers to be allocated dynamically.

>  libavcodec/vaapi_encode_h264.c | 24 ++++++++++++++++++------

(2) Respect slices option in H.264 encoder.

>  libavcodec/vaapi_encode_h265.c | 28 ++++++++++++++++++++++------

(3) Respect slices option in H.265 encoder.


I'm not entirely sure that the first one is worth it - do you have a use-case 
for very large numbers of slices?  Given that VAAPI lacks the ability to limit 
slices by size (for H.323 / RTP packet limits), the other use I can think of is 
for parallelism or related conformance requirements, both of which tend to want 
relatively small numbers.


>  4 files changed, 79 insertions(+), 18 deletions(-)
> 
> diff --git a/libavcodec/vaapi_encode.c b/libavcodec/vaapi_encode.c
> index 7e9c00f51d..14a3fba7b1 100644
> --- a/libavcodec/vaapi_encode.c
> +++ b/libavcodec/vaapi_encode.c
> @@ -36,13 +36,18 @@ static int vaapi_encode_make_packed_header(AVCodecContext 
> *avctx,
>      VAAPIEncodeContext *ctx = avctx->priv_data;
>      VAStatus vas;
>      VABufferID param_buffer, data_buffer;
> +    VABufferID *tmp;
>      VAEncPackedHeaderParameterBuffer params = {
>          .type = type,
>          .bit_length = bit_len,
>          .has_emulation_bytes = 1,
>      };
>  
> -    av_assert0(pic->nb_param_buffers + 2 <= MAX_PARAM_BUFFERS);
> +    tmp = av_realloc_array(pic->param_buffers, sizeof(*tmp), 
> (pic->nb_param_buffers + 2));
> +    if (!tmp) {
> +        return AVERROR(ENOMEM);

This failure case leaks the already-allocated buffers.  Unfortunately you can't 
use av_realloc_array().

> +    }
> +    pic->param_buffers = tmp;
>  
>      vas = vaCreateBuffer(ctx->hwctx->display, ctx->va_context,
>                           VAEncPackedHeaderParameterBufferType,
> @@ -77,9 +82,14 @@ static int vaapi_encode_make_param_buffer(AVCodecContext 
> *avctx,
>  {
>      VAAPIEncodeContext *ctx = avctx->priv_data;
>      VAStatus vas;
> +    VABufferID *tmp;
>      VABufferID buffer;
>  
> -    av_assert0(pic->nb_param_buffers + 1 <= MAX_PARAM_BUFFERS);
> +    tmp = av_realloc_array(pic->param_buffers, sizeof(*tmp), 
> (pic->nb_param_buffers + 1));
> +    if (!tmp) {
> +        return AVERROR(ENOMEM);

Same.

> +    }
> +    pic->param_buffers = tmp;
>  
>      vas = vaCreateBuffer(ctx->hwctx->display, ctx->va_context,
>                           type, len, 1, data, &buffer);
> @@ -122,6 +132,8 @@ static int vaapi_encode_wait(AVCodecContext *avctx,
>      // Input is definitely finished with now.
>      av_frame_free(&pic->input_image);
>  
> +    av_freep(&pic->param_buffers);
> +
>      pic->encode_complete = 1;
>      return 0;
>  }
> @@ -313,7 +325,10 @@ static int vaapi_encode_issue(AVCodecContext *avctx,
>          }
>      }
>  
> -    av_assert0(pic->nb_slices <= MAX_PICTURE_SLICES);
> +    pic->slices = (VAAPIEncodeSlice **)av_malloc(sizeof(VAAPIEncodeSlice *) 
> * pic->nb_slices);
> +    if (pic->slices == NULL)
> +        goto fail;
> +
>      for (i = 0; i < pic->nb_slices; i++) {
>          slice = av_mallocz(sizeof(*slice));
>          if (!slice) {
> @@ -322,7 +337,6 @@ static int vaapi_encode_issue(AVCodecContext *avctx,
>          }
>          slice->index = i;
>          pic->slices[i] = slice;
> -

Stray change?

>          if (ctx->codec->slice_params_size > 0) {
>              slice->codec_slice_params = 
> av_mallocz(ctx->codec->slice_params_size);
>              if (!slice->codec_slice_params) {
> @@ -427,6 +441,8 @@ fail:
>          vaDestroyBuffer(ctx->hwctx->display, pic->param_buffers[i]);
>  fail_at_end:
>      av_freep(&pic->codec_picture_params);
> +    av_freep(&pic->param_buffers);
> +    av_freep(&pic->slices);
>      av_frame_free(&pic->recon_image);
>      return err;
>  }
> @@ -542,6 +558,8 @@ static int vaapi_encode_free(AVCodecContext *avctx,
>      av_frame_free(&pic->input_image);
>      av_frame_free(&pic->recon_image);
>  
> +    av_freep(&pic->param_buffers);
> +    av_freep(&pic->slices);
>      // Output buffer should already be destroyed.
>      av_assert0(pic->output_buffer == VA_INVALID_ID);
>  
> @@ -949,6 +967,7 @@ static av_cold int 
> vaapi_encode_config_attributes(AVCodecContext *avctx)
>          { VAConfigAttribRTFormat         },
>          { VAConfigAttribRateControl      },
>          { VAConfigAttribEncMaxRefFrames  },
> +        { VAConfigAttribEncMaxSlices     },
>          { VAConfigAttribEncPackedHeaders },
>      };
>  
> @@ -1079,6 +1098,15 @@ static av_cold int 
> vaapi_encode_config_attributes(AVCodecContext *avctx)
>              }
>          }
>          break;
> +        case VAConfigAttribEncMaxSlices:
> +            if (avctx->slices > attr[i].value) {
> +                av_log(avctx, AV_LOG_ERROR, "Slices per frame more than %#x "
> +                       "is not supported.\n", attr[i].value);
> +                err = AVERROR(EINVAL);
> +                goto fail;
> +            }
> +            ctx->multi_slices_available = 1;
> +            break;

This can also be VA_ATTRIB_NOT_SUPPORTED, which presumably implies one slice 
per frame only?  Unfortunately it is always that with the i965 driver for 
MPEG-2 despite the need for slices, so I'm not sure what you actually want to 
do there.

>          case VAConfigAttribEncPackedHeaders:
>              if (ctx->va_packed_headers & ~attr[i].value) {
>                  // This isn't fatal, but packed headers are always
> diff --git a/libavcodec/vaapi_encode.h b/libavcodec/vaapi_encode.h
> index 0edf27e4cb..4afe4fa103 100644
> --- a/libavcodec/vaapi_encode.h
> +++ b/libavcodec/vaapi_encode.h
> @@ -73,7 +73,7 @@ typedef struct VAAPIEncodePicture {
>      VASurfaceID     recon_surface;
>  
>      int          nb_param_buffers;
> -    VABufferID      param_buffers[MAX_PARAM_BUFFERS];
> +    VABufferID      *param_buffers;
>  
>      AVBufferRef    *output_buffer_ref;
>      VABufferID      output_buffer;
> @@ -85,7 +85,10 @@ typedef struct VAAPIEncodePicture {
>      struct VAAPIEncodePicture *refs[MAX_PICTURE_REFERENCES];
>  
>      int          nb_slices;
> -    VAAPIEncodeSlice *slices[MAX_PICTURE_SLICES];
> +    VAAPIEncodeSlice **slices;
> +    int          slice_of_mbs;
> +    int          slice_mod_mbs;
> +    int          last_mb_index;

Macroblocks are not a generic concept - H.265 does not have them.  I think I'd 
prefer this calculated stuff to go in the private data of the individual codes 
rather than in the generic header (the generic code never touches it, after 
all).

>  } VAAPIEncodePicture;
>  
>  typedef struct VAAPIEncodeContext {
> @@ -105,6 +108,8 @@ typedef struct VAAPIEncodeContext {
>      // Supported packed headers (initially the desired set, modified
>      // later to what is actually supported).
>      unsigned int    va_packed_headers;
> +    // Supported multi-slices per frame
> +    int          multi_slices_available;
>  
>      // The required size of surfaces.  This is probably the input
>      // size (AVCodecContext.width|height) aligned up to whatever
> diff --git a/libavcodec/vaapi_encode_h264.c b/libavcodec/vaapi_encode_h264.c
> index 92e29554ed..f325346433 100644
> --- a/libavcodec/vaapi_encode_h264.c
> +++ b/libavcodec/vaapi_encode_h264.c
> @@ -1002,7 +1002,16 @@ static int 
> vaapi_encode_h264_init_picture_params(AVCodecContext *avctx,
>      vpic->pic_fields.bits.idr_pic_flag = (pic->type == PICTURE_TYPE_IDR);
>      vpic->pic_fields.bits.reference_pic_flag = (pic->type != PICTURE_TYPE_B);
>  
> -    pic->nb_slices = 1;
> +    if (ctx->multi_slices_available)
> +        avctx->slices = av_clip(avctx->slices, 1, priv->mb_height);
> +    else
> +        avctx->slices = 1;

If the user requests slices they probably have some clear reason for doing so 
(e.g. to work with some old device which only accepts 1080p in four slices).  
Do we really want to silently ignore that if we can't actually do it?

> +
> +    pic->nb_slices = avctx->slices;
> +
> +    pic->slice_of_mbs = (priv->mb_width * priv->mb_height) / pic->nb_slices;
> +    pic->slice_mod_mbs = (priv->mb_width * priv->mb_height) % pic->nb_slices;
> +    pic->last_mb_index = 0;
>  
>      return 0;
>  }
> @@ -1052,15 +1061,18 @@ static int 
> vaapi_encode_h264_init_slice_params(AVCodecContext *avctx,
>          av_assert0(0 && "invalid picture type");
>      }
>  
> -    // Only one slice per frame.
> -    vslice->macroblock_address = 0;
> -    vslice->num_macroblocks = priv->mb_width * priv->mb_height;
> +    vslice->macroblock_address = pic->last_mb_index;
> +    vslice->num_macroblocks = pic->slice_of_mbs + (pic->slice_mod_mbs > 0 ? 
> 1 : 0);

I think it would be cuter if rounded properly rather than top-loading the 
rounding error.

That is, for N in [0, slices) use:
    macroblock_address = N * total_macroblocks / slices;
    num_macroblocks = (N + 1) * total_macroblocks / slices - macroblock_address;

That makes it cut in half nicely on, for example, four slices in a 7x6 image, 
rather than having a stray macroblock over the centre line.

(That is: 1111111  vs.  1111111
          1111222       1112222
          2222222       2222222
          2333333       3333333
          3333444       3334444
          4444444       4444444 ).

> +    if (pic->slice_mod_mbs > 0)
> +        pic->slice_mod_mbs --;
> +    pic->last_mb_index += vslice->num_macroblocks;
>  
>      vslice->macroblock_info = VA_INVALID_ID;
>  
>      vslice->pic_parameter_set_id = vpic->pic_parameter_set_id;
> -    vslice->idr_pic_id = priv->idr_pic_count++;
> -
> +    vslice->idr_pic_id = priv->idr_pic_count;
> +    if (pic->last_mb_index == priv->mb_width * priv->mb_height)
> +        priv->idr_pic_count ++;
>      vslice->pic_order_cnt_lsb = (pic->display_order - priv->last_idr_frame) &
>          ((1 << (4 + 
> vseq->seq_fields.bits.log2_max_pic_order_cnt_lsb_minus4)) - 1);
>  
> diff --git a/libavcodec/vaapi_encode_h265.c b/libavcodec/vaapi_encode_h265.c
> index 6e008b7b9c..e930026184 100644
> --- a/libavcodec/vaapi_encode_h265.c
> +++ b/libavcodec/vaapi_encode_h265.c
> @@ -1025,7 +1025,15 @@ static int 
> vaapi_encode_h265_init_picture_params(AVCodecContext *avctx,
>          av_assert0(0 && "invalid picture type");
>      }
>  
> -    pic->nb_slices = 1;
> +    if (ctx->multi_slices_available)
> +        avctx->slices = av_clip(avctx->slices, 1, priv->ctu_height);
> +    else
> +        avctx->slices = 1;
> +
> +    pic->nb_slices = avctx->slices;
> +    pic->slice_of_mbs = (priv->ctu_width * priv->ctu_height) / 
> pic->nb_slices;
> +    pic->slice_mod_mbs = (priv->ctu_width * priv->ctu_height) % 
> pic->nb_slices;
> +    pic->last_mb_index = 0;

They are CTUs, don't call them macroblocks.

>  
>      return 0;
>  }
> @@ -1048,9 +1056,13 @@ static int 
> vaapi_encode_h265_init_slice_params(AVCodecContext *avctx,
>      pslice = slice->priv_data;
>      mslice = &pslice->misc_slice_params;
>  
> -    // Currently we only support one slice per frame.
> -    vslice->slice_segment_address = 0;
> -    vslice->num_ctu_in_slice = priv->ctu_width * priv->ctu_height;
> +    vslice->slice_segment_address = pic->last_mb_index;
> +    mslice->slice_segment_address = pic->last_mb_index;

Duplication, oops.  Just remove the one in VAAPIEncodeH265MiscSliceParams, I 
think?

> +    vslice->num_ctu_in_slice = pic->slice_of_mbs + (pic->slice_mod_mbs > 0 ? 
> 1 : 0);

Same point on rounding as above.

> +
> +    if (pic->slice_mod_mbs > 0)
> +        pic->slice_mod_mbs --;
> +    pic->last_mb_index += vslice->num_ctu_in_slice;
>  
>      switch (pic->type) {
>      case PICTURE_TYPE_IDR:
> @@ -1104,9 +1116,13 @@ static int 
> vaapi_encode_h265_init_slice_params(AVCodecContext *avctx,
>      else
>          vslice->slice_qp_delta = priv->fixed_qp_idr - vpic->pic_init_qp;
>  
> -    vslice->slice_fields.bits.last_slice_of_pic_flag = 1;
> +    if (pic->last_mb_index == priv->ctu_width * priv->ctu_height)
> +        vslice->slice_fields.bits.last_slice_of_pic_flag = 1;
>  
> -    mslice->first_slice_segment_in_pic_flag = 1;
> +    if (vslice->slice_segment_address == 0)
> +        mslice->first_slice_segment_in_pic_flag = 1;
> +    else
> +        mslice->first_slice_segment_in_pic_flag = 0;
>  
>      if (pic->type == PICTURE_TYPE_IDR) {
>          // No reference pictures.
> -- 
> 2.11.0
> 
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel

Reply via email to