On 29/05/2024 00:53, Wu, Tong1 wrote:
From: ffmpeg-devel <ffmpeg-devel-boun...@ffmpeg.org> On Behalf Of Lynne
via ffmpeg-devel
Sent: Wednesday, May 29, 2024 1:08 AM
To: ffmpeg-devel@ffmpeg.org
Cc: Lynne <d...@lynne.ee>
Subject: Re: [FFmpeg-devel] [PATCH v12 15/15] avcodec/hw_base_encode: add
avctx pointer for FFHWBaseEncodeContext

On 28/05/2024 17:48, tong1.wu-at-intel....@ffmpeg.org wrote:
From: Tong Wu <tong1...@intel.com>

An avctx pointer is added to FFHWBaseEncodeContext. This is to make
FFHWBaseEncodeContext a standalone component for ff_hw_base_*
functions.
This patch also removes some unnecessary AVCodecContext arguments.

Signed-off-by: Tong Wu <tong1...@intel.com>
---
   libavcodec/d3d12va_encode.c |  6 +++---
   libavcodec/hw_base_encode.c | 31 +++++++++++++------------------
   libavcodec/hw_base_encode.h |  8 +++++---
   libavcodec/vaapi_encode.c   |  6 +++---
   4 files changed, 24 insertions(+), 27 deletions(-)

diff --git a/libavcodec/d3d12va_encode.c b/libavcodec/d3d12va_encode.c
index 0fbf8eb07c..6d3a53c6ca 100644
--- a/libavcodec/d3d12va_encode.c
+++ b/libavcodec/d3d12va_encode.c
@@ -1351,7 +1351,7 @@ static int
d3d12va_encode_create_recon_frames(AVCodecContext *avctx)
       enum AVPixelFormat recon_format;
       int err;

-    err = ff_hw_base_get_recon_format(avctx, NULL, &recon_format);
+    err = ff_hw_base_get_recon_format(base_ctx, NULL, &recon_format);
       if (err < 0)
           return err;

@@ -1398,7 +1398,7 @@ int ff_d3d12va_encode_init(AVCodecContext
*avctx)
       int err;
       HRESULT hr;

-    err = ff_hw_base_encode_init(avctx);
+    err = ff_hw_base_encode_init(avctx, base_ctx);
       if (err < 0)
           goto fail;

@@ -1552,7 +1552,7 @@ int ff_d3d12va_encode_close(AVCodecContext
*avctx)
       D3D12_OBJECT_RELEASE(ctx->video_device3);
       D3D12_OBJECT_RELEASE(ctx->device3);

-    ff_hw_base_encode_close(avctx);
+    ff_hw_base_encode_close(base_ctx);

       return 0;
   }
diff --git a/libavcodec/hw_base_encode.c b/libavcodec/hw_base_encode.c
index 92f69bb78c..88efdf672c 100644
--- a/libavcodec/hw_base_encode.c
+++ b/libavcodec/hw_base_encode.c
@@ -94,14 +94,13 @@ static void
hw_base_encode_remove_refs(FFHWBaseEncodePicture *pic, int level)
       pic->ref_removed[level] = 1;
   }

-static void hw_base_encode_set_b_pictures(AVCodecContext *avctx,
+static void hw_base_encode_set_b_pictures(FFHWBaseEncodeContext *ctx,
                                             FFHWBaseEncodePicture *start,
                                             FFHWBaseEncodePicture *end,
                                             FFHWBaseEncodePicture *prev,
                                             int current_depth,
                                             FFHWBaseEncodePicture **last)
   {
-    FFHWBaseEncodeContext *ctx = avctx->priv_data;
       FFHWBaseEncodePicture *pic, *next, *ref;
       int i, len;

@@ -148,20 +147,19 @@ static void
hw_base_encode_set_b_pictures(AVCodecContext *avctx,
               hw_base_encode_add_ref(pic, ref, 0, 1, 0);

           if (i > 1)
-            hw_base_encode_set_b_pictures(avctx, start, pic, pic,
+            hw_base_encode_set_b_pictures(ctx, start, pic, pic,
                                             current_depth + 1, &next);
           else
               next = pic;

-        hw_base_encode_set_b_pictures(avctx, pic, end, next,
+        hw_base_encode_set_b_pictures(ctx, pic, end, next,
                                         current_depth + 1, last);
       }
   }

-static void hw_base_encode_add_next_prev(AVCodecContext *avctx,
+static void hw_base_encode_add_next_prev(FFHWBaseEncodeContext
*ctx,
                                            FFHWBaseEncodePicture *pic)
   {
-    FFHWBaseEncodeContext *ctx = avctx->priv_data;
       int i;

       if (!pic)
@@ -333,12 +331,12 @@ static int
hw_base_encode_pick_next(AVCodecContext *avctx,
       }

       if (b_counter > 0) {
-        hw_base_encode_set_b_pictures(avctx, start, pic, pic, 1,
+        hw_base_encode_set_b_pictures(ctx, start, pic, pic, 1,
                                         &prev);
       } else {
           prev = pic;
       }
-    hw_base_encode_add_next_prev(avctx, prev);
+    hw_base_encode_add_next_prev(ctx, prev);

       return 0;
   }
@@ -687,9 +685,9 @@ int ff_hw_base_init_gop_structure(AVCodecContext
*avctx, uint32_t ref_l0, uint32
       return 0;
   }

-int ff_hw_base_get_recon_format(AVCodecContext *avctx, const void
*hwconfig, enum AVPixelFormat *fmt)
+int ff_hw_base_get_recon_format(FFHWBaseEncodeContext *ctx, const
void *hwconfig,
+                                enum AVPixelFormat *fmt)
   {
-    FFHWBaseEncodeContext *ctx = avctx->priv_data;
       AVHWFramesConstraints *constraints = NULL;
       enum AVPixelFormat recon_format;
       int err, i;
@@ -722,14 +720,14 @@ int
ff_hw_base_get_recon_format(AVCodecContext *avctx, const void *hwconfig,
enu
           // No idea what to use; copy input format.
           recon_format = ctx->input_frames->sw_format;
       }
-    av_log(avctx, AV_LOG_DEBUG, "Using %s as format of "
+    av_log(ctx->avctx, AV_LOG_DEBUG, "Using %s as format of "
              "reconstructed frames.\n", av_get_pix_fmt_name(recon_format));

       if (ctx->surface_width  < constraints->min_width  ||
           ctx->surface_height < constraints->min_height ||
           ctx->surface_width  > constraints->max_width ||
           ctx->surface_height > constraints->max_height) {
-        av_log(avctx, AV_LOG_ERROR, "Hardware does not support encoding at
"
+        av_log(ctx->avctx, AV_LOG_ERROR, "Hardware does not support
encoding at "
                  "size %dx%d (constraints: width %d-%d height %d-%d).\n",
                  ctx->surface_width, ctx->surface_height,
                  constraints->min_width,  constraints->max_width,
@@ -756,10 +754,9 @@ int
ff_hw_base_encode_free(FFHWBaseEncodePicture *pic)
       return 0;
   }

-int ff_hw_base_encode_init(AVCodecContext *avctx)
+int ff_hw_base_encode_init(AVCodecContext *avctx,
FFHWBaseEncodeContext *ctx)
   {
-    FFHWBaseEncodeContext *ctx = avctx->priv_data;
-
+    ctx->avctx = avctx;
       ctx->frame = av_frame_alloc();
       if (!ctx->frame)
           return AVERROR(ENOMEM);
@@ -789,10 +786,8 @@ int ff_hw_base_encode_init(AVCodecContext
*avctx)
       return 0;
   }

-int ff_hw_base_encode_close(AVCodecContext *avctx)
+int ff_hw_base_encode_close(FFHWBaseEncodeContext *ctx)
   {
-    FFHWBaseEncodeContext *ctx = avctx->priv_data;
-
       av_fifo_freep2(&ctx->encode_fifo);

       av_frame_free(&ctx->frame);
diff --git a/libavcodec/hw_base_encode.h b/libavcodec/hw_base_encode.h
index 15ef3d7ac6..13c1fc0f69 100644
--- a/libavcodec/hw_base_encode.h
+++ b/libavcodec/hw_base_encode.h
@@ -116,6 +116,7 @@ typedef struct FFHWEncodePictureOperation {

   typedef struct FFHWBaseEncodeContext {
       const AVClass *class;
+    AVCodecContext *avctx;

       // Hardware-specific hooks.
       const struct FFHWEncodePictureOperation *op;
@@ -222,13 +223,14 @@ int
ff_hw_base_encode_receive_packet(AVCodecContext *avctx, AVPacket *pkt);
   int ff_hw_base_init_gop_structure(AVCodecContext *avctx, uint32_t ref_l0,
uint32_t ref_l1,
                                     int flags, int prediction_pre_only);

-int ff_hw_base_get_recon_format(AVCodecContext *avctx, const void
*hwconfig, enum AVPixelFormat *fmt);
+int ff_hw_base_get_recon_format(FFHWBaseEncodeContext *ctx, const
void *hwconfig,
+                                enum AVPixelFormat *fmt);

   int ff_hw_base_encode_free(FFHWBaseEncodePicture *pic);

-int ff_hw_base_encode_init(AVCodecContext *avctx);
+int ff_hw_base_encode_init(AVCodecContext *avctx,
FFHWBaseEncodeContext *ctx);

-int ff_hw_base_encode_close(AVCodecContext *avctx);
+int ff_hw_base_encode_close(FFHWBaseEncodeContext *ctx);

   #define HW_BASE_ENCODE_COMMON_OPTIONS \
       { "idr_interval", \
diff --git a/libavcodec/vaapi_encode.c b/libavcodec/vaapi_encode.c
index b35a23e852..0693e77548 100644
--- a/libavcodec/vaapi_encode.c
+++ b/libavcodec/vaapi_encode.c
@@ -2059,7 +2059,7 @@ static av_cold int
vaapi_encode_create_recon_frames(AVCodecContext *avctx)
       }
       hwconfig->config_id = ctx->va_config;

-    err = ff_hw_base_get_recon_format(avctx, (const void*)hwconfig,
&recon_format);
+    err = ff_hw_base_get_recon_format(base_ctx, (const void*)hwconfig,
&recon_format);
       if (err < 0)
           goto fail;

@@ -2106,7 +2106,7 @@ av_cold int ff_vaapi_encode_init(AVCodecContext
*avctx)
       VAStatus vas;
       int err;

-    err = ff_hw_base_encode_init(avctx);
+    err = ff_hw_base_encode_init(avctx, base_ctx);
       if (err < 0)
           goto fail;

@@ -2313,7 +2313,7 @@ av_cold int
ff_vaapi_encode_close(AVCodecContext *avctx)
       av_freep(&ctx->codec_sequence_params);
       av_freep(&ctx->codec_picture_params);

-    ff_hw_base_encode_close(avctx);
+    ff_hw_base_encode_close(base_ctx);

       return 0;
   }

Err, you missed ff_hw_base_encode_set_output_property,
ff_hw_base_encode_receive_packet and ff_hw_base_init_gop_structure?

Rest looks better.

ff_hw_base_encode_receive_packet is directly bind to int (*receive_packet)(struct 
AVCodecContext *avctx, struct AVPacket *avpkt); I thought maybe this should not be 
changed? For the other two functions, components in avctx are used. If they need to 
be changed, I could either take both FFHWBaseEncodeContext and AVCodecContext as 
arguments or use ctx->avctx->* to get the components which one do you think 
better. Thank you.

Take both FFHWBaseEncodeContext and AVCodecContext as arguments, and use it to get fields such as gop_size, max_b_frames and so on.

*Do not use* avctx->priv_data anywhere in hw_base_encode. Any usage of it has to be switched to a user-given FFHWBaseEncodeContext via a function argument.

ff_hw_base_encode_receive_packet needs to be switched to (FFHWBaseEncodeContext, AVCodecContext...) as well, as it uses avctx->priv_data.

Hardware encoders should have small wrappers for receive_packet which would call ff_hw_base_encode_receive_packet. It's not much, just 3 lines.

Hardware encoders could still use FFHWBaseEncodeContext as their own main encode context, but they would no longer be required to use it as their main context. It'll make sense with some code:

For example, d3d12va_encode's wrapper function would be:
static int receive_packet(avctx...)
{
    return ff_hw_base_encode_receive_packet(avctx->priv_data, avctx...)
}

Vulkan's wrapper would, on the other hand look like:
static int receive_packet(avctx...)
{
    VulkanEncodeContext *vkenc = avctx->priv_data;
    return ff_hw_base_encode_receive_packet(vkenc->hw_base, avctx...)
}

You should also rename FFHWBaseEncodeContext.avctx to FFHWBaseEncodeContext.log_ctx with a type of void *, and use it for av_log where needed. It would be set to avctx in ff_hw_base_encode_init(), but encoders could override it if they needed to by modifying the context. If frame threading is ever implemented, this change would make it easier, as the context for each call would be exlicitly known.

Thanks

Attachment: OpenPGP_0xA2FEA5F03F034464.asc
Description: OpenPGP public key

Attachment: OpenPGP_signature.asc
Description: OpenPGP digital signature

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

Reply via email to