On 10.09.2024 20:10, Koushik Dutta wrote:
The crop filter has no effect on scale_cuda:

-vf crop=100:100,scale_cuda=300x300

Hardware frames (AV_PIX_FMT_FLAG_HWACCEL) are expected to use the crop_* 
properties,
as seen in the implementation vf_crop.c.

The current workaround is to hwdownload the full frame
and perform the crop on CPU.
---
  libavfilter/vf_scale_cuda.c  | 15 ++++++++++-----
  libavfilter/vf_scale_cuda.cu | 22 ++++++++++++++--------
  2 files changed, 24 insertions(+), 13 deletions(-)

diff --git a/libavfilter/vf_scale_cuda.c b/libavfilter/vf_scale_cuda.c
index 54a340949d..eb8beee771 100644
--- a/libavfilter/vf_scale_cuda.c
+++ b/libavfilter/vf_scale_cuda.c
@@ -407,7 +407,7 @@ fail:
  }
static int call_resize_kernel(AVFilterContext *ctx, CUfunction func,
-                              CUtexObject src_tex[4], int src_width, int 
src_height,
+                              CUtexObject src_tex[4], int src_left, int 
src_top, int src_width, int src_height,
                                AVFrame *out_frame, int dst_width, int 
dst_height, int dst_pitch)
  {
      CUDAScaleContext *s = ctx->priv;
@@ -422,7 +422,7 @@ static int call_resize_kernel(AVFilterContext *ctx, 
CUfunction func,
          &src_tex[0], &src_tex[1], &src_tex[2], &src_tex[3],
          &dst_devptr[0], &dst_devptr[1], &dst_devptr[2], &dst_devptr[3],
          &dst_width, &dst_height, &dst_pitch,
-        &src_width, &src_height, &s->param
+        &src_left, &src_top, &src_width, &src_height, &s->param
      };
return CHECK_CU(cu->cuLaunchKernel(func,
@@ -440,6 +440,9 @@ static int scalecuda_resize(AVFilterContext *ctx,
CUtexObject tex[4] = { 0, 0, 0, 0 }; + int crop_width = (in->width - in->crop_right) - in->crop_left;
+    int crop_height = (in->height - in->crop_bottom) - in->crop_top;

Bit of a nit, but I don't think the parenthesis are neccesary.

+
      ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_ctx));
      if (ret < 0)
          return ret;
@@ -477,7 +480,7 @@ static int scalecuda_resize(AVFilterContext *ctx,
// scale primary plane(s). Usually Y (and A), or single plane of RGB frames.
      ret = call_resize_kernel(ctx, s->cu_func,
-                             tex, in->width, in->height,
+                             tex, in->crop_left, in->crop_top, crop_width, 
crop_height,
                               out, out->width, out->height, out->linesize[0]);
      if (ret < 0)
          goto exit;
@@ -485,8 +488,10 @@ static int scalecuda_resize(AVFilterContext *ctx,
      if (s->out_planes > 1) {
          // scale UV plane. Scale function sets both U and V plane, or 
singular interleaved plane.
          ret = call_resize_kernel(ctx, s->cu_func_uv, tex,
-                                 AV_CEIL_RSHIFT(in->width, 
s->in_desc->log2_chroma_w),
-                                 AV_CEIL_RSHIFT(in->height, 
s->in_desc->log2_chroma_h),
+                                 AV_CEIL_RSHIFT(in->crop_left, 
s->in_desc->log2_chroma_w),
+                                 AV_CEIL_RSHIFT(in->crop_top, 
s->in_desc->log2_chroma_h),
+                                 AV_CEIL_RSHIFT(crop_width, 
s->in_desc->log2_chroma_w),
+                                 AV_CEIL_RSHIFT(crop_height, 
s->in_desc->log2_chroma_h),
                                   out,
                                   AV_CEIL_RSHIFT(out->width, 
s->out_desc->log2_chroma_w),
                                   AV_CEIL_RSHIFT(out->height, 
s->out_desc->log2_chroma_h),
diff --git a/libavfilter/vf_scale_cuda.cu b/libavfilter/vf_scale_cuda.cu
index de06ba9433..271b55cd5d 100644
--- a/libavfilter/vf_scale_cuda.cu
+++ b/libavfilter/vf_scale_cuda.cu
@@ -26,6 +26,7 @@
  template<typename T>
  using subsample_function_t = T (*)(cudaTextureObject_t tex, int xo, int yo,
                                     int dst_width, int dst_height,
+                                   int src_left, int src_top,
                                     int src_width, int src_height,
                                     int bit_depth, float param);
@@ -64,11 +65,12 @@ static inline __device__ ushort conv_16to10(ushort in)
               subsample_function_t<in_T_uv> subsample_func_uv>                 
                 \
      __device__ static inline void N(cudaTextureObject_t src_tex[4], T 
*dst[4], int xo, int yo, \
                                      int dst_width, int dst_height, int 
dst_pitch,              \
-                                    int src_width, int src_height, float param)
+                                    int src_left, int src_top, int src_width, 
int src_height, float param)
#define SUB_F(m, plane) \
      subsample_func_##m(src_tex[plane], xo, yo, \
                         dst_width, dst_height,  \
+                       src_left, src_top,      \
                         src_width, src_height,  \
                         in_bit_depth, param)
@@ -1063,13 +1065,14 @@ template<typename T>
  __device__ static inline T Subsample_Nearest(cudaTextureObject_t tex,
                                               int xo, int yo,
                                               int dst_width, int dst_height,
+                                             int src_left, int src_top,
                                               int src_width, int src_height,
                                               int bit_depth, float param)
  {
      float hscale = (float)src_width / (float)dst_width;
      float vscale = (float)src_height / (float)dst_height;
-    float xi = (xo + 0.5f) * hscale;
-    float yi = (yo + 0.5f) * vscale;
+    float xi = (xo + 0.5f) * hscale + src_left;
+    float yi = (yo + 0.5f) * vscale + src_top;
return tex2D<T>(tex, xi, yi);
  }
@@ -1078,13 +1081,14 @@ template<typename T>
  __device__ static inline T Subsample_Bilinear(cudaTextureObject_t tex,
                                                int xo, int yo,
                                                int dst_width, int dst_height,
+                                              int src_left, int src_top,
                                                int src_width, int src_height,
                                                int bit_depth, float param)
  {
      float hscale = (float)src_width / (float)dst_width;
      float vscale = (float)src_height / (float)dst_height;
-    float xi = (xo + 0.5f) * hscale;
-    float yi = (yo + 0.5f) * vscale;
+    float xi = (xo + 0.5f) * hscale + src_left;
+    float yi = (yo + 0.5f) * vscale + src_top;
      // 3-tap filter weights are {wh,1.0,wh} and {wv,1.0,wv}
      float wh = min(max(0.5f * (hscale - 1.0f), 0.0f), 1.0f);
      float wv = min(max(0.5f * (vscale - 1.0f), 0.0f), 1.0f);
@@ -1109,13 +1113,14 @@ template<typename T, coeffs_function_t coeffs_function>
  __device__ static inline T Subsample_Bicubic(cudaTextureObject_t tex,
                                               int xo, int yo,
                                               int dst_width, int dst_height,
+                                             int src_left, int src_top,
                                               int src_width, int src_height,
                                               int bit_depth, float param)
  {
      float hscale = (float)src_width / (float)dst_width;
      float vscale = (float)src_height / (float)dst_height;
-    float xi = (xo + 0.5f) * hscale - 0.5f;
-    float yi = (yo + 0.5f) * vscale - 0.5f;
+    float xi = (xo + 0.5f) * hscale - 0.5f + src_left;
+    float yi = (yo + 0.5f) * vscale - 0.5f + src_top;
      float px = floor(xi);
      float py = floor(yi);
      float fx = xi - px;
@@ -1147,7 +1152,7 @@ __device__ static inline T 
Subsample_Bicubic(cudaTextureObject_t tex,
      cudaTextureObject_t src_tex_2, cudaTextureObject_t src_tex_3, \
      T *dst_0, T *dst_1, T *dst_2, T *dst_3,                       \
      int dst_width, int dst_height, int dst_pitch,                 \
-    int src_width, int src_height, float param
+    int src_left, int src_top, int src_width, int src_height, float param
#define SUBSAMPLE(Convert, T) \
      cudaTextureObject_t src_tex[4] =                    \
@@ -1159,6 +1164,7 @@ __device__ static inline T 
Subsample_Bicubic(cudaTextureObject_t tex,
      Convert(                                            \
          src_tex, dst, xo, yo,                           \
          dst_width, dst_height, dst_pitch,               \
+        src_left, src_top,                              \
          src_width, src_height, param);
extern "C" {

Looks good to me otherwise, will give it a test later.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

Reply via email to