This is an automated email from the git hooks/post-receive script. Git pushed a commit to branch release/8.0 in repository ffmpeg.
commit b445273608118a0595013505ef665098ffd47220 Author: Niklas Haas <[email protected]> AuthorDate: Mon Jun 22 18:51:01 2026 +0200 Commit: Marvin Scholz <[email protected]> CommitDate: Mon Jun 29 14:10:13 2026 +0200 avfilter/vf_scale_cuda: add generic 1D filter kernel This can be useful for any sort of separable filtering with arbitrary weights. Signed-off-by: Niklas Haas <[email protected]> (cherry-picked from commit 469281fa50eea53c806fc5a4f1fe38e311c582c0) Signed-off-by: Marvin Scholz <[email protected]> --- libavfilter/vf_scale_cuda.cu | 79 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/libavfilter/vf_scale_cuda.cu b/libavfilter/vf_scale_cuda.cu index a2a38b7cd8..211ac8dcb9 100644 --- a/libavfilter/vf_scale_cuda.cu +++ b/libavfilter/vf_scale_cuda.cu @@ -1183,6 +1183,43 @@ __device__ static inline T Subsample_Bicubic(cudaTextureObject_t tex, #undef PIX } +enum ScaleDir { + SCALE_DIR_X, + SCALE_DIR_Y, +}; + +template<typename T, int dir> +__device__ static inline T Subsample_Generic(cudaTextureObject_t tex, + int xo, int yo, + int dst_width, int dst_height, + int src_left, int src_top, + int src_width, int src_height, + int bit_depth, float param, + const float *weights, const int *offsets, + int filter_size) +{ + const float factor = bit_depth > 8 ? 0xFFFF : 0xFF; + + floatT sum; + vec_set_scalar(sum, 0.0f); + + if (dir == SCALE_DIR_X) { + const float *row = &weights[xo * filter_size]; + const float x = 0.5f + src_left + offsets[xo]; + const float y = 0.5f + src_top + yo; + for (int i = 0; i < filter_size; i++) + sum += tex2D<floatT>(tex, x + i, y) * row[i]; + } else { + const float *col = &weights[yo * filter_size]; + const float x = 0.5f + src_left + xo; + const float y = 0.5f + src_top + offsets[yo]; + for (int i = 0; i < filter_size; i++) + sum += tex2D<floatT>(tex, x, y + i) * col[i]; + } + + return from_floatN<T, floatT>(sum * factor); +} + /// --- FUNCTION EXPORTS --- #define KERNEL_ARGS(T) CUDAScaleKernelParams params @@ -1370,4 +1407,46 @@ LANCZOS_KERNELS_RGB(rgb0) LANCZOS_KERNELS_RGB(bgr0) LANCZOS_KERNELS_RGB(rgba) LANCZOS_KERNELS_RGB(bgra) + +#define GENERIC_KERNEL(D, DIR, C, S) \ + __global__ void Subsample_Generic_##D##_##C##S( \ + KERNEL_ARGS(Convert_##C::out_T##S)) \ + { \ + SUBSAMPLE((Convert_##C::Convert##S< \ + Subsample_Generic<Convert_##C::in_T, DIR>, \ + Subsample_Generic<Convert_##C::in_T_uv, DIR> >), \ + Convert_##C::out_T##S) \ + } + +#define GENERIC_KERNEL_RAW(C) \ + GENERIC_KERNEL(h, SCALE_DIR_X, C,) \ + GENERIC_KERNEL(h, SCALE_DIR_X, C,_uv) \ + GENERIC_KERNEL(v, SCALE_DIR_Y, C,) \ + GENERIC_KERNEL(v, SCALE_DIR_Y, C,_uv) + +#define GENERIC_KERNELS(C) \ + GENERIC_KERNEL_RAW(planar8_ ## C) \ + GENERIC_KERNEL_RAW(planar10_ ## C) \ + GENERIC_KERNEL_RAW(planar16_ ## C) \ + GENERIC_KERNEL_RAW(semiplanar8_ ## C) \ + GENERIC_KERNEL_RAW(semiplanar10_ ## C) \ + GENERIC_KERNEL_RAW(semiplanar16_ ## C) + +#define GENERIC_KERNELS_RGB(C) \ + GENERIC_KERNEL_RAW(rgb0_ ## C) \ + GENERIC_KERNEL_RAW(bgr0_ ## C) \ + GENERIC_KERNEL_RAW(rgba_ ## C) \ + GENERIC_KERNEL_RAW(bgra_ ## C) + +GENERIC_KERNELS(planar8) +GENERIC_KERNELS(planar10) +GENERIC_KERNELS(planar16) +GENERIC_KERNELS(semiplanar8) +GENERIC_KERNELS(semiplanar10) +GENERIC_KERNELS(semiplanar16) + +GENERIC_KERNELS_RGB(rgb0) +GENERIC_KERNELS_RGB(bgr0) +GENERIC_KERNELS_RGB(rgba) +GENERIC_KERNELS_RGB(bgra) } _______________________________________________ ffmpeg-cvslog mailing list -- [email protected] To unsubscribe send an email to [email protected]
