The branch, master has been updated
via 75b85675916337bc0993f02a70279b96e5903153 (commit)
via 04b5e25d353a4b9de2a1bf0802a8138d42dfe85b (commit)
via 9c76d7db863c11ff968d71e4970fc2359303b65f (commit)
from 0cd75dbfa0fc6c213cf9240b3c03c809070c5209 (commit)
- Log -----------------------------------------------------------------
commit 75b85675916337bc0993f02a70279b96e5903153
Author: Diego de Souza <[email protected]>
AuthorDate: Tue Nov 18 17:16:43 2025 +0100
Commit: Timo Rothenpieler <[email protected]>
CommitDate: Thu Nov 27 22:11:57 2025 +0100
avfilter/scale_cuda: Add support for 4:2:2 chroma subsampling
The supported YUV pixel formats were separated between planar
and semiplanar. This approach reduces the number of CUDA kernels
for all pixel formats.
This patch:
1. Adds support for YUV 4:2:2 planar and semi-planar formats:
yuv422p, yuv422p10, nv16, p210, p216
2. Implements new conversion structures and kernel definitions
for planar and semi-planar formats
Signed-off-by: Diego de Souza <[email protected]>
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index d56a458e45..67814c0d77 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -678,6 +678,7 @@ SKIPHEADERS-$(CONFIG_QSVVPP) += qsvvpp.h
stack_internal.h
SKIPHEADERS-$(CONFIG_OPENCL) += opencl.h
SKIPHEADERS-$(CONFIG_VAAPI) += vaapi_vpp.h stack_internal.h
SKIPHEADERS-$(CONFIG_VULKAN) += vulkan_filter.h
+SKIPHEADERS-$(CONFIG_SCALE_CUDA_FILTER) += vf_scale_cuda.h
TOOLS = graph2dot
TESTPROGS = drawutils filtfmts formats integral
diff --git a/libavfilter/version.h b/libavfilter/version.h
index 4a69d6be98..776321d1fc 100644
--- a/libavfilter/version.h
+++ b/libavfilter/version.h
@@ -32,7 +32,7 @@
#include "version_major.h"
#define LIBAVFILTER_VERSION_MINOR 10
-#define LIBAVFILTER_VERSION_MICRO 100
+#define LIBAVFILTER_VERSION_MICRO 101
#define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \
diff --git a/libavfilter/vf_scale_cuda.c b/libavfilter/vf_scale_cuda.c
index 88a6e20610..5fd757161b 100644
--- a/libavfilter/vf_scale_cuda.c
+++ b/libavfilter/vf_scale_cuda.c
@@ -39,17 +39,29 @@
#include "cuda/load_helper.h"
#include "vf_scale_cuda.h"
-static const enum AVPixelFormat supported_formats[] = {
- AV_PIX_FMT_YUV420P,
- AV_PIX_FMT_NV12,
- AV_PIX_FMT_YUV444P,
- AV_PIX_FMT_P010,
- AV_PIX_FMT_P016,
- AV_PIX_FMT_YUV444P16,
- AV_PIX_FMT_0RGB32,
- AV_PIX_FMT_0BGR32,
- AV_PIX_FMT_RGB32,
- AV_PIX_FMT_BGR32,
+struct format_entry {
+ enum AVPixelFormat format;
+ char name[13];
+};
+
+static const struct format_entry supported_formats[] = {
+ {AV_PIX_FMT_YUV420P, "planar8"},
+ {AV_PIX_FMT_YUV422P, "planar8"},
+ {AV_PIX_FMT_YUV444P, "planar8"},
+ {AV_PIX_FMT_YUV420P10,"planar10"},
+ {AV_PIX_FMT_YUV422P10,"planar10"},
+ {AV_PIX_FMT_YUV444P10,"planar10"},
+ {AV_PIX_FMT_YUV444P16,"planar16"},
+ {AV_PIX_FMT_NV12, "semiplanar8"},
+ {AV_PIX_FMT_NV16, "semiplanar8"},
+ {AV_PIX_FMT_P010, "semiplanar10"},
+ {AV_PIX_FMT_P210, "semiplanar10"},
+ {AV_PIX_FMT_P016, "semiplanar16"},
+ {AV_PIX_FMT_P216, "semiplanar16"},
+ {AV_PIX_FMT_0RGB32, "bgr0"},
+ {AV_PIX_FMT_0BGR32, "rgb0"},
+ {AV_PIX_FMT_RGB32, "bgra"},
+ {AV_PIX_FMT_BGR32, "rgba"},
};
#define DIV_UP(a, b) ( ((a) + (b) - 1) / (b) )
@@ -184,14 +196,20 @@ fail:
static int format_is_supported(enum AVPixelFormat fmt)
{
- int i;
-
- for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++)
- if (supported_formats[i] == fmt)
+ for (int i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++)
+ if (supported_formats[i].format == fmt)
return 1;
return 0;
}
+static const char* get_format_name(enum AVPixelFormat fmt)
+{
+ for (int i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++)
+ if (supported_formats[i].format == fmt)
+ return supported_formats[i].name;
+ return NULL;
+}
+
static av_cold void set_format_info(AVFilterContext *ctx, enum AVPixelFormat
in_format, enum AVPixelFormat out_format)
{
CUDAScaleContext *s = ctx->priv;
@@ -284,8 +302,8 @@ static av_cold int cudascale_load_functions(AVFilterContext
*ctx)
char buf[128];
int ret;
- const char *in_fmt_name = av_get_pix_fmt_name(s->in_fmt);
- const char *out_fmt_name = av_get_pix_fmt_name(s->out_fmt);
+ const char *in_fmt_name = get_format_name(s->in_fmt);
+ const char *out_fmt_name = get_format_name(s->out_fmt);
const char *function_infix = "";
@@ -335,11 +353,13 @@ static av_cold int
cudascale_load_functions(AVFilterContext *ctx)
ret = AVERROR(ENOSYS);
goto fail;
}
+ av_log(ctx, AV_LOG_DEBUG, "Luma filter: %s (%s -> %s)\n", buf,
av_get_pix_fmt_name(s->in_fmt), av_get_pix_fmt_name(s->out_fmt));
snprintf(buf, sizeof(buf), "Subsample_%s_%s_%s_uv", function_infix,
in_fmt_name, out_fmt_name);
ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uv, s->cu_module, buf));
if (ret < 0)
goto fail;
+ av_log(ctx, AV_LOG_DEBUG, "Chroma filter: %s (%s -> %s)\n", buf,
av_get_pix_fmt_name(s->in_fmt), av_get_pix_fmt_name(s->out_fmt));
fail:
CHECK_CU(cu->cuCtxPopCurrent(&dummy));
@@ -416,26 +436,35 @@ fail:
static int call_resize_kernel(AVFilterContext *ctx, CUfunction func,
CUtexObject src_tex[4], int src_left, int
src_top, int src_width, int src_height,
- AVFrame *out_frame, int dst_width, int
dst_height, int dst_pitch)
+ AVFrame *out_frame, int dst_width, int
dst_height, int dst_pitch, int mpeg_range)
{
CUDAScaleContext *s = ctx->priv;
CudaFunctions *cu = s->hwctx->internal->cuda_dl;
- CUdeviceptr dst_devptr[4] = {
- (CUdeviceptr)out_frame->data[0], (CUdeviceptr)out_frame->data[1],
- (CUdeviceptr)out_frame->data[2], (CUdeviceptr)out_frame->data[3]
+ CUDAScaleKernelParams params = {
+ .src_tex = {src_tex[0], src_tex[1], src_tex[2], src_tex[3]},
+ .dst = {
+ (CUdeviceptr)out_frame->data[0],
+ (CUdeviceptr)out_frame->data[1],
+ (CUdeviceptr)out_frame->data[2],
+ (CUdeviceptr)out_frame->data[3]
+ },
+ .dst_width = dst_width,
+ .dst_height = dst_height,
+ .dst_pitch = dst_pitch,
+ .src_left = src_left,
+ .src_top = src_top,
+ .src_width = src_width,
+ .src_height = src_height,
+ .param = s->param,
+ .mpeg_range = mpeg_range
};
- void *args_uchar[] = {
- &src_tex[0], &src_tex[1], &src_tex[2], &src_tex[3],
- &dst_devptr[0], &dst_devptr[1], &dst_devptr[2], &dst_devptr[3],
- &dst_width, &dst_height, &dst_pitch,
- &src_left, &src_top, &src_width, &src_height, &s->param
- };
+ void *args[] = { ¶ms };
return CHECK_CU(cu->cuLaunchKernel(func,
DIV_UP(dst_width, BLOCKX),
DIV_UP(dst_height, BLOCKY), 1,
- BLOCKX, BLOCKY, 1, 0, s->cu_stream,
args_uchar, NULL));
+ BLOCKX, BLOCKY, 1, 0, s->cu_stream,
args, NULL));
}
static int scalecuda_resize(AVFilterContext *ctx,
@@ -445,6 +474,7 @@ static int scalecuda_resize(AVFilterContext *ctx,
CudaFunctions *cu = s->hwctx->internal->cuda_dl;
CUcontext dummy, cuda_ctx = s->hwctx->cuda_ctx;
int i, ret;
+ int mpeg_range = in->color_range != AVCOL_RANGE_JPEG;
CUtexObject tex[4] = { 0, 0, 0, 0 };
@@ -489,7 +519,7 @@ static int scalecuda_resize(AVFilterContext *ctx,
// scale primary plane(s). Usually Y (and A), or single plane of RGB
frames.
ret = call_resize_kernel(ctx, s->cu_func,
tex, in->crop_left, in->crop_top, crop_width,
crop_height,
- out, out->width, out->height, out->linesize[0]);
+ out, out->width, out->height, out->linesize[0],
mpeg_range);
if (ret < 0)
goto exit;
@@ -503,7 +533,7 @@ static int scalecuda_resize(AVFilterContext *ctx,
out,
AV_CEIL_RSHIFT(out->width,
s->out_desc->log2_chroma_w),
AV_CEIL_RSHIFT(out->height,
s->out_desc->log2_chroma_h),
- out->linesize[1]);
+ out->linesize[1], mpeg_range);
if (ret < 0)
goto exit;
}
diff --git a/libavfilter/vf_scale_cuda.cu b/libavfilter/vf_scale_cuda.cu
index 271b55cd5d..d674c0885a 100644
--- a/libavfilter/vf_scale_cuda.cu
+++ b/libavfilter/vf_scale_cuda.cu
@@ -35,9 +35,16 @@ using subsample_function_t = T (*)(cudaTextureObject_t tex,
int xo, int yo,
static const ushort mask_10bit = 0xFFC0;
static const ushort mask_16bit = 0xFFFF;
-static inline __device__ ushort conv_8to16(uchar in, ushort mask)
+static inline __device__ ushort conv_8to16(uchar in, ushort mask, int
mpeg_range)
{
- return ((ushort)in | ((ushort)in << 8)) & mask;
+ ushort shifted = (ushort)in << 8;
+ return mpeg_range ? shifted : ((shifted | ((ushort)in )) & mask);
+}
+
+static inline __device__ ushort conv_8to10pl(uchar in, int mpeg_range)
+{
+ ushort shifted = (ushort)in << 2;
+ return mpeg_range ? shifted : (shifted | ((ushort)in >> 6));
}
static inline __device__ uchar conv_16to8(ushort in)
@@ -50,9 +57,21 @@ static inline __device__ uchar conv_10to8(ushort in)
return in >> 8;
}
-static inline __device__ ushort conv_10to16(ushort in)
+static inline __device__ uchar conv_10to8pl(ushort in)
+{
+ return in >> 2;
+}
+
+static inline __device__ ushort conv_10to16(ushort in, int mpeg_range)
{
- return in | (in >> 10);
+ ushort shifted = (in >> 10);
+ return mpeg_range ? in : (in | shifted);
+}
+
+static inline __device__ ushort conv_10to16pl(ushort in, int mpeg_range)
+{
+ ushort shifted = (in << 6);
+ return mpeg_range ? shifted : (shifted | (in >> 4));
}
static inline __device__ ushort conv_16to10(ushort in)
@@ -60,12 +79,18 @@ static inline __device__ ushort conv_16to10(ushort in)
return in & mask_10bit;
}
+static inline __device__ ushort conv_16to10pl(ushort in)
+{
+ return in >> 6;
+}
+
#define DEF_F(N, T) \
template<subsample_function_t<in_T> subsample_func_y,
\
subsample_function_t<in_T_uv> subsample_func_uv>
\
__device__ static inline void N(cudaTextureObject_t src_tex[4], T *dst[4],
int xo, int yo, \
int dst_width, int dst_height, int
dst_pitch, \
- int src_left, int src_top, int src_width,
int src_height, float param)
+ int src_left, int src_top, int src_width,
int src_height, \
+ float param, int mpeg_range)
#define SUB_F(m, plane) \
subsample_func_##m(src_tex[plane], xo, yo, \
@@ -81,9 +106,9 @@ static inline __device__ ushort conv_16to10(ushort in)
#define DEFAULT_DST(n) \
dst[n][yo*FIXED_PITCH+xo]
-// yuv420p->X
+// planar8->X
-struct Convert_yuv420p_yuv420p
+struct Convert_planar8_planar8
{
static const int in_bit_depth = 8;
typedef uchar in_T;
@@ -103,71 +128,69 @@ struct Convert_yuv420p_yuv420p
}
};
-struct Convert_yuv420p_nv12
+struct Convert_planar8_planar10
{
static const int in_bit_depth = 8;
typedef uchar in_T;
typedef uchar in_T_uv;
- typedef uchar out_T;
- typedef uchar2 out_T_uv;
+ typedef ushort out_T;
+ typedef ushort out_T_uv;
DEF_F(Convert, out_T)
{
- DEFAULT_DST(0) = SUB_F(y, 0);
+ DEFAULT_DST(0) = conv_8to10pl(SUB_F(y, 0), mpeg_range);
}
DEF_F(Convert_uv, out_T_uv)
{
- DEFAULT_DST(1) = make_uchar2(
- SUB_F(uv, 1),
- SUB_F(uv, 2)
- );
+ DEFAULT_DST(1) = conv_8to10pl(SUB_F(uv, 1), mpeg_range);
+ DEFAULT_DST(2) = conv_8to10pl(SUB_F(uv, 2), mpeg_range);
}
};
-struct Convert_yuv420p_yuv444p
+struct Convert_planar8_planar16
{
static const int in_bit_depth = 8;
typedef uchar in_T;
typedef uchar in_T_uv;
- typedef uchar out_T;
- typedef uchar out_T_uv;
+ typedef ushort out_T;
+ typedef ushort out_T_uv;
DEF_F(Convert, out_T)
{
- DEFAULT_DST(0) = SUB_F(y, 0);
+ DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_16bit, mpeg_range);
}
DEF_F(Convert_uv, out_T_uv)
{
- DEFAULT_DST(1) = SUB_F(uv, 1);
- DEFAULT_DST(2) = SUB_F(uv, 2);
+ DEFAULT_DST(1) = conv_8to16(SUB_F(uv, 1), mask_16bit, mpeg_range);
+ DEFAULT_DST(2) = conv_8to16(SUB_F(uv, 2), mask_16bit, mpeg_range);
}
};
-struct Convert_yuv420p_p010le
+struct Convert_planar8_semiplanar8
{
static const int in_bit_depth = 8;
typedef uchar in_T;
typedef uchar in_T_uv;
- typedef ushort out_T;
- typedef ushort2 out_T_uv;
+ typedef uchar out_T;
+ typedef uchar2 out_T_uv;
DEF_F(Convert, out_T)
{
- DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_10bit);
+ DEFAULT_DST(0) = SUB_F(y, 0);
}
DEF_F(Convert_uv, out_T_uv)
{
- DEFAULT_DST(1) = make_ushort2(
- conv_8to16(SUB_F(uv, 1), mask_10bit),
- conv_8to16(SUB_F(uv, 2), mask_10bit)
+ DEFAULT_DST(1) = make_uchar2(
+ SUB_F(uv, 1),
+ SUB_F(uv, 2)
);
}
};
-struct Convert_yuv420p_p016le
+struct Convert_planar8_semiplanar10
{
static const int in_bit_depth = 8;
typedef uchar in_T;
@@ -177,68 +200,71 @@ struct Convert_yuv420p_p016le
DEF_F(Convert, out_T)
{
- DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_16bit);
+ DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_10bit, mpeg_range);
}
DEF_F(Convert_uv, out_T_uv)
{
DEFAULT_DST(1) = make_ushort2(
- conv_8to16(SUB_F(uv, 1), mask_16bit),
- conv_8to16(SUB_F(uv, 2), mask_16bit)
+ conv_8to16(SUB_F(uv, 1), mask_10bit, mpeg_range),
+ conv_8to16(SUB_F(uv, 2), mask_10bit, mpeg_range)
);
}
};
-struct Convert_yuv420p_yuv444p16le
+struct Convert_planar8_semiplanar16
{
static const int in_bit_depth = 8;
typedef uchar in_T;
typedef uchar in_T_uv;
typedef ushort out_T;
- typedef ushort out_T_uv;
+ typedef ushort2 out_T_uv;
DEF_F(Convert, out_T)
{
- DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_16bit);
+ DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_16bit, mpeg_range);
}
DEF_F(Convert_uv, out_T_uv)
{
- DEFAULT_DST(1) = conv_8to16(SUB_F(uv, 1), mask_16bit);
- DEFAULT_DST(2) = conv_8to16(SUB_F(uv, 2), mask_16bit);
+ DEFAULT_DST(1) = make_ushort2(
+ conv_8to16(SUB_F(uv, 1), mask_16bit, mpeg_range),
+ conv_8to16(SUB_F(uv, 2), mask_16bit, mpeg_range)
+ );
}
};
-// nv12->X
-struct Convert_nv12_yuv420p
+
+// planar10->X
+
+struct Convert_planar10_planar8
{
- static const int in_bit_depth = 8;
- typedef uchar in_T;
- typedef uchar2 in_T_uv;
+ static const int in_bit_depth = 10;
+ typedef ushort in_T;
+ typedef ushort in_T_uv;
typedef uchar out_T;
typedef uchar out_T_uv;
DEF_F(Convert, out_T)
{
- DEFAULT_DST(0) = SUB_F(y, 0);
+ DEFAULT_DST(0) = conv_10to8pl(SUB_F(y, 0));
}
DEF_F(Convert_uv, out_T_uv)
{
- in_T_uv res = SUB_F(uv, 1);
- DEFAULT_DST(1) = res.x;
- DEFAULT_DST(2) = res.y;
+ DEFAULT_DST(1) = conv_10to8pl(SUB_F(uv, 1));
+ DEFAULT_DST(2) = conv_10to8pl(SUB_F(uv, 2));
}
};
-struct Convert_nv12_nv12
+struct Convert_planar10_planar10
{
- static const int in_bit_depth = 8;
- typedef uchar in_T;
- typedef uchar2 in_T_uv;
- typedef uchar out_T;
- typedef uchar2 out_T_uv;
+ static const int in_bit_depth = 10;
+ typedef ushort in_T;
+ typedef ushort in_T_uv;
+ typedef ushort out_T;
+ typedef ushort out_T_uv;
DEF_F(Convert, out_T)
{
@@ -248,148 +274,145 @@ struct Convert_nv12_nv12
DEF_F(Convert_uv, out_T_uv)
{
DEFAULT_DST(1) = SUB_F(uv, 1);
+ DEFAULT_DST(2) = SUB_F(uv, 2);
}
};
-struct Convert_nv12_yuv444p
+struct Convert_planar10_planar16
{
- static const int in_bit_depth = 8;
- typedef uchar in_T;
- typedef uchar2 in_T_uv;
- typedef uchar out_T;
- typedef uchar out_T_uv;
+ static const int in_bit_depth = 10;
+ typedef ushort in_T;
+ typedef ushort in_T_uv;
+ typedef ushort out_T;
+ typedef ushort out_T_uv;
DEF_F(Convert, out_T)
{
- DEFAULT_DST(0) = SUB_F(y, 0);
+ DEFAULT_DST(0) = conv_10to16pl(SUB_F(y, 0), mpeg_range);
}
DEF_F(Convert_uv, out_T_uv)
{
- in_T_uv res = SUB_F(uv, 1);
- DEFAULT_DST(1) = res.x;
- DEFAULT_DST(2) = res.y;
+ DEFAULT_DST(1) = conv_10to16pl(SUB_F(uv, 1), mpeg_range);
+ DEFAULT_DST(2) = conv_10to16pl(SUB_F(uv, 2), mpeg_range);
}
};
-struct Convert_nv12_p010le
+struct Convert_planar10_semiplanar8
{
- static const int in_bit_depth = 8;
- typedef uchar in_T;
- typedef uchar2 in_T_uv;
- typedef ushort out_T;
- typedef ushort2 out_T_uv;
+ static const int in_bit_depth = 10;
+ typedef ushort in_T;
+ typedef ushort in_T_uv;
+ typedef uchar out_T;
+ typedef uchar2 out_T_uv;
DEF_F(Convert, out_T)
{
- DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_10bit);
+ DEFAULT_DST(0) = conv_10to8pl(SUB_F(y, 0));
}
DEF_F(Convert_uv, out_T_uv)
{
- in_T_uv res = SUB_F(uv, 1);
- DEFAULT_DST(1) = make_ushort2(
- conv_8to16(res.x, mask_10bit),
- conv_8to16(res.y, mask_10bit)
+ DEFAULT_DST(1) = make_uchar2(
+ conv_10to8pl(SUB_F(uv, 1)),
+ conv_10to8pl(SUB_F(uv, 2))
);
}
};
-struct Convert_nv12_p016le
+struct Convert_planar10_semiplanar10
{
- static const int in_bit_depth = 8;
- typedef uchar in_T;
- typedef uchar2 in_T_uv;
+ static const int in_bit_depth = 10;
+ typedef ushort in_T;
+ typedef ushort in_T_uv;
typedef ushort out_T;
typedef ushort2 out_T_uv;
DEF_F(Convert, out_T)
{
- DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_16bit);
+ DEFAULT_DST(0) = (SUB_F(y, 0) << 6);
}
DEF_F(Convert_uv, out_T_uv)
{
- in_T_uv res = SUB_F(uv, 1);
DEFAULT_DST(1) = make_ushort2(
- conv_8to16(res.x, mask_16bit),
- conv_8to16(res.y, mask_16bit)
+ (SUB_F(uv, 1) << 6),
+ (SUB_F(uv, 2) << 6)
);
}
};
-struct Convert_nv12_yuv444p16le
+struct Convert_planar10_semiplanar16
{
- static const int in_bit_depth = 8;
- typedef uchar in_T;
- typedef uchar2 in_T_uv;
+ static const int in_bit_depth = 10;
+ typedef ushort in_T;
+ typedef ushort in_T_uv;
typedef ushort out_T;
- typedef ushort out_T_uv;
+ typedef ushort2 out_T_uv;
DEF_F(Convert, out_T)
{
- DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_16bit);
+ DEFAULT_DST(0) = conv_10to16pl(SUB_F(y, 0), mpeg_range);
}
DEF_F(Convert_uv, out_T_uv)
{
- in_T_uv res = SUB_F(uv, 1);
- DEFAULT_DST(1) = conv_8to16(res.x, mask_16bit);
- DEFAULT_DST(2) = conv_8to16(res.y, mask_16bit);
+ DEFAULT_DST(1) = make_ushort2(
+ conv_10to16pl(SUB_F(uv, 1), mpeg_range),
+ conv_10to16pl(SUB_F(uv, 2), mpeg_range)
+ );
}
};
-// yuv444p->X
+// planar16->X
-struct Convert_yuv444p_yuv420p
+struct Convert_planar16_planar8
{
- static const int in_bit_depth = 8;
- typedef uchar in_T;
- typedef uchar in_T_uv;
+ static const int in_bit_depth = 16;
+ typedef ushort in_T;
+ typedef ushort in_T_uv;
typedef uchar out_T;
typedef uchar out_T_uv;
DEF_F(Convert, out_T)
{
- DEFAULT_DST(0) = SUB_F(y, 0);
+ DEFAULT_DST(0) = conv_16to8(SUB_F(y, 0));
}
DEF_F(Convert_uv, out_T_uv)
{
- DEFAULT_DST(1) = SUB_F(uv, 1);
- DEFAULT_DST(2) = SUB_F(uv, 2);
+ DEFAULT_DST(1) = conv_16to8(SUB_F(uv, 1));
+ DEFAULT_DST(2) = conv_16to8(SUB_F(uv, 2));
}
};
-struct Convert_yuv444p_nv12
+struct Convert_planar16_planar10
{
- static const int in_bit_depth = 8;
- typedef uchar in_T;
- typedef uchar in_T_uv;
- typedef uchar out_T;
- typedef uchar2 out_T_uv;
+ static const int in_bit_depth = 16;
+ typedef ushort in_T;
+ typedef ushort in_T_uv;
+ typedef ushort out_T;
+ typedef ushort out_T_uv;
DEF_F(Convert, out_T)
{
- DEFAULT_DST(0) = SUB_F(y, 0);
+ DEFAULT_DST(0) = conv_16to10pl(SUB_F(y, 0));
}
DEF_F(Convert_uv, out_T_uv)
{
- DEFAULT_DST(1) = make_uchar2(
- SUB_F(uv, 1),
- SUB_F(uv, 2)
- );
+ DEFAULT_DST(1) = conv_16to10pl(SUB_F(uv, 1));
+ DEFAULT_DST(2) = conv_16to10pl(SUB_F(uv, 2));
}
};
-struct Convert_yuv444p_yuv444p
+struct Convert_planar16_planar16
{
- static const int in_bit_depth = 8;
- typedef uchar in_T;
- typedef uchar in_T_uv;
- typedef uchar out_T;
- typedef uchar out_T_uv;
+ static const int in_bit_depth = 16;
+ typedef ushort in_T;
+ typedef ushort in_T_uv;
+ typedef ushort out_T;
+ typedef ushort out_T_uv;
DEF_F(Convert, out_T)
{
@@ -403,144 +426,144 @@ struct Convert_yuv444p_yuv444p
}
};
-struct Convert_yuv444p_p010le
+struct Convert_planar16_semiplanar8
{
- static const int in_bit_depth = 8;
- typedef uchar in_T;
- typedef uchar in_T_uv;
- typedef ushort out_T;
- typedef ushort2 out_T_uv;
+ static const int in_bit_depth = 16;
+ typedef ushort in_T;
+ typedef ushort in_T_uv;
+ typedef uchar out_T;
+ typedef uchar2 out_T_uv;
DEF_F(Convert, out_T)
{
- DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_10bit);
+ DEFAULT_DST(0) = conv_16to8(SUB_F(y, 0));
}
DEF_F(Convert_uv, out_T_uv)
{
- DEFAULT_DST(1) = make_ushort2(
- conv_8to16(SUB_F(uv, 1), mask_10bit),
- conv_8to16(SUB_F(uv, 2), mask_10bit)
+ DEFAULT_DST(1) = make_uchar2(
+ conv_16to8(SUB_F(uv, 1)),
+ conv_16to8(SUB_F(uv, 2))
);
}
};
-struct Convert_yuv444p_p016le
+struct Convert_planar16_semiplanar10
{
- static const int in_bit_depth = 8;
- typedef uchar in_T;
- typedef uchar in_T_uv;
+ static const int in_bit_depth = 16;
+ typedef ushort in_T;
+ typedef ushort in_T_uv;
typedef ushort out_T;
typedef ushort2 out_T_uv;
DEF_F(Convert, out_T)
{
- DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_16bit);
+ DEFAULT_DST(0) = conv_16to10(SUB_F(y, 0));
}
DEF_F(Convert_uv, out_T_uv)
{
DEFAULT_DST(1) = make_ushort2(
- conv_8to16(SUB_F(uv, 1), mask_16bit),
- conv_8to16(SUB_F(uv, 2), mask_16bit)
+ conv_16to10(SUB_F(uv, 1)),
+ conv_16to10(SUB_F(uv, 2))
);
}
};
-struct Convert_yuv444p_yuv444p16le
+struct Convert_planar16_semiplanar16
{
- static const int in_bit_depth = 8;
- typedef uchar in_T;
- typedef uchar in_T_uv;
+ static const int in_bit_depth = 16;
+ typedef ushort in_T;
+ typedef ushort in_T_uv;
typedef ushort out_T;
- typedef ushort out_T_uv;
+ typedef ushort2 out_T_uv;
DEF_F(Convert, out_T)
{
- DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_16bit);
+ DEFAULT_DST(0) = SUB_F(y, 0);
}
DEF_F(Convert_uv, out_T_uv)
{
- DEFAULT_DST(1) = conv_8to16(SUB_F(uv, 1), mask_16bit);
- DEFAULT_DST(2) = conv_8to16(SUB_F(uv, 2), mask_16bit);
+ DEFAULT_DST(1) = make_ushort2(
+ SUB_F(uv, 1),
+ SUB_F(uv, 2)
+ );
}
};
-// p010le->X
+// semiplanar8->X
-struct Convert_p010le_yuv420p
+struct Convert_semiplanar8_planar8
{
- static const int in_bit_depth = 10;
- typedef ushort in_T;
- typedef ushort2 in_T_uv;
+ static const int in_bit_depth = 8;
+ typedef uchar in_T;
+ typedef uchar2 in_T_uv;
typedef uchar out_T;
typedef uchar out_T_uv;
DEF_F(Convert, out_T)
{
- DEFAULT_DST(0) = conv_10to8(SUB_F(y, 0));
+ DEFAULT_DST(0) = SUB_F(y, 0);
}
DEF_F(Convert_uv, out_T_uv)
{
in_T_uv res = SUB_F(uv, 1);
- DEFAULT_DST(1) = conv_10to8(res.x);
- DEFAULT_DST(2) = conv_10to8(res.y);
+ DEFAULT_DST(1) = res.x;
+ DEFAULT_DST(2) = res.y;
}
};
-struct Convert_p010le_nv12
+struct Convert_semiplanar8_planar10
{
- static const int in_bit_depth = 10;
- typedef ushort in_T;
- typedef ushort2 in_T_uv;
- typedef uchar out_T;
- typedef uchar2 out_T_uv;
+ static const int in_bit_depth = 8;
+ typedef uchar in_T;
+ typedef uchar2 in_T_uv;
+ typedef ushort out_T;
+ typedef ushort out_T_uv;
DEF_F(Convert, out_T)
{
- DEFAULT_DST(0) = conv_10to8(SUB_F(y, 0));
+ DEFAULT_DST(0) = conv_8to10pl(SUB_F(y, 0), mpeg_range);
}
DEF_F(Convert_uv, out_T_uv)
{
in_T_uv res = SUB_F(uv, 1);
- DEFAULT_DST(1) = make_uchar2(
- conv_10to8(res.x),
- conv_10to8(res.y)
- );
+ DEFAULT_DST(1) = conv_8to10pl(res.x, mpeg_range);
+ DEFAULT_DST(2) = conv_8to10pl(res.y, mpeg_range);
}
};
-struct Convert_p010le_yuv444p
+struct Convert_semiplanar8_planar16
{
- static const int in_bit_depth = 10;
- typedef ushort in_T;
- typedef ushort2 in_T_uv;
- typedef uchar out_T;
- typedef uchar out_T_uv;
+ static const int in_bit_depth = 8;
+ typedef uchar in_T;
+ typedef uchar2 in_T_uv;
+ typedef ushort out_T;
+ typedef ushort out_T_uv;
DEF_F(Convert, out_T)
{
- DEFAULT_DST(0) = conv_10to8(SUB_F(y, 0));
+ DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_16bit, mpeg_range);
}
DEF_F(Convert_uv, out_T_uv)
{
in_T_uv res = SUB_F(uv, 1);
- DEFAULT_DST(1) = conv_10to8(res.x);
- DEFAULT_DST(2) = conv_10to8(res.y);
+ DEFAULT_DST(1) = conv_8to16(res.x, mask_16bit, mpeg_range);
+ DEFAULT_DST(2) = conv_8to16(res.y, mask_16bit, mpeg_range);
}
};
-struct Convert_p010le_p010le
+struct Convert_semiplanar8_semiplanar8
{
- static const int in_bit_depth = 10;
- typedef ushort in_T;
- typedef ushort2 in_T_uv;
- typedef ushort out_T;
- typedef ushort2 out_T_uv;
+ static const int in_bit_depth = 8;
+ typedef uchar in_T;
+ typedef uchar2 in_T_uv;
+ typedef uchar out_T;
+ typedef uchar2 out_T_uv;
DEF_F(Convert, out_T)
{
@@ -553,55 +576,57 @@ struct Convert_p010le_p010le
}
};
-struct Convert_p010le_p016le
+struct Convert_semiplanar8_semiplanar10
{
- static const int in_bit_depth = 10;
- typedef ushort in_T;
- typedef ushort2 in_T_uv;
+ static const int in_bit_depth = 8;
+ typedef uchar in_T;
+ typedef uchar2 in_T_uv;
typedef ushort out_T;
typedef ushort2 out_T_uv;
DEF_F(Convert, out_T)
{
- DEFAULT_DST(0) = conv_10to16(SUB_F(y, 0));
+ DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_10bit, mpeg_range);
}
DEF_F(Convert_uv, out_T_uv)
{
in_T_uv res = SUB_F(uv, 1);
DEFAULT_DST(1) = make_ushort2(
- conv_10to16(res.x),
- conv_10to16(res.y)
+ conv_8to16(res.x, mask_10bit, mpeg_range),
+ conv_8to16(res.y, mask_10bit, mpeg_range)
);
}
};
-struct Convert_p010le_yuv444p16le
+struct Convert_semiplanar8_semiplanar16
{
- static const int in_bit_depth = 10;
- typedef ushort in_T;
- typedef ushort2 in_T_uv;
+ static const int in_bit_depth = 8;
+ typedef uchar in_T;
+ typedef uchar2 in_T_uv;
typedef ushort out_T;
- typedef ushort out_T_uv;
+ typedef ushort2 out_T_uv;
DEF_F(Convert, out_T)
{
- DEFAULT_DST(0) = conv_10to16(SUB_F(y, 0));
+ DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_16bit, mpeg_range);
}
DEF_F(Convert_uv, out_T_uv)
{
in_T_uv res = SUB_F(uv, 1);
- DEFAULT_DST(1) = conv_10to16(res.x);
- DEFAULT_DST(2) = conv_10to16(res.y);
+ DEFAULT_DST(1) = make_ushort2(
+ conv_8to16(res.x, mask_16bit, mpeg_range),
+ conv_8to16(res.y, mask_16bit, mpeg_range)
+ );
}
};
-// p016le->X
+// semiplanar10->X
-struct Convert_p016le_yuv420p
+struct Convert_semiplanar10_planar8
{
- static const int in_bit_depth = 16;
+ static const int in_bit_depth = 10;
typedef ushort in_T;
typedef ushort2 in_T_uv;
typedef uchar out_T;
@@ -609,87 +634,85 @@ struct Convert_p016le_yuv420p
DEF_F(Convert, out_T)
{
- DEFAULT_DST(0) = conv_16to8(SUB_F(y, 0));
+ DEFAULT_DST(0) = conv_10to8(SUB_F(y, 0));
}
DEF_F(Convert_uv, out_T_uv)
{
in_T_uv res = SUB_F(uv, 1);
- DEFAULT_DST(1) = conv_16to8(res.x);
- DEFAULT_DST(2) = conv_16to8(res.y);
+ DEFAULT_DST(1) = conv_10to8(res.x);
+ DEFAULT_DST(2) = conv_10to8(res.y);
}
};
-struct Convert_p016le_nv12
+struct Convert_semiplanar10_planar10
{
- static const int in_bit_depth = 16;
+ static const int in_bit_depth = 10;
typedef ushort in_T;
typedef ushort2 in_T_uv;
- typedef uchar out_T;
- typedef uchar2 out_T_uv;
+ typedef ushort out_T;
+ typedef ushort out_T_uv;
DEF_F(Convert, out_T)
{
- DEFAULT_DST(0) = conv_16to8(SUB_F(y, 0));
+ DEFAULT_DST(0) = SUB_F(y, 0) >> 6;
}
DEF_F(Convert_uv, out_T_uv)
{
in_T_uv res = SUB_F(uv, 1);
- DEFAULT_DST(1) = make_uchar2(
- conv_16to8(res.x),
- conv_16to8(res.y)
- );
+ DEFAULT_DST(1) = res.x >> 6;
+ DEFAULT_DST(2) = res.y >> 6;
}
};
-struct Convert_p016le_yuv444p
+struct Convert_semiplanar10_planar16
{
- static const int in_bit_depth = 16;
+ static const int in_bit_depth = 10;
typedef ushort in_T;
typedef ushort2 in_T_uv;
- typedef uchar out_T;
- typedef uchar out_T_uv;
+ typedef ushort out_T;
+ typedef ushort out_T_uv;
DEF_F(Convert, out_T)
{
- DEFAULT_DST(0) = conv_16to8(SUB_F(y, 0));
+ DEFAULT_DST(0) = conv_10to16(SUB_F(y, 0), mpeg_range);
}
DEF_F(Convert_uv, out_T_uv)
{
in_T_uv res = SUB_F(uv, 1);
- DEFAULT_DST(1) = conv_16to8(res.x);
- DEFAULT_DST(2) = conv_16to8(res.y);
+ DEFAULT_DST(1) = conv_10to16(res.x, mpeg_range);
+ DEFAULT_DST(2) = conv_10to16(res.y, mpeg_range);
}
};
-struct Convert_p016le_p010le
+struct Convert_semiplanar10_semiplanar8
{
- static const int in_bit_depth = 16;
+ static const int in_bit_depth = 10;
typedef ushort in_T;
typedef ushort2 in_T_uv;
- typedef ushort out_T;
- typedef ushort2 out_T_uv;
+ typedef uchar out_T;
+ typedef uchar2 out_T_uv;
DEF_F(Convert, out_T)
{
- DEFAULT_DST(0) = conv_16to10(SUB_F(y, 0));
+ DEFAULT_DST(0) = conv_10to8(SUB_F(y, 0));
}
DEF_F(Convert_uv, out_T_uv)
{
in_T_uv res = SUB_F(uv, 1);
- DEFAULT_DST(1) = make_ushort2(
- conv_16to10(res.x),
- conv_16to10(res.y)
+ DEFAULT_DST(1) = make_uchar2(
+ conv_10to8(res.x),
+ conv_10to8(res.y)
);
}
};
-struct Convert_p016le_p016le
+struct Convert_semiplanar10_semiplanar10
{
- static const int in_bit_depth = 16;
+ static const int in_bit_depth = 10;
typedef ushort in_T;
typedef ushort2 in_T_uv;
typedef ushort out_T;
@@ -706,34 +729,37 @@ struct Convert_p016le_p016le
}
};
-struct Convert_p016le_yuv444p16le
+struct Convert_semiplanar10_semiplanar16
{
- static const int in_bit_depth = 16;
+ static const int in_bit_depth = 10;
typedef ushort in_T;
typedef ushort2 in_T_uv;
typedef ushort out_T;
- typedef ushort out_T_uv;
+ typedef ushort2 out_T_uv;
DEF_F(Convert, out_T)
{
- DEFAULT_DST(0) = SUB_F(y, 0);
+ DEFAULT_DST(0) = conv_10to16(SUB_F(y, 0), mpeg_range);
}
DEF_F(Convert_uv, out_T_uv)
{
in_T_uv res = SUB_F(uv, 1);
- DEFAULT_DST(1) = res.x;
- DEFAULT_DST(2) = res.y;
+ DEFAULT_DST(1) = make_ushort2(
+ conv_10to16(res.x, mpeg_range),
+ conv_10to16(res.y, mpeg_range)
+ );
}
};
-// yuv444p16le->X
-struct Convert_yuv444p16le_yuv420p
+// semiplanar16->X
+
+struct Convert_semiplanar16_planar8
{
static const int in_bit_depth = 16;
typedef ushort in_T;
- typedef ushort in_T_uv;
+ typedef ushort2 in_T_uv;
typedef uchar out_T;
typedef uchar out_T_uv;
@@ -744,104 +770,107 @@ struct Convert_yuv444p16le_yuv420p
DEF_F(Convert_uv, out_T_uv)
{
- DEFAULT_DST(1) = conv_16to8(SUB_F(uv, 1));
- DEFAULT_DST(2) = conv_16to8(SUB_F(uv, 2));
+ in_T_uv res = SUB_F(uv, 1);
+ DEFAULT_DST(1) = conv_16to8(res.x);
+ DEFAULT_DST(2) = conv_16to8(res.y);
}
};
-struct Convert_yuv444p16le_nv12
+struct Convert_semiplanar16_planar10
{
static const int in_bit_depth = 16;
typedef ushort in_T;
- typedef ushort in_T_uv;
- typedef uchar out_T;
- typedef uchar2 out_T_uv;
+ typedef ushort2 in_T_uv;
+ typedef ushort out_T;
+ typedef ushort out_T_uv;
DEF_F(Convert, out_T)
{
- DEFAULT_DST(0) = conv_16to8(SUB_F(y, 0));
+ DEFAULT_DST(0) = conv_16to10pl(SUB_F(y, 0));
}
DEF_F(Convert_uv, out_T_uv)
{
- DEFAULT_DST(1) = make_uchar2(
- conv_16to8(SUB_F(uv, 1)),
- conv_16to8(SUB_F(uv, 2))
- );
+ in_T_uv res = SUB_F(uv, 1);
+ DEFAULT_DST(1) = conv_16to10pl(res.x);
+ DEFAULT_DST(2) = conv_16to10pl(res.y);
}
};
-struct Convert_yuv444p16le_yuv444p
+struct Convert_semiplanar16_planar16
{
static const int in_bit_depth = 16;
typedef ushort in_T;
- typedef ushort in_T_uv;
- typedef uchar out_T;
- typedef uchar out_T_uv;
+ typedef ushort2 in_T_uv;
+ typedef ushort out_T;
+ typedef ushort out_T_uv;
DEF_F(Convert, out_T)
{
- DEFAULT_DST(0) = conv_16to8(SUB_F(y, 0));
+ DEFAULT_DST(0) = SUB_F(y, 0);
}
DEF_F(Convert_uv, out_T_uv)
{
- DEFAULT_DST(1) = conv_16to8(SUB_F(uv, 1));
- DEFAULT_DST(2) = conv_16to8(SUB_F(uv, 2));
+ in_T_uv res = SUB_F(uv, 1);
+ DEFAULT_DST(1) = res.x;
+ DEFAULT_DST(2) = res.y;
}
};
-struct Convert_yuv444p16le_p010le
+struct Convert_semiplanar16_semiplanar8
{
static const int in_bit_depth = 16;
typedef ushort in_T;
- typedef ushort in_T_uv;
- typedef ushort out_T;
- typedef ushort2 out_T_uv;
+ typedef ushort2 in_T_uv;
+ typedef uchar out_T;
+ typedef uchar2 out_T_uv;
DEF_F(Convert, out_T)
{
- DEFAULT_DST(0) = conv_16to10(SUB_F(y, 0));
+ DEFAULT_DST(0) = conv_16to8(SUB_F(y, 0));
}
DEF_F(Convert_uv, out_T_uv)
{
- DEFAULT_DST(1) = make_ushort2(
- conv_16to10(SUB_F(uv, 1)),
- conv_16to10(SUB_F(uv, 2))
+ in_T_uv res = SUB_F(uv, 1);
+ DEFAULT_DST(1) = make_uchar2(
+ conv_16to8(res.x),
+ conv_16to8(res.y)
);
}
};
-struct Convert_yuv444p16le_p016le
+struct Convert_semiplanar16_semiplanar10
{
static const int in_bit_depth = 16;
typedef ushort in_T;
- typedef ushort in_T_uv;
+ typedef ushort2 in_T_uv;
typedef ushort out_T;
typedef ushort2 out_T_uv;
DEF_F(Convert, out_T)
{
- DEFAULT_DST(0) = SUB_F(y, 0);
+ DEFAULT_DST(0) = conv_16to10(SUB_F(y, 0));
}
DEF_F(Convert_uv, out_T_uv)
{
+ in_T_uv res = SUB_F(uv, 1);
DEFAULT_DST(1) = make_ushort2(
- SUB_F(uv, 1),
- SUB_F(uv, 2)
+ conv_16to10(res.x),
+ conv_16to10(res.y)
);
}
};
-struct Convert_yuv444p16le_yuv444p16le
+struct Convert_semiplanar16_semiplanar16
{
static const int in_bit_depth = 16;
typedef ushort in_T;
- typedef ushort in_T_uv;
+ typedef ushort2 in_T_uv;
typedef ushort out_T;
- typedef ushort out_T_uv;
+ typedef ushort2 out_T_uv;
DEF_F(Convert, out_T)
{
@@ -851,7 +880,6 @@ struct Convert_yuv444p16le_yuv444p16le
DEF_F(Convert_uv, out_T_uv)
{
DEFAULT_DST(1) = SUB_F(uv, 1);
- DEFAULT_DST(2) = SUB_F(uv, 2);
}
};
@@ -930,7 +958,7 @@ struct Convert_bgr0_bgra
res.x,
res.y,
res.z,
- 1
+ 0xFF
);
}
@@ -954,7 +982,7 @@ struct Convert_bgr0_rgba
res.z,
res.y,
res.x,
- 1
+ 0xFF
);
}
@@ -978,7 +1006,7 @@ struct Convert_rgb0_bgra
res.z,
res.y,
res.x,
- 1
+ 0xFF
);
}
@@ -1002,7 +1030,7 @@ struct Convert_rgb0_rgba
res.x,
res.y,
res.z,
- 1
+ 0xFF
);
}
@@ -1147,25 +1175,26 @@ __device__ static inline T
Subsample_Bicubic(cudaTextureObject_t tex,
/// --- FUNCTION EXPORTS ---
-#define KERNEL_ARGS(T) \
- cudaTextureObject_t src_tex_0, cudaTextureObject_t src_tex_1, \
- cudaTextureObject_t src_tex_2, cudaTextureObject_t src_tex_3, \
- T *dst_0, T *dst_1, T *dst_2, T *dst_3, \
- int dst_width, int dst_height, int dst_pitch, \
- int src_left, int src_top, int src_width, int src_height, float param
+#define KERNEL_ARGS(T) CUDAScaleKernelParams params
#define SUBSAMPLE(Convert, T) \
- cudaTextureObject_t src_tex[4] = \
- { src_tex_0, src_tex_1, src_tex_2, src_tex_3 }; \
- T *dst[4] = { dst_0, dst_1, dst_2, dst_3 }; \
+ cudaTextureObject_t src_tex[4] = { \
+ params.src_tex[0], params.src_tex[1], \
+ params.src_tex[2], params.src_tex[3] \
+ }; \
+ T *dst[4] = { \
+ (T*)params.dst[0], (T*)params.dst[1], \
+ (T*)params.dst[2], (T*)params.dst[3] \
+ }; \
int xo = blockIdx.x * blockDim.x + threadIdx.x; \
int yo = blockIdx.y * blockDim.y + threadIdx.y; \
- if (yo >= dst_height || xo >= dst_width) return; \
+ if (yo >= params.dst_height || xo >= params.dst_width) return; \
Convert( \
src_tex, dst, xo, yo, \
- dst_width, dst_height, dst_pitch, \
- src_left, src_top, \
- src_width, src_height, param);
+ params.dst_width, params.dst_height, params.dst_pitch, \
+ params.src_left, params.src_top, \
+ params.src_width, params.src_height, \
+ params.param, params.mpeg_range);
extern "C" {
@@ -1184,12 +1213,12 @@ extern "C" {
NEAREST_KERNEL(C,_uv)
#define NEAREST_KERNELS(C) \
- NEAREST_KERNEL_RAW(yuv420p_ ## C) \
- NEAREST_KERNEL_RAW(nv12_ ## C) \
- NEAREST_KERNEL_RAW(yuv444p_ ## C) \
- NEAREST_KERNEL_RAW(p010le_ ## C) \
- NEAREST_KERNEL_RAW(p016le_ ## C) \
- NEAREST_KERNEL_RAW(yuv444p16le_ ## C)
+ NEAREST_KERNEL_RAW(planar8_ ## C) \
+ NEAREST_KERNEL_RAW(planar10_ ## C) \
+ NEAREST_KERNEL_RAW(planar16_ ## C) \
+ NEAREST_KERNEL_RAW(semiplanar8_ ## C) \
+ NEAREST_KERNEL_RAW(semiplanar10_ ## C) \
+ NEAREST_KERNEL_RAW(semiplanar16_ ## C)
#define NEAREST_KERNELS_RGB(C) \
NEAREST_KERNEL_RAW(rgb0_ ## C) \
@@ -1197,12 +1226,12 @@ extern "C" {
NEAREST_KERNEL_RAW(rgba_ ## C) \
NEAREST_KERNEL_RAW(bgra_ ## C) \
-NEAREST_KERNELS(yuv420p)
-NEAREST_KERNELS(nv12)
-NEAREST_KERNELS(yuv444p)
-NEAREST_KERNELS(p010le)
-NEAREST_KERNELS(p016le)
-NEAREST_KERNELS(yuv444p16le)
+NEAREST_KERNELS(planar8)
+NEAREST_KERNELS(planar10)
+NEAREST_KERNELS(planar16)
+NEAREST_KERNELS(semiplanar8)
+NEAREST_KERNELS(semiplanar10)
+NEAREST_KERNELS(semiplanar16)
NEAREST_KERNELS_RGB(rgb0)
NEAREST_KERNELS_RGB(bgr0)
@@ -1224,12 +1253,12 @@ NEAREST_KERNELS_RGB(bgra)
BILINEAR_KERNEL(C,_uv)
#define BILINEAR_KERNELS(C) \
- BILINEAR_KERNEL_RAW(yuv420p_ ## C) \
- BILINEAR_KERNEL_RAW(nv12_ ## C) \
- BILINEAR_KERNEL_RAW(yuv444p_ ## C) \
- BILINEAR_KERNEL_RAW(p010le_ ## C) \
- BILINEAR_KERNEL_RAW(p016le_ ## C) \
- BILINEAR_KERNEL_RAW(yuv444p16le_ ## C)
+ BILINEAR_KERNEL_RAW(planar8_ ## C) \
+ BILINEAR_KERNEL_RAW(planar10_ ## C) \
+ BILINEAR_KERNEL_RAW(planar16_ ## C) \
+ BILINEAR_KERNEL_RAW(semiplanar8_ ## C) \
+ BILINEAR_KERNEL_RAW(semiplanar10_ ## C) \
+ BILINEAR_KERNEL_RAW(semiplanar16_ ## C)
#define BILINEAR_KERNELS_RGB(C) \
BILINEAR_KERNEL_RAW(rgb0_ ## C) \
@@ -1237,12 +1266,12 @@ NEAREST_KERNELS_RGB(bgra)
BILINEAR_KERNEL_RAW(rgba_ ## C) \
BILINEAR_KERNEL_RAW(bgra_ ## C)
-BILINEAR_KERNELS(yuv420p)
-BILINEAR_KERNELS(nv12)
-BILINEAR_KERNELS(yuv444p)
-BILINEAR_KERNELS(p010le)
-BILINEAR_KERNELS(p016le)
-BILINEAR_KERNELS(yuv444p16le)
+BILINEAR_KERNELS(planar8)
+BILINEAR_KERNELS(planar10)
+BILINEAR_KERNELS(planar16)
+BILINEAR_KERNELS(semiplanar8)
+BILINEAR_KERNELS(semiplanar10)
+BILINEAR_KERNELS(semiplanar16)
BILINEAR_KERNELS_RGB(rgb0)
BILINEAR_KERNELS_RGB(bgr0)
@@ -1264,12 +1293,12 @@ BILINEAR_KERNELS_RGB(bgra)
BICUBIC_KERNEL(C,_uv)
#define BICUBIC_KERNELS(C) \
- BICUBIC_KERNEL_RAW(yuv420p_ ## C) \
- BICUBIC_KERNEL_RAW(nv12_ ## C) \
- BICUBIC_KERNEL_RAW(yuv444p_ ## C) \
- BICUBIC_KERNEL_RAW(p010le_ ## C) \
- BICUBIC_KERNEL_RAW(p016le_ ## C) \
- BICUBIC_KERNEL_RAW(yuv444p16le_ ## C)
+ BICUBIC_KERNEL_RAW(planar8_ ## C) \
+ BICUBIC_KERNEL_RAW(planar10_ ## C) \
+ BICUBIC_KERNEL_RAW(planar16_ ## C) \
+ BICUBIC_KERNEL_RAW(semiplanar8_ ## C) \
+ BICUBIC_KERNEL_RAW(semiplanar10_ ## C) \
+ BICUBIC_KERNEL_RAW(semiplanar16_ ## C)
#define BICUBIC_KERNELS_RGB(C) \
BICUBIC_KERNEL_RAW(rgb0_ ## C) \
@@ -1277,12 +1306,12 @@ BILINEAR_KERNELS_RGB(bgra)
BICUBIC_KERNEL_RAW(rgba_ ## C) \
BICUBIC_KERNEL_RAW(bgra_ ## C)
-BICUBIC_KERNELS(yuv420p)
-BICUBIC_KERNELS(nv12)
-BICUBIC_KERNELS(yuv444p)
-BICUBIC_KERNELS(p010le)
-BICUBIC_KERNELS(p016le)
-BICUBIC_KERNELS(yuv444p16le)
+BICUBIC_KERNELS(planar8)
+BICUBIC_KERNELS(planar10)
+BICUBIC_KERNELS(planar16)
+BICUBIC_KERNELS(semiplanar8)
+BICUBIC_KERNELS(semiplanar10)
+BICUBIC_KERNELS(semiplanar16)
BICUBIC_KERNELS_RGB(rgb0)
BICUBIC_KERNELS_RGB(bgr0)
@@ -1304,12 +1333,12 @@ BICUBIC_KERNELS_RGB(bgra)
LANCZOS_KERNEL(C,_uv)
#define LANCZOS_KERNELS(C) \
- LANCZOS_KERNEL_RAW(yuv420p_ ## C) \
- LANCZOS_KERNEL_RAW(nv12_ ## C) \
- LANCZOS_KERNEL_RAW(yuv444p_ ## C) \
- LANCZOS_KERNEL_RAW(p010le_ ## C) \
- LANCZOS_KERNEL_RAW(p016le_ ## C) \
- LANCZOS_KERNEL_RAW(yuv444p16le_ ## C)
+ LANCZOS_KERNEL_RAW(planar8_ ## C) \
+ LANCZOS_KERNEL_RAW(planar10_ ## C) \
+ LANCZOS_KERNEL_RAW(planar16_ ## C) \
+ LANCZOS_KERNEL_RAW(semiplanar8_ ## C) \
+ LANCZOS_KERNEL_RAW(semiplanar10_ ## C) \
+ LANCZOS_KERNEL_RAW(semiplanar16_ ## C)
#define LANCZOS_KERNELS_RGB(C) \
LANCZOS_KERNEL_RAW(rgb0_ ## C) \
@@ -1317,12 +1346,12 @@ BICUBIC_KERNELS_RGB(bgra)
LANCZOS_KERNEL_RAW(rgba_ ## C) \
LANCZOS_KERNEL_RAW(bgra_ ## C)
-LANCZOS_KERNELS(yuv420p)
-LANCZOS_KERNELS(nv12)
-LANCZOS_KERNELS(yuv444p)
-LANCZOS_KERNELS(p010le)
-LANCZOS_KERNELS(p016le)
-LANCZOS_KERNELS(yuv444p16le)
+LANCZOS_KERNELS(planar8)
+LANCZOS_KERNELS(planar10)
+LANCZOS_KERNELS(planar16)
+LANCZOS_KERNELS(semiplanar8)
+LANCZOS_KERNELS(semiplanar10)
+LANCZOS_KERNELS(semiplanar16)
LANCZOS_KERNELS_RGB(rgb0)
LANCZOS_KERNELS_RGB(bgr0)
diff --git a/libavfilter/vf_scale_cuda.h b/libavfilter/vf_scale_cuda.h
index 40d5b9cfac..81fd8061e3 100644
--- a/libavfilter/vf_scale_cuda.h
+++ b/libavfilter/vf_scale_cuda.h
@@ -23,6 +23,28 @@
#ifndef AVFILTER_SCALE_CUDA_H
#define AVFILTER_SCALE_CUDA_H
+#if defined(__CUDACC__) || defined(__CUDA__)
+#include <stdint.h>
+typedef cudaTextureObject_t CUtexObject;
+typedef uint8_t* CUdeviceptr;
+#else
+#include <ffnvcodec/dynlink_cuda.h>
+#endif
+
#define SCALE_CUDA_PARAM_DEFAULT 999999.0f
+typedef struct {
+ CUtexObject src_tex[4];
+ CUdeviceptr dst[4];
+ int dst_width;
+ int dst_height;
+ int dst_pitch;
+ int src_left;
+ int src_top;
+ int src_width;
+ int src_height;
+ float param;
+ int mpeg_range;
+} CUDAScaleKernelParams;
+
#endif
commit 04b5e25d353a4b9de2a1bf0802a8138d42dfe85b
Author: Diego de Souza <[email protected]>
AuthorDate: Thu Nov 13 09:49:45 2025 +0100
Commit: Timo Rothenpieler <[email protected]>
CommitDate: Thu Nov 27 22:11:57 2025 +0100
avfilter/hwupload_cuda: Expands pixel formats support
Add support for uploading additional pixel formats to NVIDIA GPUs:
- Planar formats (yuv420p10, yuv422p, yuv422p10, yuv444p10)
- Semiplanar formats (nv16, p210, p216)
Signed-off-by: Diego de Souza <[email protected]>
diff --git a/libavfilter/vf_hwupload_cuda.c b/libavfilter/vf_hwupload_cuda.c
index b505f8b298..34f959ca50 100644
--- a/libavfilter/vf_hwupload_cuda.c
+++ b/libavfilter/vf_hwupload_cuda.c
@@ -59,9 +59,9 @@ static int cudaupload_query_formats(const AVFilterContext
*ctx,
int ret;
static const enum AVPixelFormat input_pix_fmts[] = {
- AV_PIX_FMT_NV12, AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUVA420P,
AV_PIX_FMT_YUV444P,
- AV_PIX_FMT_P010, AV_PIX_FMT_P016, AV_PIX_FMT_YUV444P16,
- AV_PIX_FMT_0RGB32, AV_PIX_FMT_0BGR32,
+ AV_PIX_FMT_NV12, AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUVA420P,
AV_PIX_FMT_NV16, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV444P,
+ AV_PIX_FMT_P010, AV_PIX_FMT_P016, AV_PIX_FMT_P210, AV_PIX_FMT_P216,
AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
AV_PIX_FMT_YUV444P16,
+ AV_PIX_FMT_0RGB32, AV_PIX_FMT_0BGR32, AV_PIX_FMT_RGB32,
AV_PIX_FMT_BGR32,
#if CONFIG_VULKAN
AV_PIX_FMT_VULKAN,
#endif
commit 9c76d7db863c11ff968d71e4970fc2359303b65f
Author: Diego de Souza <[email protected]>
AuthorDate: Wed Nov 12 20:08:45 2025 +0100
Commit: Timo Rothenpieler <[email protected]>
CommitDate: Thu Nov 27 22:11:57 2025 +0100
avutil/hwcontext_cuda: Expands pixel formats support
Add support for additional pixel formats in CUDA hardware context:
- Planar formats (yuv420p10, yuv422p, yuv422p10, yuv444p10)
- Semiplanar formats (nv16, p210, p216)
Signed-off-by: Diego de Souza <[email protected]>
diff --git a/libavutil/hwcontext_cuda.c b/libavutil/hwcontext_cuda.c
index 10d3399537..b0b65b2446 100644
--- a/libavutil/hwcontext_cuda.c
+++ b/libavutil/hwcontext_cuda.c
@@ -50,6 +50,10 @@ static const enum AVPixelFormat supported_formats[] = {
AV_PIX_FMT_P016,
AV_PIX_FMT_P210,
AV_PIX_FMT_P216,
+ AV_PIX_FMT_YUV422P,
+ AV_PIX_FMT_YUV420P10,
+ AV_PIX_FMT_YUV422P10,
+ AV_PIX_FMT_YUV444P10,
AV_PIX_FMT_YUV444P10MSB,
AV_PIX_FMT_YUV444P12MSB,
AV_PIX_FMT_YUV444P16,
diff --git a/libavutil/version.h b/libavutil/version.h
index db250d5c9e..d058e94425 100644
--- a/libavutil/version.h
+++ b/libavutil/version.h
@@ -80,7 +80,7 @@
#define LIBAVUTIL_VERSION_MAJOR 60
#define LIBAVUTIL_VERSION_MINOR 19
-#define LIBAVUTIL_VERSION_MICRO 100
+#define LIBAVUTIL_VERSION_MICRO 101
#define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
LIBAVUTIL_VERSION_MINOR, \
-----------------------------------------------------------------------
Summary of changes:
libavfilter/Makefile | 1 +
libavfilter/version.h | 2 +-
libavfilter/vf_hwupload_cuda.c | 6 +-
libavfilter/vf_scale_cuda.c | 90 ++++--
libavfilter/vf_scale_cuda.cu | 689 +++++++++++++++++++++--------------------
libavfilter/vf_scale_cuda.h | 22 ++
libavutil/hwcontext_cuda.c | 4 +
libavutil/version.h | 2 +-
8 files changed, 451 insertions(+), 365 deletions(-)
hooks/post-receive
--
_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]