[FFmpeg-cvslog] [ffmpeg] branch master updated. 75b8567591 avfilter/scale_cuda: Add support for 4:2:2 chroma subsampling

ffmpeg-git--- via ffmpeg-cvslog Thu, 27 Nov 2025 13:30:31 -0800

The branch, master has been updated
       via  75b85675916337bc0993f02a70279b96e5903153 (commit)
       via  04b5e25d353a4b9de2a1bf0802a8138d42dfe85b (commit)
       via  9c76d7db863c11ff968d71e4970fc2359303b65f (commit)
      from  0cd75dbfa0fc6c213cf9240b3c03c809070c5209 (commit)



- Log -----------------------------------------------------------------
commit 75b85675916337bc0993f02a70279b96e5903153
Author:     Diego de Souza <[email protected]>
AuthorDate: Tue Nov 18 17:16:43 2025 +0100
Commit:     Timo Rothenpieler <[email protected]>
CommitDate: Thu Nov 27 22:11:57 2025 +0100

    avfilter/scale_cuda: Add support for 4:2:2 chroma subsampling
    
    The supported YUV pixel formats were separated between planar
    and semiplanar. This approach reduces the number of CUDA kernels
    for all pixel formats.
    
    This patch:
    1. Adds support for YUV 4:2:2 planar and semi-planar formats:
            yuv422p, yuv422p10, nv16, p210, p216
    2. Implements new conversion structures and kernel definitions
            for planar and semi-planar formats
    
    Signed-off-by: Diego de Souza <[email protected]>

diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index d56a458e45..67814c0d77 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -678,6 +678,7 @@ SKIPHEADERS-$(CONFIG_QSVVPP)                 += qsvvpp.h 
stack_internal.h
 SKIPHEADERS-$(CONFIG_OPENCL)                 += opencl.h
 SKIPHEADERS-$(CONFIG_VAAPI)                  += vaapi_vpp.h stack_internal.h
 SKIPHEADERS-$(CONFIG_VULKAN)                 += vulkan_filter.h
+SKIPHEADERS-$(CONFIG_SCALE_CUDA_FILTER)      += vf_scale_cuda.h
 
 TOOLS     = graph2dot
 TESTPROGS = drawutils filtfmts formats integral
diff --git a/libavfilter/version.h b/libavfilter/version.h
index 4a69d6be98..776321d1fc 100644
--- a/libavfilter/version.h
+++ b/libavfilter/version.h
@@ -32,7 +32,7 @@
 #include "version_major.h"
 
 #define LIBAVFILTER_VERSION_MINOR  10
-#define LIBAVFILTER_VERSION_MICRO 100
+#define LIBAVFILTER_VERSION_MICRO 101
 
 
 #define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \
diff --git a/libavfilter/vf_scale_cuda.c b/libavfilter/vf_scale_cuda.c
index 88a6e20610..5fd757161b 100644
--- a/libavfilter/vf_scale_cuda.c
+++ b/libavfilter/vf_scale_cuda.c
@@ -39,17 +39,29 @@
 #include "cuda/load_helper.h"
 #include "vf_scale_cuda.h"
 
-static const enum AVPixelFormat supported_formats[] = {
-    AV_PIX_FMT_YUV420P,
-    AV_PIX_FMT_NV12,
-    AV_PIX_FMT_YUV444P,
-    AV_PIX_FMT_P010,
-    AV_PIX_FMT_P016,
-    AV_PIX_FMT_YUV444P16,
-    AV_PIX_FMT_0RGB32,
-    AV_PIX_FMT_0BGR32,
-    AV_PIX_FMT_RGB32,
-    AV_PIX_FMT_BGR32,
+struct format_entry {
+    enum AVPixelFormat format;
+    char name[13];
+};
+
+static const struct format_entry supported_formats[] = {
+    {AV_PIX_FMT_YUV420P,  "planar8"},
+    {AV_PIX_FMT_YUV422P,  "planar8"},
+    {AV_PIX_FMT_YUV444P,  "planar8"},
+    {AV_PIX_FMT_YUV420P10,"planar10"},
+    {AV_PIX_FMT_YUV422P10,"planar10"},
+    {AV_PIX_FMT_YUV444P10,"planar10"},
+    {AV_PIX_FMT_YUV444P16,"planar16"},
+    {AV_PIX_FMT_NV12,     "semiplanar8"},
+    {AV_PIX_FMT_NV16,     "semiplanar8"},
+    {AV_PIX_FMT_P010,     "semiplanar10"},
+    {AV_PIX_FMT_P210,     "semiplanar10"},
+    {AV_PIX_FMT_P016,     "semiplanar16"},
+    {AV_PIX_FMT_P216,     "semiplanar16"},
+    {AV_PIX_FMT_0RGB32,   "bgr0"},
+    {AV_PIX_FMT_0BGR32,   "rgb0"},
+    {AV_PIX_FMT_RGB32,    "bgra"},
+    {AV_PIX_FMT_BGR32,    "rgba"},
 };
 
 #define DIV_UP(a, b) ( ((a) + (b) - 1) / (b) )
@@ -184,14 +196,20 @@ fail:
 
 static int format_is_supported(enum AVPixelFormat fmt)
 {
-    int i;
-
-    for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++)
-        if (supported_formats[i] == fmt)
+    for (int i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++)
+        if (supported_formats[i].format == fmt)
             return 1;
     return 0;
 }
 
+static const char* get_format_name(enum AVPixelFormat fmt)
+{
+    for (int i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++)
+        if (supported_formats[i].format == fmt)
+            return supported_formats[i].name;
+    return NULL;
+}
+
 static av_cold void set_format_info(AVFilterContext *ctx, enum AVPixelFormat 
in_format, enum AVPixelFormat out_format)
 {
     CUDAScaleContext *s = ctx->priv;
@@ -284,8 +302,8 @@ static av_cold int cudascale_load_functions(AVFilterContext 
*ctx)
     char buf[128];
     int ret;
 
-    const char *in_fmt_name = av_get_pix_fmt_name(s->in_fmt);
-    const char *out_fmt_name = av_get_pix_fmt_name(s->out_fmt);
+    const char *in_fmt_name = get_format_name(s->in_fmt);
+    const char *out_fmt_name = get_format_name(s->out_fmt);
 
     const char *function_infix = "";
 
@@ -335,11 +353,13 @@ static av_cold int 
cudascale_load_functions(AVFilterContext *ctx)
         ret = AVERROR(ENOSYS);
         goto fail;
     }
+    av_log(ctx, AV_LOG_DEBUG, "Luma filter: %s (%s -> %s)\n", buf, 
av_get_pix_fmt_name(s->in_fmt), av_get_pix_fmt_name(s->out_fmt));
 
     snprintf(buf, sizeof(buf), "Subsample_%s_%s_%s_uv", function_infix, 
in_fmt_name, out_fmt_name);
     ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uv, s->cu_module, buf));
     if (ret < 0)
         goto fail;
+    av_log(ctx, AV_LOG_DEBUG, "Chroma filter: %s (%s -> %s)\n", buf, 
av_get_pix_fmt_name(s->in_fmt), av_get_pix_fmt_name(s->out_fmt));
 
 fail:
     CHECK_CU(cu->cuCtxPopCurrent(&dummy));
@@ -416,26 +436,35 @@ fail:
 
 static int call_resize_kernel(AVFilterContext *ctx, CUfunction func,
                               CUtexObject src_tex[4], int src_left, int 
src_top, int src_width, int src_height,
-                              AVFrame *out_frame, int dst_width, int 
dst_height, int dst_pitch)
+                              AVFrame *out_frame, int dst_width, int 
dst_height, int dst_pitch, int mpeg_range)
 {
     CUDAScaleContext *s = ctx->priv;
     CudaFunctions *cu = s->hwctx->internal->cuda_dl;
 
-    CUdeviceptr dst_devptr[4] = {
-        (CUdeviceptr)out_frame->data[0], (CUdeviceptr)out_frame->data[1],
-        (CUdeviceptr)out_frame->data[2], (CUdeviceptr)out_frame->data[3]
+    CUDAScaleKernelParams params = {
+        .src_tex = {src_tex[0], src_tex[1], src_tex[2], src_tex[3]},
+        .dst = {
+            (CUdeviceptr)out_frame->data[0],
+            (CUdeviceptr)out_frame->data[1],
+            (CUdeviceptr)out_frame->data[2],
+            (CUdeviceptr)out_frame->data[3]
+        },
+        .dst_width = dst_width,
+        .dst_height = dst_height,
+        .dst_pitch = dst_pitch,
+        .src_left = src_left,
+        .src_top = src_top,
+        .src_width = src_width,
+        .src_height = src_height,
+        .param = s->param,
+        .mpeg_range = mpeg_range
     };
 
-    void *args_uchar[] = {
-        &src_tex[0], &src_tex[1], &src_tex[2], &src_tex[3],
-        &dst_devptr[0], &dst_devptr[1], &dst_devptr[2], &dst_devptr[3],
-        &dst_width, &dst_height, &dst_pitch,
-        &src_left, &src_top, &src_width, &src_height, &s->param
-    };
+    void *args[] = { &params };
 
     return CHECK_CU(cu->cuLaunchKernel(func,
                                        DIV_UP(dst_width, BLOCKX), 
DIV_UP(dst_height, BLOCKY), 1,
-                                       BLOCKX, BLOCKY, 1, 0, s->cu_stream, 
args_uchar, NULL));
+                                       BLOCKX, BLOCKY, 1, 0, s->cu_stream, 
args, NULL));
 }
 
 static int scalecuda_resize(AVFilterContext *ctx,
@@ -445,6 +474,7 @@ static int scalecuda_resize(AVFilterContext *ctx,
     CudaFunctions *cu = s->hwctx->internal->cuda_dl;
     CUcontext dummy, cuda_ctx = s->hwctx->cuda_ctx;
     int i, ret;
+    int mpeg_range = in->color_range != AVCOL_RANGE_JPEG;
 
     CUtexObject tex[4] = { 0, 0, 0, 0 };
 
@@ -489,7 +519,7 @@ static int scalecuda_resize(AVFilterContext *ctx,
     // scale primary plane(s). Usually Y (and A), or single plane of RGB 
frames.
     ret = call_resize_kernel(ctx, s->cu_func,
                              tex, in->crop_left, in->crop_top, crop_width, 
crop_height,
-                             out, out->width, out->height, out->linesize[0]);
+                             out, out->width, out->height, out->linesize[0], 
mpeg_range);
     if (ret < 0)
         goto exit;
 
@@ -503,7 +533,7 @@ static int scalecuda_resize(AVFilterContext *ctx,
                                  out,
                                  AV_CEIL_RSHIFT(out->width, 
s->out_desc->log2_chroma_w),
                                  AV_CEIL_RSHIFT(out->height, 
s->out_desc->log2_chroma_h),
-                                 out->linesize[1]);
+                                 out->linesize[1], mpeg_range);
         if (ret < 0)
             goto exit;
     }
diff --git a/libavfilter/vf_scale_cuda.cu b/libavfilter/vf_scale_cuda.cu
index 271b55cd5d..d674c0885a 100644
--- a/libavfilter/vf_scale_cuda.cu
+++ b/libavfilter/vf_scale_cuda.cu
@@ -35,9 +35,16 @@ using subsample_function_t = T (*)(cudaTextureObject_t tex, 
int xo, int yo,
 static const ushort mask_10bit = 0xFFC0;
 static const ushort mask_16bit = 0xFFFF;
 
-static inline __device__ ushort conv_8to16(uchar in, ushort mask)
+static inline __device__ ushort conv_8to16(uchar in, ushort mask, int 
mpeg_range)
 {
-    return ((ushort)in | ((ushort)in << 8)) & mask;
+    ushort shifted = (ushort)in << 8;
+    return mpeg_range ? shifted : ((shifted | ((ushort)in )) & mask);
+}
+
+static inline __device__ ushort conv_8to10pl(uchar in, int mpeg_range)
+{
+    ushort shifted = (ushort)in << 2;
+    return mpeg_range ? shifted : (shifted | ((ushort)in >> 6));
 }
 
 static inline __device__ uchar conv_16to8(ushort in)
@@ -50,9 +57,21 @@ static inline __device__ uchar conv_10to8(ushort in)
     return in >> 8;
 }
 
-static inline __device__ ushort conv_10to16(ushort in)
+static inline __device__ uchar conv_10to8pl(ushort in)
+{
+    return in >> 2;
+}
+
+static inline __device__ ushort conv_10to16(ushort in, int mpeg_range)
 {
-    return in | (in >> 10);
+    ushort shifted = (in >> 10);
+    return mpeg_range ? in : (in | shifted);
+}
+
+static inline __device__ ushort conv_10to16pl(ushort in, int mpeg_range)
+{
+    ushort shifted = (in << 6);
+    return mpeg_range ? shifted : (shifted | (in >> 4));
 }
 
 static inline __device__ ushort conv_16to10(ushort in)
@@ -60,12 +79,18 @@ static inline __device__ ushort conv_16to10(ushort in)
     return in & mask_10bit;
 }
 
+static inline __device__ ushort conv_16to10pl(ushort in)
+{
+    return in >> 6;
+}
+
 #define DEF_F(N, T) \
     template<subsample_function_t<in_T> subsample_func_y,                      
                \
              subsample_function_t<in_T_uv> subsample_func_uv>                  
                \
     __device__ static inline void N(cudaTextureObject_t src_tex[4], T *dst[4], 
int xo, int yo, \
                                     int dst_width, int dst_height, int 
dst_pitch,              \
-                                    int src_left, int src_top, int src_width, 
int src_height, float param)
+                                    int src_left, int src_top, int src_width, 
int src_height,  \
+                                    float param, int mpeg_range)
 
 #define SUB_F(m, plane) \
     subsample_func_##m(src_tex[plane], xo, yo, \
@@ -81,9 +106,9 @@ static inline __device__ ushort conv_16to10(ushort in)
 #define DEFAULT_DST(n) \
     dst[n][yo*FIXED_PITCH+xo]
 
-// yuv420p->X
+// planar8->X
 
-struct Convert_yuv420p_yuv420p
+struct Convert_planar8_planar8
 {
     static const int in_bit_depth = 8;
     typedef uchar in_T;
@@ -103,71 +128,69 @@ struct Convert_yuv420p_yuv420p
     }
 };
 
-struct Convert_yuv420p_nv12
+struct Convert_planar8_planar10
 {
     static const int in_bit_depth = 8;
     typedef uchar in_T;
     typedef uchar in_T_uv;
-    typedef uchar out_T;
-    typedef uchar2 out_T_uv;
+    typedef ushort out_T;
+    typedef ushort out_T_uv;
 
     DEF_F(Convert, out_T)
     {
-        DEFAULT_DST(0) = SUB_F(y, 0);
+        DEFAULT_DST(0) = conv_8to10pl(SUB_F(y, 0), mpeg_range);
     }
 
     DEF_F(Convert_uv, out_T_uv)
     {
-        DEFAULT_DST(1) = make_uchar2(
-            SUB_F(uv, 1),
-            SUB_F(uv, 2)
-        );
+        DEFAULT_DST(1) = conv_8to10pl(SUB_F(uv, 1), mpeg_range);
+        DEFAULT_DST(2) = conv_8to10pl(SUB_F(uv, 2), mpeg_range);
     }
 };
 
-struct Convert_yuv420p_yuv444p
+struct Convert_planar8_planar16
 {
     static const int in_bit_depth = 8;
     typedef uchar in_T;
     typedef uchar in_T_uv;
-    typedef uchar out_T;
-    typedef uchar out_T_uv;
+    typedef ushort out_T;
+    typedef ushort out_T_uv;
 
     DEF_F(Convert, out_T)
     {
-        DEFAULT_DST(0) = SUB_F(y, 0);
+        DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_16bit, mpeg_range);
     }
 
     DEF_F(Convert_uv, out_T_uv)
     {
-        DEFAULT_DST(1) = SUB_F(uv, 1);
-        DEFAULT_DST(2) = SUB_F(uv, 2);
+        DEFAULT_DST(1) = conv_8to16(SUB_F(uv, 1), mask_16bit, mpeg_range);
+        DEFAULT_DST(2) = conv_8to16(SUB_F(uv, 2), mask_16bit, mpeg_range);
     }
 };
 
-struct Convert_yuv420p_p010le
+struct Convert_planar8_semiplanar8
 {
     static const int in_bit_depth = 8;
     typedef uchar in_T;
     typedef uchar in_T_uv;
-    typedef ushort out_T;
-    typedef ushort2 out_T_uv;
+    typedef uchar out_T;
+    typedef uchar2 out_T_uv;
 
     DEF_F(Convert, out_T)
     {
-        DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_10bit);
+        DEFAULT_DST(0) = SUB_F(y, 0);
     }
 
     DEF_F(Convert_uv, out_T_uv)
     {
-        DEFAULT_DST(1) = make_ushort2(
-            conv_8to16(SUB_F(uv, 1), mask_10bit),
-            conv_8to16(SUB_F(uv, 2), mask_10bit)
+        DEFAULT_DST(1) = make_uchar2(
+            SUB_F(uv, 1),
+            SUB_F(uv, 2)
         );
     }
 };
 
-struct Convert_yuv420p_p016le
+struct Convert_planar8_semiplanar10
 {
     static const int in_bit_depth = 8;
     typedef uchar in_T;
@@ -177,68 +200,71 @@ struct Convert_yuv420p_p016le
 
     DEF_F(Convert, out_T)
     {
-        DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_16bit);
+        DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_10bit, mpeg_range);
     }
 
     DEF_F(Convert_uv, out_T_uv)
     {
         DEFAULT_DST(1) = make_ushort2(
-            conv_8to16(SUB_F(uv, 1), mask_16bit),
-            conv_8to16(SUB_F(uv, 2), mask_16bit)
+            conv_8to16(SUB_F(uv, 1), mask_10bit, mpeg_range),
+            conv_8to16(SUB_F(uv, 2), mask_10bit, mpeg_range)
         );
     }
 };
 
-struct Convert_yuv420p_yuv444p16le
+struct Convert_planar8_semiplanar16
 {
     static const int in_bit_depth = 8;
     typedef uchar in_T;
     typedef uchar in_T_uv;
     typedef ushort out_T;
-    typedef ushort out_T_uv;
+    typedef ushort2 out_T_uv;
 
     DEF_F(Convert, out_T)
     {
-        DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_16bit);
+        DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_16bit, mpeg_range);
     }
 
     DEF_F(Convert_uv, out_T_uv)
     {
-        DEFAULT_DST(1) = conv_8to16(SUB_F(uv, 1), mask_16bit);
-        DEFAULT_DST(2) = conv_8to16(SUB_F(uv, 2), mask_16bit);
+        DEFAULT_DST(1) = make_ushort2(
+            conv_8to16(SUB_F(uv, 1), mask_16bit, mpeg_range),
+            conv_8to16(SUB_F(uv, 2), mask_16bit, mpeg_range)
+        );
     }
 };
 
-// nv12->X
 
-struct Convert_nv12_yuv420p
+
+// planar10->X
+
+struct Convert_planar10_planar8
 {
-    static const int in_bit_depth = 8;
-    typedef uchar in_T;
-    typedef uchar2 in_T_uv;
+    static const int in_bit_depth = 10;
+    typedef ushort in_T;
+    typedef ushort in_T_uv;
     typedef uchar out_T;
     typedef uchar out_T_uv;
 
     DEF_F(Convert, out_T)
     {
-        DEFAULT_DST(0) = SUB_F(y, 0);
+        DEFAULT_DST(0) = conv_10to8pl(SUB_F(y, 0));
     }
 
     DEF_F(Convert_uv, out_T_uv)
     {
-        in_T_uv res = SUB_F(uv, 1);
-        DEFAULT_DST(1) = res.x;
-        DEFAULT_DST(2) = res.y;
+        DEFAULT_DST(1) = conv_10to8pl(SUB_F(uv, 1));
+        DEFAULT_DST(2) = conv_10to8pl(SUB_F(uv, 2));
     }
 };
 
-struct Convert_nv12_nv12
+struct Convert_planar10_planar10
 {
-    static const int in_bit_depth = 8;
-    typedef uchar in_T;
-    typedef uchar2 in_T_uv;
-    typedef uchar out_T;
-    typedef uchar2 out_T_uv;
+    static const int in_bit_depth = 10;
+    typedef ushort in_T;
+    typedef ushort in_T_uv;
+    typedef ushort out_T;
+    typedef ushort out_T_uv;
 
     DEF_F(Convert, out_T)
     {
@@ -248,148 +274,145 @@ struct Convert_nv12_nv12
     DEF_F(Convert_uv, out_T_uv)
     {
         DEFAULT_DST(1) = SUB_F(uv, 1);
+        DEFAULT_DST(2) = SUB_F(uv, 2);
     }
 };
 
-struct Convert_nv12_yuv444p
+struct Convert_planar10_planar16
 {
-    static const int in_bit_depth = 8;
-    typedef uchar in_T;
-    typedef uchar2 in_T_uv;
-    typedef uchar out_T;
-    typedef uchar out_T_uv;
+    static const int in_bit_depth = 10;
+    typedef ushort in_T;
+    typedef ushort in_T_uv;
+    typedef ushort out_T;
+    typedef ushort out_T_uv;
 
     DEF_F(Convert, out_T)
     {
-        DEFAULT_DST(0) = SUB_F(y, 0);
+        DEFAULT_DST(0) = conv_10to16pl(SUB_F(y, 0), mpeg_range);
     }
 
     DEF_F(Convert_uv, out_T_uv)
     {
-        in_T_uv res = SUB_F(uv, 1);
-        DEFAULT_DST(1) = res.x;
-        DEFAULT_DST(2) = res.y;
+        DEFAULT_DST(1) = conv_10to16pl(SUB_F(uv, 1), mpeg_range);
+        DEFAULT_DST(2) = conv_10to16pl(SUB_F(uv, 2), mpeg_range);
     }
 };
 
-struct Convert_nv12_p010le
+struct Convert_planar10_semiplanar8
 {
-    static const int in_bit_depth = 8;
-    typedef uchar in_T;
-    typedef uchar2 in_T_uv;
-    typedef ushort out_T;
-    typedef ushort2 out_T_uv;
+    static const int in_bit_depth = 10;
+    typedef ushort in_T;
+    typedef ushort in_T_uv;
+    typedef uchar out_T;
+    typedef uchar2 out_T_uv;
 
     DEF_F(Convert, out_T)
     {
-        DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_10bit);
+        DEFAULT_DST(0) = conv_10to8pl(SUB_F(y, 0));
     }
 
     DEF_F(Convert_uv, out_T_uv)
     {
-        in_T_uv res = SUB_F(uv, 1);
-        DEFAULT_DST(1) = make_ushort2(
-            conv_8to16(res.x, mask_10bit),
-            conv_8to16(res.y, mask_10bit)
+        DEFAULT_DST(1) = make_uchar2(
+            conv_10to8pl(SUB_F(uv, 1)),
+            conv_10to8pl(SUB_F(uv, 2))
         );
     }
 };
 
-struct Convert_nv12_p016le
+struct Convert_planar10_semiplanar10
 {
-    static const int in_bit_depth = 8;
-    typedef uchar in_T;
-    typedef uchar2 in_T_uv;
+    static const int in_bit_depth = 10;
+    typedef ushort in_T;
+    typedef ushort in_T_uv;
     typedef ushort out_T;
     typedef ushort2 out_T_uv;
 
     DEF_F(Convert, out_T)
     {
-        DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_16bit);
+        DEFAULT_DST(0) = (SUB_F(y, 0) << 6);
     }
 
     DEF_F(Convert_uv, out_T_uv)
     {
-        in_T_uv res = SUB_F(uv, 1);
         DEFAULT_DST(1) = make_ushort2(
-            conv_8to16(res.x, mask_16bit),
-            conv_8to16(res.y, mask_16bit)
+            (SUB_F(uv, 1) << 6),
+            (SUB_F(uv, 2) << 6)
         );
     }
 };
 
-struct Convert_nv12_yuv444p16le
+struct Convert_planar10_semiplanar16
 {
-    static const int in_bit_depth = 8;
-    typedef uchar in_T;
-    typedef uchar2 in_T_uv;
+    static const int in_bit_depth = 10;
+    typedef ushort in_T;
+    typedef ushort in_T_uv;
     typedef ushort out_T;
-    typedef ushort out_T_uv;
+    typedef ushort2 out_T_uv;
 
     DEF_F(Convert, out_T)
     {
-        DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_16bit);
+        DEFAULT_DST(0) = conv_10to16pl(SUB_F(y, 0), mpeg_range);
     }
 
     DEF_F(Convert_uv, out_T_uv)
     {
-        in_T_uv res = SUB_F(uv, 1);
-        DEFAULT_DST(1) = conv_8to16(res.x, mask_16bit);
-        DEFAULT_DST(2) = conv_8to16(res.y, mask_16bit);
+        DEFAULT_DST(1) = make_ushort2(
+            conv_10to16pl(SUB_F(uv, 1), mpeg_range),
+            conv_10to16pl(SUB_F(uv, 2), mpeg_range)
+        );
     }
 };
 
-// yuv444p->X
+// planar16->X
 
-struct Convert_yuv444p_yuv420p
+struct Convert_planar16_planar8
 {
-    static const int in_bit_depth = 8;
-    typedef uchar in_T;
-    typedef uchar in_T_uv;
+    static const int in_bit_depth = 16;
+    typedef ushort in_T;
+    typedef ushort in_T_uv;
     typedef uchar out_T;
     typedef uchar out_T_uv;
 
     DEF_F(Convert, out_T)
     {
-        DEFAULT_DST(0) = SUB_F(y, 0);
+        DEFAULT_DST(0) = conv_16to8(SUB_F(y, 0));
     }
 
     DEF_F(Convert_uv, out_T_uv)
     {
-        DEFAULT_DST(1) = SUB_F(uv, 1);
-        DEFAULT_DST(2) = SUB_F(uv, 2);
+        DEFAULT_DST(1) = conv_16to8(SUB_F(uv, 1));
+        DEFAULT_DST(2) = conv_16to8(SUB_F(uv, 2));
     }
 };
 
-struct Convert_yuv444p_nv12
+struct Convert_planar16_planar10
 {
-    static const int in_bit_depth = 8;
-    typedef uchar in_T;
-    typedef uchar in_T_uv;
-    typedef uchar out_T;
-    typedef uchar2 out_T_uv;
+    static const int in_bit_depth = 16;
+    typedef ushort in_T;
+    typedef ushort in_T_uv;
+    typedef ushort out_T;
+    typedef ushort out_T_uv;
 
     DEF_F(Convert, out_T)
     {
-        DEFAULT_DST(0) = SUB_F(y, 0);
+        DEFAULT_DST(0) = conv_16to10pl(SUB_F(y, 0));
     }
 
     DEF_F(Convert_uv, out_T_uv)
     {
-        DEFAULT_DST(1) = make_uchar2(
-            SUB_F(uv, 1),
-            SUB_F(uv, 2)
-        );
+        DEFAULT_DST(1) = conv_16to10pl(SUB_F(uv, 1));
+        DEFAULT_DST(2) = conv_16to10pl(SUB_F(uv, 2));
     }
 };
 
-struct Convert_yuv444p_yuv444p
+struct Convert_planar16_planar16
 {
-    static const int in_bit_depth = 8;
-    typedef uchar in_T;
-    typedef uchar in_T_uv;
-    typedef uchar out_T;
-    typedef uchar out_T_uv;
+    static const int in_bit_depth = 16;
+    typedef ushort in_T;
+    typedef ushort in_T_uv;
+    typedef ushort out_T;
+    typedef ushort out_T_uv;
 
     DEF_F(Convert, out_T)
     {
@@ -403,144 +426,144 @@ struct Convert_yuv444p_yuv444p
     }
 };
 
-struct Convert_yuv444p_p010le
+struct Convert_planar16_semiplanar8
 {
-    static const int in_bit_depth = 8;
-    typedef uchar in_T;
-    typedef uchar in_T_uv;
-    typedef ushort out_T;
-    typedef ushort2 out_T_uv;
+    static const int in_bit_depth = 16;
+    typedef ushort in_T;
+    typedef ushort in_T_uv;
+    typedef uchar out_T;
+    typedef uchar2 out_T_uv;
 
     DEF_F(Convert, out_T)
     {
-        DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_10bit);
+        DEFAULT_DST(0) = conv_16to8(SUB_F(y, 0));
     }
 
     DEF_F(Convert_uv, out_T_uv)
     {
-        DEFAULT_DST(1) = make_ushort2(
-            conv_8to16(SUB_F(uv, 1), mask_10bit),
-            conv_8to16(SUB_F(uv, 2), mask_10bit)
+        DEFAULT_DST(1) = make_uchar2(
+            conv_16to8(SUB_F(uv, 1)),
+            conv_16to8(SUB_F(uv, 2))
         );
     }
 };
 
-struct Convert_yuv444p_p016le
+struct Convert_planar16_semiplanar10
 {
-    static const int in_bit_depth = 8;
-    typedef uchar in_T;
-    typedef uchar in_T_uv;
+    static const int in_bit_depth = 16;
+    typedef ushort in_T;
+    typedef ushort in_T_uv;
     typedef ushort out_T;
     typedef ushort2 out_T_uv;
 
     DEF_F(Convert, out_T)
     {
-        DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_16bit);
+        DEFAULT_DST(0) = conv_16to10(SUB_F(y, 0));
     }
 
     DEF_F(Convert_uv, out_T_uv)
     {
         DEFAULT_DST(1) = make_ushort2(
-            conv_8to16(SUB_F(uv, 1), mask_16bit),
-            conv_8to16(SUB_F(uv, 2), mask_16bit)
+            conv_16to10(SUB_F(uv, 1)),
+            conv_16to10(SUB_F(uv, 2))
         );
     }
 };
 
-struct Convert_yuv444p_yuv444p16le
+struct Convert_planar16_semiplanar16
 {
-    static const int in_bit_depth = 8;
-    typedef uchar in_T;
-    typedef uchar in_T_uv;
+    static const int in_bit_depth = 16;
+    typedef ushort in_T;
+    typedef ushort in_T_uv;
     typedef ushort out_T;
-    typedef ushort out_T_uv;
+    typedef ushort2 out_T_uv;
 
     DEF_F(Convert, out_T)
     {
-        DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_16bit);
+        DEFAULT_DST(0) = SUB_F(y, 0);
     }
 
     DEF_F(Convert_uv, out_T_uv)
     {
-        DEFAULT_DST(1) = conv_8to16(SUB_F(uv, 1), mask_16bit);
-        DEFAULT_DST(2) = conv_8to16(SUB_F(uv, 2), mask_16bit);
+        DEFAULT_DST(1) = make_ushort2(
+            SUB_F(uv, 1),
+            SUB_F(uv, 2)
+        );
     }
 };
 
-// p010le->X
+// semiplanar8->X
 
-struct Convert_p010le_yuv420p
+struct Convert_semiplanar8_planar8
 {
-    static const int in_bit_depth = 10;
-    typedef ushort in_T;
-    typedef ushort2 in_T_uv;
+    static const int in_bit_depth = 8;
+    typedef uchar in_T;
+    typedef uchar2 in_T_uv;
     typedef uchar out_T;
     typedef uchar out_T_uv;
 
     DEF_F(Convert, out_T)
     {
-        DEFAULT_DST(0) = conv_10to8(SUB_F(y, 0));
+        DEFAULT_DST(0) = SUB_F(y, 0);
     }
 
     DEF_F(Convert_uv, out_T_uv)
     {
         in_T_uv res = SUB_F(uv, 1);
-        DEFAULT_DST(1) = conv_10to8(res.x);
-        DEFAULT_DST(2) = conv_10to8(res.y);
+        DEFAULT_DST(1) = res.x;
+        DEFAULT_DST(2) = res.y;
     }
 };
 
-struct Convert_p010le_nv12
+struct Convert_semiplanar8_planar10
 {
-    static const int in_bit_depth = 10;
-    typedef ushort in_T;
-    typedef ushort2 in_T_uv;
-    typedef uchar out_T;
-    typedef uchar2 out_T_uv;
+    static const int in_bit_depth = 8;
+    typedef uchar in_T;
+    typedef uchar2 in_T_uv;
+    typedef ushort out_T;
+    typedef ushort out_T_uv;
 
     DEF_F(Convert, out_T)
     {
-        DEFAULT_DST(0) = conv_10to8(SUB_F(y, 0));
+        DEFAULT_DST(0) = conv_8to10pl(SUB_F(y, 0), mpeg_range);
     }
 
     DEF_F(Convert_uv, out_T_uv)
     {
         in_T_uv res = SUB_F(uv, 1);
-        DEFAULT_DST(1) = make_uchar2(
-            conv_10to8(res.x),
-            conv_10to8(res.y)
-        );
+        DEFAULT_DST(1) = conv_8to10pl(res.x, mpeg_range);
+        DEFAULT_DST(2) = conv_8to10pl(res.y, mpeg_range);
     }
 };
 
-struct Convert_p010le_yuv444p
+struct Convert_semiplanar8_planar16
 {
-    static const int in_bit_depth = 10;
-    typedef ushort in_T;
-    typedef ushort2 in_T_uv;
-    typedef uchar out_T;
-    typedef uchar out_T_uv;
+    static const int in_bit_depth = 8;
+    typedef uchar in_T;
+    typedef uchar2 in_T_uv;
+    typedef ushort out_T;
+    typedef ushort out_T_uv;
 
     DEF_F(Convert, out_T)
     {
-        DEFAULT_DST(0) = conv_10to8(SUB_F(y, 0));
+        DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_16bit, mpeg_range);
     }
 
     DEF_F(Convert_uv, out_T_uv)
     {
         in_T_uv res = SUB_F(uv, 1);
-        DEFAULT_DST(1) = conv_10to8(res.x);
-        DEFAULT_DST(2) = conv_10to8(res.y);
+        DEFAULT_DST(1) = conv_8to16(res.x, mask_16bit, mpeg_range);
+        DEFAULT_DST(2) = conv_8to16(res.y, mask_16bit, mpeg_range);
     }
 };
 
-struct Convert_p010le_p010le
+struct Convert_semiplanar8_semiplanar8
 {
-    static const int in_bit_depth = 10;
-    typedef ushort in_T;
-    typedef ushort2 in_T_uv;
-    typedef ushort out_T;
-    typedef ushort2 out_T_uv;
+    static const int in_bit_depth = 8;
+    typedef uchar in_T;
+    typedef uchar2 in_T_uv;
+    typedef uchar out_T;
+    typedef uchar2 out_T_uv;
 
     DEF_F(Convert, out_T)
     {
@@ -553,55 +576,57 @@ struct Convert_p010le_p010le
     }
 };
 
-struct Convert_p010le_p016le
+struct Convert_semiplanar8_semiplanar10
 {
-    static const int in_bit_depth = 10;
-    typedef ushort in_T;
-    typedef ushort2 in_T_uv;
+    static const int in_bit_depth = 8;
+    typedef uchar in_T;
+    typedef uchar2 in_T_uv;
     typedef ushort out_T;
     typedef ushort2 out_T_uv;
 
     DEF_F(Convert, out_T)
     {
-        DEFAULT_DST(0) = conv_10to16(SUB_F(y, 0));
+        DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_10bit, mpeg_range);
     }
 
     DEF_F(Convert_uv, out_T_uv)
     {
         in_T_uv res = SUB_F(uv, 1);
         DEFAULT_DST(1) = make_ushort2(
-            conv_10to16(res.x),
-            conv_10to16(res.y)
+            conv_8to16(res.x, mask_10bit, mpeg_range),
+            conv_8to16(res.y, mask_10bit, mpeg_range)
         );
     }
 };
 
-struct Convert_p010le_yuv444p16le
+struct Convert_semiplanar8_semiplanar16
 {
-    static const int in_bit_depth = 10;
-    typedef ushort in_T;
-    typedef ushort2 in_T_uv;
+    static const int in_bit_depth = 8;
+    typedef uchar in_T;
+    typedef uchar2 in_T_uv;
     typedef ushort out_T;
-    typedef ushort out_T_uv;
+    typedef ushort2 out_T_uv;
 
     DEF_F(Convert, out_T)
     {
-        DEFAULT_DST(0) = conv_10to16(SUB_F(y, 0));
+        DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_16bit, mpeg_range);
     }
 
     DEF_F(Convert_uv, out_T_uv)
     {
         in_T_uv res = SUB_F(uv, 1);
-        DEFAULT_DST(1) = conv_10to16(res.x);
-        DEFAULT_DST(2) = conv_10to16(res.y);
+        DEFAULT_DST(1) = make_ushort2(
+            conv_8to16(res.x, mask_16bit, mpeg_range),
+            conv_8to16(res.y, mask_16bit, mpeg_range)
+        );
     }
 };
 
-// p016le->X
+// semiplanar10->X
 
-struct Convert_p016le_yuv420p
+struct Convert_semiplanar10_planar8
 {
-    static const int in_bit_depth = 16;
+    static const int in_bit_depth = 10;
     typedef ushort in_T;
     typedef ushort2 in_T_uv;
     typedef uchar out_T;
@@ -609,87 +634,85 @@ struct Convert_p016le_yuv420p
 
     DEF_F(Convert, out_T)
     {
-        DEFAULT_DST(0) = conv_16to8(SUB_F(y, 0));
+        DEFAULT_DST(0) = conv_10to8(SUB_F(y, 0));
     }
 
     DEF_F(Convert_uv, out_T_uv)
     {
         in_T_uv res = SUB_F(uv, 1);
-        DEFAULT_DST(1) = conv_16to8(res.x);
-        DEFAULT_DST(2) = conv_16to8(res.y);
+        DEFAULT_DST(1) = conv_10to8(res.x);
+        DEFAULT_DST(2) = conv_10to8(res.y);
     }
 };
 
-struct Convert_p016le_nv12
+struct Convert_semiplanar10_planar10
 {
-    static const int in_bit_depth = 16;
+    static const int in_bit_depth = 10;
     typedef ushort in_T;
     typedef ushort2 in_T_uv;
-    typedef uchar out_T;
-    typedef uchar2 out_T_uv;
+    typedef ushort out_T;
+    typedef ushort out_T_uv;
 
     DEF_F(Convert, out_T)
     {
-        DEFAULT_DST(0) = conv_16to8(SUB_F(y, 0));
+        DEFAULT_DST(0) = SUB_F(y, 0) >> 6;
     }
 
     DEF_F(Convert_uv, out_T_uv)
     {
         in_T_uv res = SUB_F(uv, 1);
-        DEFAULT_DST(1) = make_uchar2(
-            conv_16to8(res.x),
-            conv_16to8(res.y)
-        );
+        DEFAULT_DST(1) = res.x >> 6;
+        DEFAULT_DST(2) = res.y >> 6;
     }
 };
 
-struct Convert_p016le_yuv444p
+struct Convert_semiplanar10_planar16
 {
-    static const int in_bit_depth = 16;
+    static const int in_bit_depth = 10;
     typedef ushort in_T;
     typedef ushort2 in_T_uv;
-    typedef uchar out_T;
-    typedef uchar out_T_uv;
+    typedef ushort out_T;
+    typedef ushort out_T_uv;
 
     DEF_F(Convert, out_T)
     {
-        DEFAULT_DST(0) = conv_16to8(SUB_F(y, 0));
+        DEFAULT_DST(0) = conv_10to16(SUB_F(y, 0), mpeg_range);
     }
 
     DEF_F(Convert_uv, out_T_uv)
     {
         in_T_uv res = SUB_F(uv, 1);
-        DEFAULT_DST(1) = conv_16to8(res.x);
-        DEFAULT_DST(2) = conv_16to8(res.y);
+        DEFAULT_DST(1) = conv_10to16(res.x, mpeg_range);
+        DEFAULT_DST(2) = conv_10to16(res.y, mpeg_range);
     }
 };
 
-struct Convert_p016le_p010le
+struct Convert_semiplanar10_semiplanar8
 {
-    static const int in_bit_depth = 16;
+    static const int in_bit_depth = 10;
     typedef ushort in_T;
     typedef ushort2 in_T_uv;
-    typedef ushort out_T;
-    typedef ushort2 out_T_uv;
+    typedef uchar out_T;
+    typedef uchar2 out_T_uv;
 
     DEF_F(Convert, out_T)
     {
-        DEFAULT_DST(0) = conv_16to10(SUB_F(y, 0));
+        DEFAULT_DST(0) = conv_10to8(SUB_F(y, 0));
     }
 
     DEF_F(Convert_uv, out_T_uv)
     {
         in_T_uv res = SUB_F(uv, 1);
-        DEFAULT_DST(1) = make_ushort2(
-            conv_16to10(res.x),
-            conv_16to10(res.y)
+        DEFAULT_DST(1) = make_uchar2(
+            conv_10to8(res.x),
+            conv_10to8(res.y)
         );
     }
 };
 
-struct Convert_p016le_p016le
+struct Convert_semiplanar10_semiplanar10
 {
-    static const int in_bit_depth = 16;
+    static const int in_bit_depth = 10;
     typedef ushort in_T;
     typedef ushort2 in_T_uv;
     typedef ushort out_T;
@@ -706,34 +729,37 @@ struct Convert_p016le_p016le
     }
 };
 
-struct Convert_p016le_yuv444p16le
+struct Convert_semiplanar10_semiplanar16
 {
-    static const int in_bit_depth = 16;
+    static const int in_bit_depth = 10;
     typedef ushort in_T;
     typedef ushort2 in_T_uv;
     typedef ushort out_T;
-    typedef ushort out_T_uv;
+    typedef ushort2 out_T_uv;
 
     DEF_F(Convert, out_T)
     {
-        DEFAULT_DST(0) = SUB_F(y, 0);
+        DEFAULT_DST(0) = conv_10to16(SUB_F(y, 0), mpeg_range);
     }
 
     DEF_F(Convert_uv, out_T_uv)
     {
         in_T_uv res = SUB_F(uv, 1);
-        DEFAULT_DST(1) = res.x;
-        DEFAULT_DST(2) = res.y;
+        DEFAULT_DST(1) = make_ushort2(
+            conv_10to16(res.x, mpeg_range),
+            conv_10to16(res.y, mpeg_range)
+        );
     }
 };
 
-// yuv444p16le->X
 
-struct Convert_yuv444p16le_yuv420p
+// semiplanar16->X
+
+struct Convert_semiplanar16_planar8
 {
     static const int in_bit_depth = 16;
     typedef ushort in_T;
-    typedef ushort in_T_uv;
+    typedef ushort2 in_T_uv;
     typedef uchar out_T;
     typedef uchar out_T_uv;
 
@@ -744,104 +770,107 @@ struct Convert_yuv444p16le_yuv420p
 
     DEF_F(Convert_uv, out_T_uv)
     {
-        DEFAULT_DST(1) = conv_16to8(SUB_F(uv, 1));
-        DEFAULT_DST(2) = conv_16to8(SUB_F(uv, 2));
+        in_T_uv res = SUB_F(uv, 1);
+        DEFAULT_DST(1) = conv_16to8(res.x);
+        DEFAULT_DST(2) = conv_16to8(res.y);
     }
 };
 
-struct Convert_yuv444p16le_nv12
+struct Convert_semiplanar16_planar10
 {
     static const int in_bit_depth = 16;
     typedef ushort in_T;
-    typedef ushort in_T_uv;
-    typedef uchar out_T;
-    typedef uchar2 out_T_uv;
+    typedef ushort2 in_T_uv;
+    typedef ushort out_T;
+    typedef ushort out_T_uv;
 
     DEF_F(Convert, out_T)
     {
-        DEFAULT_DST(0) = conv_16to8(SUB_F(y, 0));
+        DEFAULT_DST(0) = conv_16to10pl(SUB_F(y, 0));
     }
 
     DEF_F(Convert_uv, out_T_uv)
     {
-        DEFAULT_DST(1) = make_uchar2(
-            conv_16to8(SUB_F(uv, 1)),
-            conv_16to8(SUB_F(uv, 2))
-        );
+        in_T_uv res = SUB_F(uv, 1);
+        DEFAULT_DST(1) = conv_16to10pl(res.x);
+        DEFAULT_DST(2) = conv_16to10pl(res.y);
     }
 };
 
-struct Convert_yuv444p16le_yuv444p
+struct Convert_semiplanar16_planar16
 {
     static const int in_bit_depth = 16;
     typedef ushort in_T;
-    typedef ushort in_T_uv;
-    typedef uchar out_T;
-    typedef uchar out_T_uv;
+    typedef ushort2 in_T_uv;
+    typedef ushort out_T;
+    typedef ushort out_T_uv;
 
     DEF_F(Convert, out_T)
     {
-        DEFAULT_DST(0) = conv_16to8(SUB_F(y, 0));
+        DEFAULT_DST(0) = SUB_F(y, 0);
     }
 
     DEF_F(Convert_uv, out_T_uv)
     {
-        DEFAULT_DST(1) = conv_16to8(SUB_F(uv, 1));
-        DEFAULT_DST(2) = conv_16to8(SUB_F(uv, 2));
+        in_T_uv res = SUB_F(uv, 1);
+        DEFAULT_DST(1) = res.x;
+        DEFAULT_DST(2) = res.y;
     }
 };
 
-struct Convert_yuv444p16le_p010le
+struct Convert_semiplanar16_semiplanar8
 {
     static const int in_bit_depth = 16;
     typedef ushort in_T;
-    typedef ushort in_T_uv;
-    typedef ushort out_T;
-    typedef ushort2 out_T_uv;
+    typedef ushort2 in_T_uv;
+    typedef uchar out_T;
+    typedef uchar2 out_T_uv;
 
     DEF_F(Convert, out_T)
     {
-        DEFAULT_DST(0) = conv_16to10(SUB_F(y, 0));
+        DEFAULT_DST(0) = conv_16to8(SUB_F(y, 0));
     }
 
     DEF_F(Convert_uv, out_T_uv)
     {
-        DEFAULT_DST(1) = make_ushort2(
-            conv_16to10(SUB_F(uv, 1)),
-            conv_16to10(SUB_F(uv, 2))
+        in_T_uv res = SUB_F(uv, 1);
+        DEFAULT_DST(1) = make_uchar2(
+            conv_16to8(res.x),
+            conv_16to8(res.y)
         );
     }
 };
 
-struct Convert_yuv444p16le_p016le
+struct Convert_semiplanar16_semiplanar10
 {
     static const int in_bit_depth = 16;
     typedef ushort in_T;
-    typedef ushort in_T_uv;
+    typedef ushort2 in_T_uv;
     typedef ushort out_T;
     typedef ushort2 out_T_uv;
 
     DEF_F(Convert, out_T)
     {
-        DEFAULT_DST(0) = SUB_F(y, 0);
+        DEFAULT_DST(0) = conv_16to10(SUB_F(y, 0));
     }
 
     DEF_F(Convert_uv, out_T_uv)
     {
+        in_T_uv res = SUB_F(uv, 1);
         DEFAULT_DST(1) = make_ushort2(
-            SUB_F(uv, 1),
-            SUB_F(uv, 2)
+            conv_16to10(res.x),
+            conv_16to10(res.y)
         );
     }
 };
 
-struct Convert_yuv444p16le_yuv444p16le
+struct Convert_semiplanar16_semiplanar16
 {
     static const int in_bit_depth = 16;
     typedef ushort in_T;
-    typedef ushort in_T_uv;
+    typedef ushort2 in_T_uv;
     typedef ushort out_T;
-    typedef ushort out_T_uv;
+    typedef ushort2 out_T_uv;
 
     DEF_F(Convert, out_T)
     {
@@ -851,7 +880,6 @@ struct Convert_yuv444p16le_yuv444p16le
     DEF_F(Convert_uv, out_T_uv)
     {
         DEFAULT_DST(1) = SUB_F(uv, 1);
-        DEFAULT_DST(2) = SUB_F(uv, 2);
     }
 };
 
@@ -930,7 +958,7 @@ struct Convert_bgr0_bgra
             res.x,
             res.y,
             res.z,
-            1
+            0xFF
         );
     }
 
@@ -954,7 +982,7 @@ struct Convert_bgr0_rgba
             res.z,
             res.y,
             res.x,
-            1
+            0xFF
         );
     }
 
@@ -978,7 +1006,7 @@ struct Convert_rgb0_bgra
             res.z,
             res.y,
             res.x,
-            1
+            0xFF
         );
     }
 
@@ -1002,7 +1030,7 @@ struct Convert_rgb0_rgba
             res.x,
             res.y,
             res.z,
-            1
+            0xFF
         );
     }
 
@@ -1147,25 +1175,26 @@ __device__ static inline T 
Subsample_Bicubic(cudaTextureObject_t tex,
 
 /// --- FUNCTION EXPORTS ---
 
-#define KERNEL_ARGS(T) \
-    cudaTextureObject_t src_tex_0, cudaTextureObject_t src_tex_1, \
-    cudaTextureObject_t src_tex_2, cudaTextureObject_t src_tex_3, \
-    T *dst_0, T *dst_1, T *dst_2, T *dst_3,                       \
-    int dst_width, int dst_height, int dst_pitch,                 \
-    int src_left, int src_top, int src_width, int src_height, float param
+#define KERNEL_ARGS(T) CUDAScaleKernelParams params
 
 #define SUBSAMPLE(Convert, T) \
-    cudaTextureObject_t src_tex[4] =                    \
-        { src_tex_0, src_tex_1, src_tex_2, src_tex_3 }; \
-    T *dst[4] = { dst_0, dst_1, dst_2, dst_3 };         \
+    cudaTextureObject_t src_tex[4] = {                  \
+        params.src_tex[0], params.src_tex[1],           \
+        params.src_tex[2], params.src_tex[3]            \
+    };                                                  \
+    T *dst[4] = {                                       \
+        (T*)params.dst[0], (T*)params.dst[1],           \
+        (T*)params.dst[2], (T*)params.dst[3]            \
+    };                                                  \
     int xo = blockIdx.x * blockDim.x + threadIdx.x;     \
     int yo = blockIdx.y * blockDim.y + threadIdx.y;     \
-    if (yo >= dst_height || xo >= dst_width) return;    \
+    if (yo >= params.dst_height || xo >= params.dst_width) return; \
     Convert(                                            \
         src_tex, dst, xo, yo,                           \
-        dst_width, dst_height, dst_pitch,               \
-        src_left, src_top,                              \
-        src_width, src_height, param);
+        params.dst_width, params.dst_height, params.dst_pitch, \
+        params.src_left, params.src_top,                \
+        params.src_width, params.src_height,            \
+        params.param, params.mpeg_range);
 
 extern "C" {
 
@@ -1184,12 +1213,12 @@ extern "C" {
     NEAREST_KERNEL(C,_uv)
 
 #define NEAREST_KERNELS(C) \
-    NEAREST_KERNEL_RAW(yuv420p_ ## C)     \
-    NEAREST_KERNEL_RAW(nv12_ ## C)        \
-    NEAREST_KERNEL_RAW(yuv444p_ ## C)     \
-    NEAREST_KERNEL_RAW(p010le_ ## C)      \
-    NEAREST_KERNEL_RAW(p016le_ ## C)      \
-    NEAREST_KERNEL_RAW(yuv444p16le_ ## C)
+    NEAREST_KERNEL_RAW(planar8_ ## C)      \
+    NEAREST_KERNEL_RAW(planar10_ ## C)     \
+    NEAREST_KERNEL_RAW(planar16_ ## C)     \
+    NEAREST_KERNEL_RAW(semiplanar8_ ## C)  \
+    NEAREST_KERNEL_RAW(semiplanar10_ ## C) \
+    NEAREST_KERNEL_RAW(semiplanar16_ ## C)
 
 #define NEAREST_KERNELS_RGB(C) \
     NEAREST_KERNEL_RAW(rgb0_ ## C)  \
@@ -1197,12 +1226,12 @@ extern "C" {
     NEAREST_KERNEL_RAW(rgba_ ## C)  \
     NEAREST_KERNEL_RAW(bgra_ ## C)  \
 
-NEAREST_KERNELS(yuv420p)
-NEAREST_KERNELS(nv12)
-NEAREST_KERNELS(yuv444p)
-NEAREST_KERNELS(p010le)
-NEAREST_KERNELS(p016le)
-NEAREST_KERNELS(yuv444p16le)
+NEAREST_KERNELS(planar8)
+NEAREST_KERNELS(planar10)
+NEAREST_KERNELS(planar16)
+NEAREST_KERNELS(semiplanar8)
+NEAREST_KERNELS(semiplanar10)
+NEAREST_KERNELS(semiplanar16)
 
 NEAREST_KERNELS_RGB(rgb0)
 NEAREST_KERNELS_RGB(bgr0)
@@ -1224,12 +1253,12 @@ NEAREST_KERNELS_RGB(bgra)
     BILINEAR_KERNEL(C,_uv)
 
 #define BILINEAR_KERNELS(C) \
-    BILINEAR_KERNEL_RAW(yuv420p_ ## C)     \
-    BILINEAR_KERNEL_RAW(nv12_ ## C)        \
-    BILINEAR_KERNEL_RAW(yuv444p_ ## C)     \
-    BILINEAR_KERNEL_RAW(p010le_ ## C)      \
-    BILINEAR_KERNEL_RAW(p016le_ ## C)      \
-    BILINEAR_KERNEL_RAW(yuv444p16le_ ## C)
+    BILINEAR_KERNEL_RAW(planar8_ ## C)      \
+    BILINEAR_KERNEL_RAW(planar10_ ## C)     \
+    BILINEAR_KERNEL_RAW(planar16_ ## C)     \
+    BILINEAR_KERNEL_RAW(semiplanar8_ ## C)  \
+    BILINEAR_KERNEL_RAW(semiplanar10_ ## C) \
+    BILINEAR_KERNEL_RAW(semiplanar16_ ## C)
 
 #define BILINEAR_KERNELS_RGB(C)     \
     BILINEAR_KERNEL_RAW(rgb0_ ## C) \
@@ -1237,12 +1266,12 @@ NEAREST_KERNELS_RGB(bgra)
     BILINEAR_KERNEL_RAW(rgba_ ## C) \
     BILINEAR_KERNEL_RAW(bgra_ ## C)
 
-BILINEAR_KERNELS(yuv420p)
-BILINEAR_KERNELS(nv12)
-BILINEAR_KERNELS(yuv444p)
-BILINEAR_KERNELS(p010le)
-BILINEAR_KERNELS(p016le)
-BILINEAR_KERNELS(yuv444p16le)
+BILINEAR_KERNELS(planar8)
+BILINEAR_KERNELS(planar10)
+BILINEAR_KERNELS(planar16)
+BILINEAR_KERNELS(semiplanar8)
+BILINEAR_KERNELS(semiplanar10)
+BILINEAR_KERNELS(semiplanar16)
 
 BILINEAR_KERNELS_RGB(rgb0)
 BILINEAR_KERNELS_RGB(bgr0)
@@ -1264,12 +1293,12 @@ BILINEAR_KERNELS_RGB(bgra)
     BICUBIC_KERNEL(C,_uv)
 
 #define BICUBIC_KERNELS(C) \
-    BICUBIC_KERNEL_RAW(yuv420p_ ## C)     \
-    BICUBIC_KERNEL_RAW(nv12_ ## C)        \
-    BICUBIC_KERNEL_RAW(yuv444p_ ## C)     \
-    BICUBIC_KERNEL_RAW(p010le_ ## C)      \
-    BICUBIC_KERNEL_RAW(p016le_ ## C)      \
-    BICUBIC_KERNEL_RAW(yuv444p16le_ ## C)
+    BICUBIC_KERNEL_RAW(planar8_ ## C)      \
+    BICUBIC_KERNEL_RAW(planar10_ ## C)     \
+    BICUBIC_KERNEL_RAW(planar16_ ## C)     \
+    BICUBIC_KERNEL_RAW(semiplanar8_ ## C)  \
+    BICUBIC_KERNEL_RAW(semiplanar10_ ## C) \
+    BICUBIC_KERNEL_RAW(semiplanar16_ ## C)
 
 #define BICUBIC_KERNELS_RGB(C)      \
     BICUBIC_KERNEL_RAW(rgb0_ ## C)  \
@@ -1277,12 +1306,12 @@ BILINEAR_KERNELS_RGB(bgra)
     BICUBIC_KERNEL_RAW(rgba_ ## C)  \
     BICUBIC_KERNEL_RAW(bgra_ ## C)
 
-BICUBIC_KERNELS(yuv420p)
-BICUBIC_KERNELS(nv12)
-BICUBIC_KERNELS(yuv444p)
-BICUBIC_KERNELS(p010le)
-BICUBIC_KERNELS(p016le)
-BICUBIC_KERNELS(yuv444p16le)
+BICUBIC_KERNELS(planar8)
+BICUBIC_KERNELS(planar10)
+BICUBIC_KERNELS(planar16)
+BICUBIC_KERNELS(semiplanar8)
+BICUBIC_KERNELS(semiplanar10)
+BICUBIC_KERNELS(semiplanar16)
 
 BICUBIC_KERNELS_RGB(rgb0)
 BICUBIC_KERNELS_RGB(bgr0)
@@ -1304,12 +1333,12 @@ BICUBIC_KERNELS_RGB(bgra)
     LANCZOS_KERNEL(C,_uv)
 
 #define LANCZOS_KERNELS(C) \
-    LANCZOS_KERNEL_RAW(yuv420p_ ## C)     \
-    LANCZOS_KERNEL_RAW(nv12_ ## C)        \
-    LANCZOS_KERNEL_RAW(yuv444p_ ## C)     \
-    LANCZOS_KERNEL_RAW(p010le_ ## C)      \
-    LANCZOS_KERNEL_RAW(p016le_ ## C)      \
-    LANCZOS_KERNEL_RAW(yuv444p16le_ ## C)
+    LANCZOS_KERNEL_RAW(planar8_ ## C)      \
+    LANCZOS_KERNEL_RAW(planar10_ ## C)     \
+    LANCZOS_KERNEL_RAW(planar16_ ## C)     \
+    LANCZOS_KERNEL_RAW(semiplanar8_ ## C)  \
+    LANCZOS_KERNEL_RAW(semiplanar10_ ## C) \
+    LANCZOS_KERNEL_RAW(semiplanar16_ ## C)
 
 #define LANCZOS_KERNELS_RGB(C)      \
     LANCZOS_KERNEL_RAW(rgb0_ ## C)  \
@@ -1317,12 +1346,12 @@ BICUBIC_KERNELS_RGB(bgra)
     LANCZOS_KERNEL_RAW(rgba_ ## C)  \
     LANCZOS_KERNEL_RAW(bgra_ ## C)
 
-LANCZOS_KERNELS(yuv420p)
-LANCZOS_KERNELS(nv12)
-LANCZOS_KERNELS(yuv444p)
-LANCZOS_KERNELS(p010le)
-LANCZOS_KERNELS(p016le)
-LANCZOS_KERNELS(yuv444p16le)
+LANCZOS_KERNELS(planar8)
+LANCZOS_KERNELS(planar10)
+LANCZOS_KERNELS(planar16)
+LANCZOS_KERNELS(semiplanar8)
+LANCZOS_KERNELS(semiplanar10)
+LANCZOS_KERNELS(semiplanar16)
 
 LANCZOS_KERNELS_RGB(rgb0)
 LANCZOS_KERNELS_RGB(bgr0)
diff --git a/libavfilter/vf_scale_cuda.h b/libavfilter/vf_scale_cuda.h
index 40d5b9cfac..81fd8061e3 100644
--- a/libavfilter/vf_scale_cuda.h
+++ b/libavfilter/vf_scale_cuda.h
@@ -23,6 +23,28 @@
 #ifndef AVFILTER_SCALE_CUDA_H
 #define AVFILTER_SCALE_CUDA_H
 
+#if defined(__CUDACC__) || defined(__CUDA__)
+#include <stdint.h>
+typedef cudaTextureObject_t CUtexObject;
+typedef uint8_t* CUdeviceptr;
+#else
+#include <ffnvcodec/dynlink_cuda.h>
+#endif
+
 #define SCALE_CUDA_PARAM_DEFAULT 999999.0f
 
+typedef struct {
+    CUtexObject src_tex[4];
+    CUdeviceptr dst[4];
+    int dst_width;
+    int dst_height;
+    int dst_pitch;
+    int src_left;
+    int src_top;
+    int src_width;
+    int src_height;
+    float param;
+    int mpeg_range;
+} CUDAScaleKernelParams;
+
 #endif

commit 04b5e25d353a4b9de2a1bf0802a8138d42dfe85b
Author:     Diego de Souza <[email protected]>
AuthorDate: Thu Nov 13 09:49:45 2025 +0100
Commit:     Timo Rothenpieler <[email protected]>
CommitDate: Thu Nov 27 22:11:57 2025 +0100

    avfilter/hwupload_cuda: Expands pixel formats support
    
    Add support for uploading additional pixel formats to NVIDIA GPUs:
    - Planar formats (yuv420p10, yuv422p, yuv422p10, yuv444p10)
    - Semiplanar formats (nv16, p210, p216)
    
    Signed-off-by: Diego de Souza <[email protected]>

diff --git a/libavfilter/vf_hwupload_cuda.c b/libavfilter/vf_hwupload_cuda.c
index b505f8b298..34f959ca50 100644
--- a/libavfilter/vf_hwupload_cuda.c
+++ b/libavfilter/vf_hwupload_cuda.c
@@ -59,9 +59,9 @@ static int cudaupload_query_formats(const AVFilterContext 
*ctx,
     int ret;
 
     static const enum AVPixelFormat input_pix_fmts[] = {
-        AV_PIX_FMT_NV12, AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUVA420P, 
AV_PIX_FMT_YUV444P,
-        AV_PIX_FMT_P010, AV_PIX_FMT_P016, AV_PIX_FMT_YUV444P16,
-        AV_PIX_FMT_0RGB32, AV_PIX_FMT_0BGR32,
+        AV_PIX_FMT_NV12, AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUVA420P, 
AV_PIX_FMT_NV16, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV444P,
+        AV_PIX_FMT_P010, AV_PIX_FMT_P016, AV_PIX_FMT_P210, AV_PIX_FMT_P216, 
AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10, 
AV_PIX_FMT_YUV444P16,
+        AV_PIX_FMT_0RGB32, AV_PIX_FMT_0BGR32, AV_PIX_FMT_RGB32, 
AV_PIX_FMT_BGR32,
 #if CONFIG_VULKAN
         AV_PIX_FMT_VULKAN,
 #endif

commit 9c76d7db863c11ff968d71e4970fc2359303b65f
Author:     Diego de Souza <[email protected]>
AuthorDate: Wed Nov 12 20:08:45 2025 +0100
Commit:     Timo Rothenpieler <[email protected]>
CommitDate: Thu Nov 27 22:11:57 2025 +0100

    avutil/hwcontext_cuda: Expands pixel formats support
    
    Add support for additional pixel formats in CUDA hardware context:
    - Planar formats (yuv420p10, yuv422p, yuv422p10, yuv444p10)
    - Semiplanar formats (nv16, p210, p216)
    
    Signed-off-by: Diego de Souza <[email protected]>

diff --git a/libavutil/hwcontext_cuda.c b/libavutil/hwcontext_cuda.c
index 10d3399537..b0b65b2446 100644
--- a/libavutil/hwcontext_cuda.c
+++ b/libavutil/hwcontext_cuda.c
@@ -50,6 +50,10 @@ static const enum AVPixelFormat supported_formats[] = {
     AV_PIX_FMT_P016,
     AV_PIX_FMT_P210,
     AV_PIX_FMT_P216,
+    AV_PIX_FMT_YUV422P,
+    AV_PIX_FMT_YUV420P10,
+    AV_PIX_FMT_YUV422P10,
+    AV_PIX_FMT_YUV444P10,
     AV_PIX_FMT_YUV444P10MSB,
     AV_PIX_FMT_YUV444P12MSB,
     AV_PIX_FMT_YUV444P16,
diff --git a/libavutil/version.h b/libavutil/version.h
index db250d5c9e..d058e94425 100644
--- a/libavutil/version.h
+++ b/libavutil/version.h
@@ -80,7 +80,7 @@
 
 #define LIBAVUTIL_VERSION_MAJOR  60
 #define LIBAVUTIL_VERSION_MINOR  19
-#define LIBAVUTIL_VERSION_MICRO 100
+#define LIBAVUTIL_VERSION_MICRO 101
 
 #define LIBAVUTIL_VERSION_INT   AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
                                                LIBAVUTIL_VERSION_MINOR, \

-----------------------------------------------------------------------

Summary of changes:
 libavfilter/Makefile           |   1 +
 libavfilter/version.h          |   2 +-
 libavfilter/vf_hwupload_cuda.c |   6 +-
 libavfilter/vf_scale_cuda.c    |  90 ++++--
 libavfilter/vf_scale_cuda.cu   | 689 +++++++++++++++++++++--------------------
 libavfilter/vf_scale_cuda.h    |  22 ++
 libavutil/hwcontext_cuda.c     |   4 +
 libavutil/version.h            |   2 +-
 8 files changed, 451 insertions(+), 365 deletions(-)


hooks/post-receive
-- 

_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]

[FFmpeg-cvslog] [ffmpeg] branch master updated. 75b8567591 avfilter/scale_cuda: Add support for 4:2:2 chroma subsampling

Reply via email to