I have in mind a more general solution that also handles 9-, 12-, 14- and
16-bit input, as well as 444p and maybe 420p.

/Tomas
From 99cc73053cc9a544ae923e5c8e3f4686f3c05454 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomas=20H=C3=A4rdin?= <g...@haerdin.se>
Date: Wed, 18 Jan 2023 17:28:53 +0100
Subject: [PATCH] sws/swscale_unscaled.c: Faster yuv422p10 -> yuv422p
 conversion

Based on work by Paul B Mahol.
---
 libswscale/swscale_unscaled.c | 46 +++++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)

diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c
index 9af2e7ecc3..6c71ecb34d 100644
--- a/libswscale/swscale_unscaled.c
+++ b/libswscale/swscale_unscaled.c
@@ -371,6 +371,50 @@ static int yuv422pToUyvyWrapper(SwsContext *c, const uint8_t *src[],
     return srcSliceH;
 }
 
+/* Unscaled yuv422p10 -> yuv422p: each 10-bit sample is truncated to 8 bits
+ * (>> 2, no dithering). Writes srcSliceH rows starting at dst row srcSliceY
+ * and returns srcSliceH; odd widths get their last column converted too. */
+static int yuv422p10ToYuv422p(SwsContext *c, const uint8_t *src[],
+                               int srcStride[], int srcSliceY, int srcSliceH,
+                               uint8_t *dstParam[], int dstStride[])
+{
+    const uint16_t *ysrc = (const uint16_t *)(src[0]);
+    const uint16_t *usrc = (const uint16_t *)(src[1]);
+    const uint16_t *vsrc = (const uint16_t *)(src[2]);
+    uint8_t *ydst = dstParam[0] + dstStride[0] * srcSliceY;
+    uint8_t *udst = dstParam[1] + dstStride[1] * srcSliceY;
+    uint8_t *vdst = dstParam[2] + dstStride[2] * srcSliceY;
+
+    for (int y = 0; y < srcSliceH; y++) {
+        int x = 0;
+#define BLOCK 4 /* chroma samples handled per pass of the fixed-count loop */
+        for (; x < (c->dstW / 2 / BLOCK)*BLOCK; x += BLOCK) {
+            for (int x2 = x; x2 < x + BLOCK; x2++) {
+                ydst[2*x2+0] = ysrc[2*x2+0] >> 2;
+                ydst[2*x2+1] = ysrc[2*x2+1] >> 2;
+                udst[x2] = usrc[x2] >> 2;
+                vdst[x2] = vsrc[x2] >> 2;
+            }
+        }
+#undef BLOCK
+        for (; x < c->dstW / 2; x++) { /* leftover full luma pairs */
+            ydst[2*x+0] = ysrc[2*x+0] >> 2;
+            ydst[2*x+1] = ysrc[2*x+1] >> 2;
+            udst[x] = usrc[x] >> 2;
+            vdst[x] = vsrc[x] >> 2;
+        }
+        if (c->dstW & 1) { /* odd width: x == dstW / 2, one luma column left */
+            ydst[2*x] = ysrc[2*x] >> 2;
+            udst[x]   = usrc[x]   >> 2;
+            vdst[x]   = vsrc[x]   >> 2;
+        }
+        ysrc += srcStride[0] / 2; ydst += dstStride[0]; /* src ptrs are 16-bit */
+        usrc += srcStride[1] / 2; udst += dstStride[1];
+        vsrc += srcStride[2] / 2; vdst += dstStride[2];
+    }
+    return srcSliceH;
+}
+
 static int yuyvToYuv420Wrapper(SwsContext *c, const uint8_t *src[],
                                int srcStride[], int srcSliceY, int srcSliceH,
                                uint8_t *dstParam[], int dstStride[])
@@ -2223,6 +2267,8 @@ void ff_get_unscaled_swscale(SwsContext *c)
             c->convert_unscaled = planarCopyWrapper;
     }
 
+    if (srcFormat == AV_PIX_FMT_YUV422P10 && dstFormat == AV_PIX_FMT_YUV422P)
+        c->convert_unscaled = yuv422p10ToYuv422p;
 #if ARCH_PPC
     ff_get_unscaled_swscale_ppc(c);
 #elif ARCH_ARM
-- 
2.30.2

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

Reply via email to