I have in mind a more general solution that also handles 9-, 12-, 14- and 16-bit input, as well as 444p and maybe 420p.
/Tomas
From 99cc73053cc9a544ae923e5c8e3f4686f3c05454 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomas=20H=C3=A4rdin?= <g...@haerdin.se> Date: Wed, 18 Jan 2023 17:28:53 +0100 Subject: [PATCH] sws/swscale_unscaled.c: Faster yuv422p10 -> yuv422p conversion Based on work by Paul B Mahol. --- libswscale/swscale_unscaled.c | 46 +++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c index 9af2e7ecc3..6c71ecb34d 100644 --- a/libswscale/swscale_unscaled.c +++ b/libswscale/swscale_unscaled.c @@ -371,6 +371,50 @@ static int yuv422pToUyvyWrapper(SwsContext *c, const uint8_t *src[], return srcSliceH; } +static int yuv422p10ToYuv422p(SwsContext *c, const uint8_t *src[], + int srcStride[], int srcSliceY, int srcSliceH, + uint8_t *dstParam[], int dstStride[]) +{ + const uint16_t *ysrc = (const uint16_t *)(src[0]); + const uint16_t *usrc = (const uint16_t *)(src[1]); + const uint16_t *vsrc = (const uint16_t *)(src[2]); + + uint8_t *ydst = dstParam[0] + dstStride[0] * srcSliceY; + uint8_t *udst = dstParam[1] + dstStride[1] * srcSliceY; + uint8_t *vdst = dstParam[2] + dstStride[2] * srcSliceY; + + for (int y = 0; y < srcSliceH; y++) { + int x = 0; + +#define BLOCK 4 + for (; x < (c->dstW / 2 / BLOCK)*BLOCK; x += BLOCK) { + for (int x2 = x; x2 < x + BLOCK; x2++) { + ydst[2*x2+0] = ysrc[2*x2+0] >> 2; + ydst[2*x2+1] = ysrc[2*x2+1] >> 2; + udst[x2] = usrc[x2] >> 2; + vdst[x2] = vsrc[x2] >> 2; + } + } + + for (; x < c->dstW / 2; x++) { + ydst[2*x+0] = ysrc[2*x+0] >> 2; + ydst[2*x+1] = ysrc[2*x+1] >> 2; + udst[x] = usrc[x] >> 2; + vdst[x] = vsrc[x] >> 2; + } + + ysrc += srcStride[0] / 2; + usrc += srcStride[1] / 2; + vsrc += srcStride[2] / 2; + + ydst += dstStride[0]; + udst += dstStride[1]; + vdst += dstStride[2]; + } + + return srcSliceH; +} + static int yuyvToYuv420Wrapper(SwsContext *c, const uint8_t *src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t *dstParam[], int dstStride[]) @@ -2223,6 
+2267,8 @@ void ff_get_unscaled_swscale(SwsContext *c) c->convert_unscaled = planarCopyWrapper; } + if (srcFormat == AV_PIX_FMT_YUV422P10 && dstFormat == AV_PIX_FMT_YUV422P) + c->convert_unscaled = yuv422p10ToYuv422p; #if ARCH_PPC ff_get_unscaled_swscale_ppc(c); #elif ARCH_ARM -- 2.30.2
_______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".