W dniu 2017-09-26 o 13:31, Carl Eugen Hoyos pisze: > 2017-09-26 1:33 GMT+02:00 Mateusz <mateu...@poczta.onet.pl>: > >> I've sent C code patch 2017-09-06 (and nothing) so I thought that the >> problem is with speed. For simplicity I've attached this patch. > > You could (wait a day or two and) either add an option to > select your dithering code or put it under #ifdef so more > people can test it.
I've attached a patch that does nothing unless you specify --extra-cflags="-DNEW_DITHER_COPY" or export CFLAGS="-DNEW_DITHER_COPY". >> In theory it is enough to make only dst = (src + dither)>>shift; >> -- white in limited range has 0 on bits to remove (235*4 for example) >> so overflow is impossible. For files with full range not marked as >> full range overflow is possible (for dither > 0) and white goes >> to black. tmp - (tmp>>dst_depth) undoing this overflow. > > (Not necessarily related, sorry if I misunderstand:) > Valid limited-range frames can contain some pixels with peak > values outside of the defined range. > > Carl Eugen OK, so this protection against possible overflow is even more necessary. Mateusz
From 2c0adb2d9a0fc0fbbffc643d27860fbb779c08fc Mon Sep 17 00:00:00 2001 From: Mateusz <mateu...@poczta.onet.pl> Date: Tue, 26 Sep 2017 22:20:10 +0200 Subject: [PATCH] swscale: new precise DITHER_COPY macro (if "-DNEW_DITHER_COPY" CFLAGS) --- libswscale/swscale_unscaled.c | 47 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c index ef36aec..0d41695 100644 --- a/libswscale/swscale_unscaled.c +++ b/libswscale/swscale_unscaled.c @@ -110,6 +110,7 @@ DECLARE_ALIGNED(8, static const uint8_t, dithers)[8][8][8]={ { 112, 16,104, 8,118, 22,110, 14,}, }}; +#ifndef NEW_DITHER_COPY static const uint16_t dither_scale[15][16]={ { 2, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,}, { 2, 3, 7, 7, 13, 13, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,}, @@ -127,7 +128,7 @@ static const uint16_t dither_scale[15][16]={ { 3, 5, 7, 9, 10, 12, 14, 14, 14, 14, 14, 14, 14, 15,32767,32767,}, { 3, 5, 7, 9, 11, 12, 14, 15, 15, 15, 15, 15, 15, 15, 16,65535,}, }; - +#endif static void fillPlane(uint8_t *plane, int stride, int width, int height, int y, uint8_t val) @@ -1501,6 +1502,7 @@ static int packedCopyWrapper(SwsContext *c, const uint8_t *src[], return srcSliceH; } +#ifndef NEW_DITHER_COPY #define DITHER_COPY(dst, dstStride, src, srcStride, bswap, dbswap)\ uint16_t scale= dither_scale[dst_depth-1][src_depth-1];\ int shift= src_depth-dst_depth + dither_scale[src_depth-2][dst_depth-1];\ @@ -1521,6 +1523,49 @@ static int packedCopyWrapper(SwsContext *c, const uint8_t *src[], dst += dstStride;\ src += srcStride;\ } +#else +#define DITHER_COPY(dst, dstStride, src, srcStride, bswap, dbswap)\ + unsigned shift= src_depth-dst_depth, tmp;\ + if (shiftonly) {\ + for (i = 0; i < height; i++) {\ + const uint8_t *dither= dithers[shift-1][i&7];\ + for (j = 0; j < length-7; j+=8){\ + tmp = (bswap(src[j+0]) + dither[0])>>shift; dst[j+0] = dbswap(tmp - (tmp>>dst_depth));\ + tmp = (bswap(src[j+1]) + 
dither[1])>>shift; dst[j+1] = dbswap(tmp - (tmp>>dst_depth));\ + tmp = (bswap(src[j+2]) + dither[2])>>shift; dst[j+2] = dbswap(tmp - (tmp>>dst_depth));\ + tmp = (bswap(src[j+3]) + dither[3])>>shift; dst[j+3] = dbswap(tmp - (tmp>>dst_depth));\ + tmp = (bswap(src[j+4]) + dither[4])>>shift; dst[j+4] = dbswap(tmp - (tmp>>dst_depth));\ + tmp = (bswap(src[j+5]) + dither[5])>>shift; dst[j+5] = dbswap(tmp - (tmp>>dst_depth));\ + tmp = (bswap(src[j+6]) + dither[6])>>shift; dst[j+6] = dbswap(tmp - (tmp>>dst_depth));\ + tmp = (bswap(src[j+7]) + dither[7])>>shift; dst[j+7] = dbswap(tmp - (tmp>>dst_depth));\ + }\ + for (; j < length; j++){\ + tmp = (bswap(src[j]) + dither[j&7])>>shift; dst[j] = dbswap(tmp - (tmp>>dst_depth));\ + }\ + dst += dstStride;\ + src += srcStride;\ + }\ + } else {\ + for (i = 0; i < height; i++) {\ + const uint8_t *dither= dithers[shift-1][i&7];\ + for (j = 0; j < length-7; j+=8){\ + tmp = bswap(src[j+0]); dst[j+0] = dbswap((tmp - (tmp>>dst_depth) + dither[0])>>shift);\ + tmp = bswap(src[j+1]); dst[j+1] = dbswap((tmp - (tmp>>dst_depth) + dither[1])>>shift);\ + tmp = bswap(src[j+2]); dst[j+2] = dbswap((tmp - (tmp>>dst_depth) + dither[2])>>shift);\ + tmp = bswap(src[j+3]); dst[j+3] = dbswap((tmp - (tmp>>dst_depth) + dither[3])>>shift);\ + tmp = bswap(src[j+4]); dst[j+4] = dbswap((tmp - (tmp>>dst_depth) + dither[4])>>shift);\ + tmp = bswap(src[j+5]); dst[j+5] = dbswap((tmp - (tmp>>dst_depth) + dither[5])>>shift);\ + tmp = bswap(src[j+6]); dst[j+6] = dbswap((tmp - (tmp>>dst_depth) + dither[6])>>shift);\ + tmp = bswap(src[j+7]); dst[j+7] = dbswap((tmp - (tmp>>dst_depth) + dither[7])>>shift);\ + }\ + for (; j < length; j++){\ + tmp = bswap(src[j]); dst[j] = dbswap((tmp - (tmp>>dst_depth) + dither[j&7])>>shift);\ + }\ + dst += dstStride;\ + src += srcStride;\ + }\ + } +#endif static int planarCopyWrapper(SwsContext *c, const uint8_t *src[], int srcStride[], int srcSliceY, int srcSliceH, -- 2.7.4
_______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel