On Fri, Oct 11, 2024 at 07:54:48PM -0300, James Almer wrote: > On 10/11/2024 7:46 PM, Michael Niedermayer wrote: > > On Tue, Oct 08, 2024 at 07:50:11PM -0300, James Almer wrote: > > > Signed-off-by: James Almer <jamr...@gmail.com> > > > --- > > > libswscale/output.c | 323 ++++++++++++----------- > > > libswscale/utils.c | 2 +- > > > tests/ref/fate/filter-pixdesc-ayuv | 1 + > > > tests/ref/fate/filter-pixfmts-copy | 1 + > > > tests/ref/fate/filter-pixfmts-crop | 1 + > > > tests/ref/fate/filter-pixfmts-field | 1 + > > > tests/ref/fate/filter-pixfmts-fieldorder | 1 + > > > tests/ref/fate/filter-pixfmts-hflip | 1 + > > > tests/ref/fate/filter-pixfmts-il | 1 + > > > tests/ref/fate/filter-pixfmts-null | 1 + > > > tests/ref/fate/filter-pixfmts-pad | 1 + > > > tests/ref/fate/filter-pixfmts-scale | 1 + > > > tests/ref/fate/filter-pixfmts-transpose | 1 + > > > tests/ref/fate/filter-pixfmts-vflip | 1 + > > > 14 files changed, 183 insertions(+), 154 deletions(-) > > > create mode 100644 tests/ref/fate/filter-pixdesc-ayuv > > > > > > diff --git a/libswscale/output.c b/libswscale/output.c > > > index c9dfd6f60a..328b108089 100644 > > > --- a/libswscale/output.c > > > +++ b/libswscale/output.c > > > @@ -2668,165 +2668,177 @@ yuv2xv36le_X_c(SwsContext *c, const int16_t > > > *lumFilter, > > > } > > > } > > > -static void > > > -yuv2vuyX_1_c(SwsContext *c, const int16_t *buf0, > > > - const int16_t *ubuf[2], const int16_t *vbuf[2], > > > - const int16_t *abuf0, uint8_t *dest, int dstW, > > > - int uvalpha, int y) > > > -{ > > > - int hasAlpha = !!abuf0; > > > - int i; > > > - > > > - if (uvalpha < 2048) { > > > - for (i = 0; i < dstW; i++) { > > > - int Y = (buf0[i] + 64) >> 7; > > > - int U = (ubuf[0][i] + 64) >> 7; > > > - int V = (vbuf[0][i] + 64) >> 7; > > > - int A = 255; > > > - > > > - if (Y & 0x100) > > > - Y = av_clip_uint8(Y); > > > - if (U & 0x100) > > > - U = av_clip_uint8(U); > > > - if (V & 0x100) > > > - V = av_clip_uint8(V); > > > - > > > - if (hasAlpha) { > > > - A 
= (abuf0[i] + 64) >> 7; > > > - if (A & 0x100) > > > - A = av_clip_uint8(A); > > > - } > > > - > > > - dest[4 * i ] = V; > > > - dest[4 * i + 1] = U; > > > - dest[4 * i + 2] = Y; > > > - dest[4 * i + 3] = A; > > > - } > > > - } else { > > > - for (i = 0; i < dstW; i++) { > > > - int Y = (buf0[i] + 64) >> 7; > > > - int U = (ubuf[0][i] + ubuf[1][i] + 128) >> 8; > > > - int V = (vbuf[0][i] + vbuf[1][i] + 128) >> 8; > > > - int A = 255; > > > - > > > - if (Y & 0x100) > > > - Y = av_clip_uint8(Y); > > > - if (U & 0x100) > > > - U = av_clip_uint8(U); > > > - if (V & 0x100) > > > - V = av_clip_uint8(V); > > > - > > > - if (hasAlpha) { > > > - A = (abuf0[i] + 64) >> 7; > > > - if (A & 0x100) > > > - A = av_clip_uint8(A); > > > - } > > > - > > > - dest[4 * i ] = V; > > > - dest[4 * i + 1] = U; > > > - dest[4 * i + 2] = Y; > > > - dest[4 * i + 3] = A; > > > - } > > > - } > > > +#define AYUV_1_WRAPPER(fmt, C0, C1, C2, C3) \ > > > +static void \ > > > +yuv2 ## fmt ##_1_c(SwsContext *c, const int16_t *buf0, \ > > > + const int16_t *ubuf[2], const int16_t *vbuf[2], \ > > > + const int16_t *abuf0, uint8_t *dest, int dstW, \ > > > + int uvalpha, int y) \ > > > +{ \ > > > + int hasAlpha = !!abuf0; \ > > > + int i; \ > > > + \ > > > + if (uvalpha < 2048) { \ > > > + for (i = 0; i < dstW; i++) { \ > > > + int Y = (buf0[i] + 64) >> 7; \ > > > + int U = (ubuf[0][i] + 64) >> 7; \ > > > + int V = (vbuf[0][i] + 64) >> 7; \ > > > + int A = 255; \ > > > + \ > > > + if (Y & 0x100) \ > > > + Y = av_clip_uint8(Y); \ > > > + if (U & 0x100) \ > > > + U = av_clip_uint8(U); \ > > > + if (V & 0x100) \ > > > + V = av_clip_uint8(V); \ > > > + \ > > > + if (hasAlpha) { \ > > > + A = (abuf0[i] + 64) >> 7; \ > > > + if (A & 0x100) \ > > > + A = av_clip_uint8(A); \ > > > + } \ > > > + \ > > > + dest[4 * i ] = (C0); \ > > > + dest[4 * i + 1] = (C1); \ > > > + dest[4 * i + 2] = (C2); \ > > > + dest[4 * i + 3] = (C3); \ > > > + } \ > > > + } else { \ > > > + for (i = 0; i < dstW; i++) { \ > > > + int Y = 
(buf0[i] + 64) >> 7; \ > > > + int U = (ubuf[0][i] + ubuf[1][i] + 128) >> 8; \ > > > + int V = (vbuf[0][i] + vbuf[1][i] + 128) >> 8; \ > > > + int A = 255; \ > > > + \ > > > + if (Y & 0x100) \ > > > + Y = av_clip_uint8(Y); \ > > > + if (U & 0x100) \ > > > + U = av_clip_uint8(U); \ > > > + if (V & 0x100) \ > > > + V = av_clip_uint8(V); \ > > > + \ > > > + if (hasAlpha) { \ > > > + A = (abuf0[i] + 64) >> 7; \ > > > + if (A & 0x100) \ > > > + A = av_clip_uint8(A); \ > > > + } \ > > > + \ > > > + dest[4 * i ] = (C0); \ > > > + dest[4 * i + 1] = (C1); \ > > > + dest[4 * i + 2] = (C2); \ > > > + dest[4 * i + 3] = (C3); \ > > > + } \ > > > + } \ > > > } > > > > Is there an advantage in using huge multiline macros here ? > > > > This is ugly and hard to maintain code. Simply writing a always inline > > function > > and trusting that the compiler will inline it should result in more normal > > C code and the same result > > > > (is it faster ? or has some other advanatge ?) > > No, just figured doing it like this. I can make it an always inline > function.
Please do. We have a few slightly different ways it's done currently; here's one example (and in this example, in fact, output_pixels could itself be replaced by a function, which would probably be cleaner too). In fact everything can be cleaned up, and I certainly would love to see someone have a brilliant idea to make it cleaner with no disadvantages ... #define output_pixels(pos, Y1, U, Y2, V) \ if (target == AV_PIX_FMT_YUYV422) { \ dest[pos + 0] = Y1; \ dest[pos + 1] = U; \ dest[pos + 2] = Y2; \ dest[pos + 3] = V; \ } else if (target == AV_PIX_FMT_YVYU422) { \ dest[pos + 0] = Y1; \ dest[pos + 1] = V; \ dest[pos + 2] = Y2; \ dest[pos + 3] = U; \ } else { /* AV_PIX_FMT_UYVY422 */ \ dest[pos + 0] = U; \ dest[pos + 1] = Y1; \ dest[pos + 2] = V; \ dest[pos + 3] = Y2; \ } static av_always_inline void yuv2422_X_c_template(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, const int16_t *chrFilter, const int16_t **chrUSrc, const int16_t **chrVSrc, int chrFilterSize, const int16_t **alpSrc, uint8_t *dest, int dstW, int y, enum AVPixelFormat target) { int i; for (i = 0; i < ((dstW + 1) >> 1); i++) { int j; int Y1 = 1 << 18; int Y2 = 1 << 18; int U = 1 << 18; int V = 1 << 18; for (j = 0; j < lumFilterSize; j++) { Y1 += lumSrc[j][i * 2] * lumFilter[j]; Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j]; } for (j = 0; j < chrFilterSize; j++) { U += chrUSrc[j][i] * chrFilter[j]; V += chrVSrc[j][i] * chrFilter[j]; } Y1 >>= 19; Y2 >>= 19; U >>= 19; V >>= 19; if ((Y1 | Y2 | U | V) & 0x100) { Y1 = av_clip_uint8(Y1); Y2 = av_clip_uint8(Y2); U = av_clip_uint8(U); V = av_clip_uint8(V); } output_pixels(4*i, Y1, U, Y2, V); } } static av_always_inline void yuv2422_2_c_template(SwsContext *c, const int16_t *buf[2], const int16_t *ubuf[2], const int16_t *vbuf[2], const int16_t *abuf[2], uint8_t *dest, int dstW, int yalpha, int uvalpha, int y, enum AVPixelFormat target) { const int16_t *buf0 = buf[0], *buf1 = buf[1], *ubuf0 = ubuf[0], *ubuf1 = ubuf[1], *vbuf0 = vbuf[0], 
*vbuf1 = vbuf[1]; int yalpha1 = 4096 - yalpha; int uvalpha1 = 4096 - uvalpha; int i; av_assert2(yalpha <= 4096U); av_assert2(uvalpha <= 4096U); for (i = 0; i < ((dstW + 1) >> 1); i++) { int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19; int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19; int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19; int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19; if ((Y1 | Y2 | U | V) & 0x100) { Y1 = av_clip_uint8(Y1); Y2 = av_clip_uint8(Y2); U = av_clip_uint8(U); V = av_clip_uint8(V); } output_pixels(i * 4, Y1, U, Y2, V); } } static av_always_inline void yuv2422_1_c_template(SwsContext *c, const int16_t *buf0, const int16_t *ubuf[2], const int16_t *vbuf[2], const int16_t *abuf0, uint8_t *dest, int dstW, int uvalpha, int y, enum AVPixelFormat target) { const int16_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0]; int i; if (uvalpha < 2048) { for (i = 0; i < ((dstW + 1) >> 1); i++) { int Y1 = (buf0[i * 2 ]+64) >> 7; int Y2 = (buf0[i * 2 + 1]+64) >> 7; int U = (ubuf0[i] +64) >> 7; int V = (vbuf0[i] +64) >> 7; if ((Y1 | Y2 | U | V) & 0x100) { Y1 = av_clip_uint8(Y1); Y2 = av_clip_uint8(Y2); U = av_clip_uint8(U); V = av_clip_uint8(V); } output_pixels(i * 4, Y1, U, Y2, V); } } else { const int16_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1]; for (i = 0; i < ((dstW + 1) >> 1); i++) { int Y1 = (buf0[i * 2 ] + 64) >> 7; int Y2 = (buf0[i * 2 + 1] + 64) >> 7; int U = (ubuf0[i] + ubuf1[i]+128) >> 8; int V = (vbuf0[i] + vbuf1[i]+128) >> 8; if ((Y1 | Y2 | U | V) & 0x100) { Y1 = av_clip_uint8(Y1); Y2 = av_clip_uint8(Y2); U = av_clip_uint8(U); V = av_clip_uint8(V); } output_pixels(i * 4, Y1, U, Y2, V); } } } [...] -- Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB When the tyrant has disposed of foreign enemies by conquest or treaty, and there is nothing more to fear from them, then he is always stirring up some war or other, in order that the people may require a leader. -- Plato
signature.asc
Description: PGP signature
_______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".