ffmpeg | branch: master | Clément Bœsch <cboe...@gopro.com> | Tue Jan 31 15:47:58 2017 +0100| [2456efcc0fe27a38b287abe38a98be49cf9247d5] | committer: Clément Bœsch
Merge commit '4f247de3b797cdc9d243d26534412f81c306e5b5' * commit '4f247de3b797cdc9d243d26534412f81c306e5b5': hevcdsp_template: Templatize IDCT This commit is a noop as we already have that code from a previous commit (see 92cccb7bcd79845020ed8abebf35170c182443b2). Spacing is adjusted to reduce the diff. Merged-by: Clément Bœsch <cboe...@gopro.com> > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=2456efcc0fe27a38b287abe38a98be49cf9247d5 --- libavcodec/hevcdsp_template.c | 158 +++++++++++++++++++++--------------------- 1 file changed, 79 insertions(+), 79 deletions(-) diff --git a/libavcodec/hevcdsp_template.c b/libavcodec/hevcdsp_template.c index 66b1ac0..ae32dcc 100644 --- a/libavcodec/hevcdsp_template.c +++ b/libavcodec/hevcdsp_template.c @@ -170,97 +170,97 @@ static void FUNC(transform_4x4_luma)(int16_t *coeffs) #undef TR_4x4_LUMA -#define TR_4(dst, src, dstep, sstep, assign, end) \ - do { \ - const int e0 = 64 * src[0 * sstep] + 64 * src[2 * sstep]; \ - const int e1 = 64 * src[0 * sstep] - 64 * src[2 * sstep]; \ - const int o0 = 83 * src[1 * sstep] + 36 * src[3 * sstep]; \ - const int o1 = 36 * src[1 * sstep] - 83 * src[3 * sstep]; \ - \ - assign(dst[0 * dstep], e0 + o0); \ - assign(dst[1 * dstep], e1 + o1); \ - assign(dst[2 * dstep], e1 - o1); \ - assign(dst[3 * dstep], e0 - o0); \ +#define TR_4(dst, src, dstep, sstep, assign, end) \ + do { \ + const int e0 = 64 * src[0 * sstep] + 64 * src[2 * sstep]; \ + const int e1 = 64 * src[0 * sstep] - 64 * src[2 * sstep]; \ + const int o0 = 83 * src[1 * sstep] + 36 * src[3 * sstep]; \ + const int o1 = 36 * src[1 * sstep] - 83 * src[3 * sstep]; \ + \ + assign(dst[0 * dstep], e0 + o0); \ + assign(dst[1 * dstep], e1 + o1); \ + assign(dst[2 * dstep], e1 - o1); \ + assign(dst[3 * dstep], e0 - o0); \ } while (0) -#define TR_8(dst, src, dstep, sstep, assign, end) \ - do { \ - int i, j; \ - int e_8[4]; \ - int o_8[4] = { 0 }; \ - for (i = 0; i < 4; i++) \ - for (j = 1; j < end; j += 2) \ - o_8[i] += transform[4 *
j][i] * src[j * sstep]; \ - TR_4(e_8, src, 1, 2 * sstep, SET, 4); \ - \ - for (i = 0; i < 4; i++) { \ - assign(dst[i * dstep], e_8[i] + o_8[i]); \ - assign(dst[(7 - i) * dstep], e_8[i] - o_8[i]); \ - } \ +#define TR_8(dst, src, dstep, sstep, assign, end) \ + do { \ + int i, j; \ + int e_8[4]; \ + int o_8[4] = { 0 }; \ + for (i = 0; i < 4; i++) \ + for (j = 1; j < end; j += 2) \ + o_8[i] += transform[4 * j][i] * src[j * sstep]; \ + TR_4(e_8, src, 1, 2 * sstep, SET, 4); \ + \ + for (i = 0; i < 4; i++) { \ + assign(dst[i * dstep], e_8[i] + o_8[i]); \ + assign(dst[(7 - i) * dstep], e_8[i] - o_8[i]); \ + } \ } while (0) -#define TR_16(dst, src, dstep, sstep, assign, end) \ - do { \ - int i, j; \ - int e_16[8]; \ - int o_16[8] = { 0 }; \ - for (i = 0; i < 8; i++) \ - for (j = 1; j < end; j += 2) \ - o_16[i] += transform[2 * j][i] * src[j * sstep]; \ - TR_8(e_16, src, 1, 2 * sstep, SET, 8); \ - \ - for (i = 0; i < 8; i++) { \ - assign(dst[i * dstep], e_16[i] + o_16[i]); \ - assign(dst[(15 - i) * dstep], e_16[i] - o_16[i]); \ - } \ +#define TR_16(dst, src, dstep, sstep, assign, end) \ + do { \ + int i, j; \ + int e_16[8]; \ + int o_16[8] = { 0 }; \ + for (i = 0; i < 8; i++) \ + for (j = 1; j < end; j += 2) \ + o_16[i] += transform[2 * j][i] * src[j * sstep]; \ + TR_8(e_16, src, 1, 2 * sstep, SET, 8); \ + \ + for (i = 0; i < 8; i++) { \ + assign(dst[i * dstep], e_16[i] + o_16[i]); \ + assign(dst[(15 - i) * dstep], e_16[i] - o_16[i]); \ + } \ } while (0) -#define TR_32(dst, src, dstep, sstep, assign, end) \ - do { \ - int i, j; \ - int e_32[16]; \ - int o_32[16] = { 0 }; \ - for (i = 0; i < 16; i++) \ - for (j = 1; j < end; j += 2) \ - o_32[i] += transform[j][i] * src[j * sstep]; \ - TR_16(e_32, src, 1, 2 * sstep, SET, end/2); \ - \ - for (i = 0; i < 16; i++) { \ - assign(dst[i * dstep], e_32[i] + o_32[i]); \ - assign(dst[(31 - i) * dstep], e_32[i] - o_32[i]); \ - } \ +#define TR_32(dst, src, dstep, sstep, assign, end) \ + do { \ + int i, j; \ + int e_32[16]; \ + int 
o_32[16] = { 0 }; \ + for (i = 0; i < 16; i++) \ + for (j = 1; j < end; j += 2) \ + o_32[i] += transform[j][i] * src[j * sstep]; \ + TR_16(e_32, src, 1, 2 * sstep, SET, end/2); \ + \ + for (i = 0; i < 16; i++) { \ + assign(dst[i * dstep], e_32[i] + o_32[i]); \ + assign(dst[(31 - i) * dstep], e_32[i] - o_32[i]); \ + } \ } while (0) -#define IDCT_VAR4(H) \ +#define IDCT_VAR4(H) \ int limit2 = FFMIN(col_limit + 4, H) -#define IDCT_VAR8(H) \ - int limit = FFMIN(col_limit, H); \ - int limit2 = FFMIN(col_limit + 4, H) +#define IDCT_VAR8(H) \ + int limit = FFMIN(col_limit, H); \ + int limit2 = FFMIN(col_limit + 4, H) #define IDCT_VAR16(H) IDCT_VAR8(H) #define IDCT_VAR32(H) IDCT_VAR8(H) -#define IDCT(H) \ -static void FUNC(idct_##H ##x ##H )( \ - int16_t *coeffs, int col_limit) { \ - int i; \ - int shift = 7; \ - int add = 1 << (shift - 1); \ - int16_t *src = coeffs; \ - IDCT_VAR ##H(H); \ - \ - for (i = 0; i < H; i++) { \ - TR_ ## H(src, src, H, H, SCALE, limit2); \ - if (limit2 < H && i%4 == 0 && !!i) \ - limit2 -= 4; \ - src++; \ - } \ - \ - shift = 20 - BIT_DEPTH; \ - add = 1 << (shift - 1); \ - for (i = 0; i < H; i++) { \ - TR_ ## H(coeffs, coeffs, 1, 1, SCALE, limit); \ - coeffs += H; \ - } \ +#define IDCT(H) \ +static void FUNC(idct_ ## H ## x ## H )(int16_t *coeffs, \ + int col_limit) { \ + int i; \ + int shift = 7; \ + int add = 1 << (shift - 1); \ + int16_t *src = coeffs; \ + IDCT_VAR ##H(H); \ + \ + for (i = 0; i < H; i++) { \ + TR_ ## H(src, src, H, H, SCALE, limit2); \ + if (limit2 < H && i%4 == 0 && !!i) \ + limit2 -= 4; \ + src++; \ + } \ + \ + shift = 20 - BIT_DEPTH; \ + add = 1 << (shift - 1); \ + for (i = 0; i < H; i++) { \ + TR_ ## H(coeffs, coeffs, 1, 1, SCALE, limit); \ + coeffs += H; \ + } \ } #define IDCT_DC(H) \ ====================================================================== diff --cc libavcodec/hevcdsp_template.c index 66b1ac0,2cde5a8..ae32dcc --- a/libavcodec/hevcdsp_template.c +++ b/libavcodec/hevcdsp_template.c @@@ -170,114 -137,92 
+170,114 @@@ static void FUNC(transform_4x4_luma)(in #undef TR_4x4_LUMA - #define TR_4(dst, src, dstep, sstep, assign, end) \ - do { \ - const int e0 = 64 * src[0 * sstep] + 64 * src[2 * sstep]; \ - const int e1 = 64 * src[0 * sstep] - 64 * src[2 * sstep]; \ - const int o0 = 83 * src[1 * sstep] + 36 * src[3 * sstep]; \ - const int o1 = 36 * src[1 * sstep] - 83 * src[3 * sstep]; \ - \ - assign(dst[0 * dstep], e0 + o0); \ - assign(dst[1 * dstep], e1 + o1); \ - assign(dst[2 * dstep], e1 - o1); \ - assign(dst[3 * dstep], e0 - o0); \ -#define TR_4(dst, src, dstep, sstep, assign) \ - do { \ - const int e0 = transform[8 * 0][0] * src[0 * sstep] + \ - transform[8 * 2][0] * src[2 * sstep]; \ - const int e1 = transform[8 * 0][1] * src[0 * sstep] + \ - transform[8 * 2][1] * src[2 * sstep]; \ - const int o0 = transform[8 * 1][0] * src[1 * sstep] + \ - transform[8 * 3][0] * src[3 * sstep]; \ - const int o1 = transform[8 * 1][1] * src[1 * sstep] + \ - transform[8 * 3][1] * src[3 * sstep]; \ - \ - assign(dst[0 * dstep], e0 + o0); \ - assign(dst[1 * dstep], e1 + o1); \ - assign(dst[2 * dstep], e1 - o1); \ - assign(dst[3 * dstep], e0 - o0); \ ++#define TR_4(dst, src, dstep, sstep, assign, end) \ ++ do { \ ++ const int e0 = 64 * src[0 * sstep] + 64 * src[2 * sstep]; \ ++ const int e1 = 64 * src[0 * sstep] - 64 * src[2 * sstep]; \ ++ const int o0 = 83 * src[1 * sstep] + 36 * src[3 * sstep]; \ ++ const int o1 = 36 * src[1 * sstep] - 83 * src[3 * sstep]; \ ++ \ ++ assign(dst[0 * dstep], e0 + o0); \ ++ assign(dst[1 * dstep], e1 + o1); \ ++ assign(dst[2 * dstep], e1 - o1); \ ++ assign(dst[3 * dstep], e0 - o0); \ } while (0) - #define TR_8(dst, src, dstep, sstep, assign, end) \ - do { \ - int i, j; \ - int e_8[4]; \ - int o_8[4] = { 0 }; \ - for (i = 0; i < 4; i++) \ - for (j = 1; j < end; j += 2) \ - o_8[i] += transform[4 * j][i] * src[j * sstep]; \ - TR_4(e_8, src, 1, 2 * sstep, SET, 4); \ - \ - for (i = 0; i < 4; i++) { \ - assign(dst[i * dstep], e_8[i] + o_8[i]); \ - assign(dst[(7 - 
i) * dstep], e_8[i] - o_8[i]); \ - } \ -#define TR_8(dst, src, dstep, sstep, assign) \ ++#define TR_8(dst, src, dstep, sstep, assign, end) \ + do { \ + int i, j; \ + int e_8[4]; \ + int o_8[4] = { 0 }; \ + for (i = 0; i < 4; i++) \ - for (j = 1; j < 8; j += 2) \ ++ for (j = 1; j < end; j += 2) \ + o_8[i] += transform[4 * j][i] * src[j * sstep]; \ - TR_4(e_8, src, 1, 2 * sstep, SET); \ ++ TR_4(e_8, src, 1, 2 * sstep, SET, 4); \ + \ + for (i = 0; i < 4; i++) { \ + assign(dst[i * dstep], e_8[i] + o_8[i]); \ + assign(dst[(7 - i) * dstep], e_8[i] - o_8[i]); \ + } \ } while (0) - #define TR_16(dst, src, dstep, sstep, assign, end) \ - do { \ - int i, j; \ - int e_16[8]; \ - int o_16[8] = { 0 }; \ - for (i = 0; i < 8; i++) \ - for (j = 1; j < end; j += 2) \ - o_16[i] += transform[2 * j][i] * src[j * sstep]; \ - TR_8(e_16, src, 1, 2 * sstep, SET, 8); \ - \ - for (i = 0; i < 8; i++) { \ - assign(dst[i * dstep], e_16[i] + o_16[i]); \ - assign(dst[(15 - i) * dstep], e_16[i] - o_16[i]); \ - } \ -#define TR_16(dst, src, dstep, sstep, assign) \ ++#define TR_16(dst, src, dstep, sstep, assign, end) \ + do { \ + int i, j; \ + int e_16[8]; \ + int o_16[8] = { 0 }; \ + for (i = 0; i < 8; i++) \ - for (j = 1; j < 16; j += 2) \ ++ for (j = 1; j < end; j += 2) \ + o_16[i] += transform[2 * j][i] * src[j * sstep]; \ - TR_8(e_16, src, 1, 2 * sstep, SET); \ ++ TR_8(e_16, src, 1, 2 * sstep, SET, 8); \ + \ + for (i = 0; i < 8; i++) { \ + assign(dst[i * dstep], e_16[i] + o_16[i]); \ + assign(dst[(15 - i) * dstep], e_16[i] - o_16[i]); \ + } \ } while (0) - #define TR_32(dst, src, dstep, sstep, assign, end) \ - do { \ - int i, j; \ - int e_32[16]; \ - int o_32[16] = { 0 }; \ - for (i = 0; i < 16; i++) \ - for (j = 1; j < end; j += 2) \ - o_32[i] += transform[j][i] * src[j * sstep]; \ - TR_16(e_32, src, 1, 2 * sstep, SET, end/2); \ - \ - for (i = 0; i < 16; i++) { \ - assign(dst[i * dstep], e_32[i] + o_32[i]); \ - assign(dst[(31 - i) * dstep], e_32[i] - o_32[i]); \ - } \ -#define TR_32(dst, src, 
dstep, sstep, assign) \ ++#define TR_32(dst, src, dstep, sstep, assign, end) \ + do { \ + int i, j; \ + int e_32[16]; \ + int o_32[16] = { 0 }; \ + for (i = 0; i < 16; i++) \ - for (j = 1; j < 32; j += 2) \ ++ for (j = 1; j < end; j += 2) \ + o_32[i] += transform[j][i] * src[j * sstep]; \ - TR_16(e_32, src, 1, 2 * sstep, SET); \ ++ TR_16(e_32, src, 1, 2 * sstep, SET, end/2); \ + \ + for (i = 0; i < 16; i++) { \ + assign(dst[i * dstep], e_32[i] + o_32[i]); \ + assign(dst[(31 - i) * dstep], e_32[i] - o_32[i]); \ + } \ } while (0) - #define IDCT_VAR4(H) \ ++#define IDCT_VAR4(H) \ + int limit2 = FFMIN(col_limit + 4, H) - #define IDCT_VAR8(H) \ - int limit = FFMIN(col_limit, H); \ - int limit2 = FFMIN(col_limit + 4, H) ++#define IDCT_VAR8(H) \ ++ int limit = FFMIN(col_limit, H); \ ++ int limit2 = FFMIN(col_limit + 4, H) +#define IDCT_VAR16(H) IDCT_VAR8(H) +#define IDCT_VAR32(H) IDCT_VAR8(H) + - #define IDCT(H) \ - static void FUNC(idct_##H ##x ##H )( \ - int16_t *coeffs, int col_limit) { \ - int i; \ - int shift = 7; \ - int add = 1 << (shift - 1); \ - int16_t *src = coeffs; \ - IDCT_VAR ##H(H); \ - \ - for (i = 0; i < H; i++) { \ - TR_ ## H(src, src, H, H, SCALE, limit2); \ - if (limit2 < H && i%4 == 0 && !!i) \ - limit2 -= 4; \ - src++; \ - } \ - \ - shift = 20 - BIT_DEPTH; \ - add = 1 << (shift - 1); \ - for (i = 0; i < H; i++) { \ - TR_ ## H(coeffs, coeffs, 1, 1, SCALE, limit); \ - coeffs += H; \ - } \ + #define IDCT(H) \ -static void FUNC(idct_ ## H ## x ## H )(int16_t *coeffs) \ -{ \ ++static void FUNC(idct_ ## H ## x ## H )(int16_t *coeffs, \ ++ int col_limit) { \ + int i; \ + int shift = 7; \ + int add = 1 << (shift - 1); \ + int16_t *src = coeffs; \ ++ IDCT_VAR ##H(H); \ + \ + for (i = 0; i < H; i++) { \ - TR_ ## H(src, src, H, H, SCALE); \ ++ TR_ ## H(src, src, H, H, SCALE, limit2); \ ++ if (limit2 < H && i%4 == 0 && !!i) \ ++ limit2 -= 4; \ + src++; \ + } \ + \ + shift = 20 - BIT_DEPTH; \ + add = 1 << (shift - 1); \ + for (i = 0; i < H; i++) { \ - TR_ ## 
H(coeffs, coeffs, 1, 1, SCALE); \ ++ TR_ ## H(coeffs, coeffs, 1, 1, SCALE, limit); \ + coeffs += H; \ + } \ } +#define IDCT_DC(H) \ +static void FUNC(idct_##H ##x ##H ##_dc)( \ + int16_t *coeffs) { \ + int i, j; \ + int shift = 14 - BIT_DEPTH; \ + int add = 1 << (shift - 1); \ + int coeff = (((coeffs[0] + 1) >> 1) + add) >> shift; \ + \ + for (j = 0; j < H; j++) { \ + for (i = 0; i < H; i++) { \ + coeffs[i+j*H] = coeff; \ + } \ + } \ +} + IDCT( 4) IDCT( 8) IDCT(16) _______________________________________________ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog