ffmpeg | branch: master | Clément Bœsch <u...@pkh.me> | Mon Mar 20 13:47:29 2017 +0100| [e07fa3008bcafc027e9aa654657bae1f1ac64f5d] | committer: Clément Bœsch
Merge commit 'de452e503734ebb0fdbce86e9d16693b3530fad3' * commit 'de452e503734ebb0fdbce86e9d16693b3530fad3': pixblockdsp: Change type of stride parameters to ptrdiff_t Merged-by: Clément Bœsch <u...@pkh.me> > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=e07fa3008bcafc027e9aa654657bae1f1ac64f5d --- libavcodec/alpha/pixblockdsp_alpha.c | 7 ++++--- libavcodec/arm/pixblockdsp_init_arm.c | 5 +++-- libavcodec/dv.h | 2 +- libavcodec/dvenc.c | 9 +++++---- libavcodec/mips/pixblockdsp_mips.h | 6 +++--- libavcodec/mips/pixblockdsp_mmi.c | 22 +++++++++++----------- libavcodec/mips/pixblockdsp_msa.c | 2 +- libavcodec/pixblockdsp.c | 24 ++++++++++++------------ libavcodec/pixblockdsp.h | 4 ++-- libavcodec/ppc/pixblockdsp.c | 18 +++++++++--------- libavcodec/x86/pixblockdsp.asm | 5 ++--- libavcodec/x86/pixblockdsp_init.c | 8 ++++---- tests/checkasm/pixblockdsp.c | 2 +- 13 files changed, 58 insertions(+), 56 deletions(-) diff --git a/libavcodec/alpha/pixblockdsp_alpha.c b/libavcodec/alpha/pixblockdsp_alpha.c index 866b762..c2f1a1d 100644 --- a/libavcodec/alpha/pixblockdsp_alpha.c +++ b/libavcodec/alpha/pixblockdsp_alpha.c @@ -23,7 +23,7 @@ #include "asm.h" static void get_pixels_mvi(int16_t *restrict block, - const uint8_t *restrict pixels, ptrdiff_t line_size) + const uint8_t *restrict pixels, ptrdiff_t stride) { int h = 8; @@ -34,13 +34,14 @@ static void get_pixels_mvi(int16_t *restrict block, stq(unpkbw(p), block); stq(unpkbw(p >> 32), block + 4); - pixels += line_size; + pixels += stride; block += 8; } while (--h); } static void diff_pixels_mvi(int16_t *block, const uint8_t *s1, const uint8_t *s2, - int stride) { + ptrdiff_t stride) +{ int h = 8; uint64_t mask = 0x4040; diff --git a/libavcodec/arm/pixblockdsp_init_arm.c b/libavcodec/arm/pixblockdsp_init_arm.c index 76d7509..59d2b49 100644 --- a/libavcodec/arm/pixblockdsp_init_arm.c +++ b/libavcodec/arm/pixblockdsp_init_arm.c @@ -24,9 +24,10 @@ #include "libavcodec/avcodec.h" #include "libavcodec/pixblockdsp.h" -void ff_get_pixels_armv6(int16_t *block, const uint8_t *pixels, ptrdiff_t stride); +void ff_get_pixels_armv6(int16_t *block, const uint8_t *pixels, + ptrdiff_t stride); void ff_diff_pixels_armv6(int16_t *block, const uint8_t *s1, - const uint8_t *s2, int stride); + const uint8_t *s2, ptrdiff_t stride); av_cold void ff_pixblockdsp_init_arm(PixblockDSPContext *c, AVCodecContext *avctx, diff --git a/libavcodec/dv.h b/libavcodec/dv.h index 3367f92..d5482d1 100644 --- a/libavcodec/dv.h +++ b/libavcodec/dv.h @@ -45,7 +45,7 @@ typedef struct DVVideoContext { uint8_t dv_zigzag[2][64]; - void (*get_pixels)(int16_t *block, const uint8_t *pixels, ptrdiff_t line_size); + void (*get_pixels)(int16_t *block, const uint8_t *pixels, ptrdiff_t linesize); void (*fdct[2])(int16_t *block); void (*idct_put[2])(uint8_t *dest, int line_size, int16_t *block); me_cmp_func ildct_cmp; diff --git a/libavcodec/dvenc.c b/libavcodec/dvenc.c index a27b9ef..e3de18a 100644 --- a/libavcodec/dvenc.c +++ b/libavcodec/dvenc.c @@ -204,7 +204,7 @@ static av_always_inline PutBitContext *dv_encode_ac(EncBlockInfo *bi, } static av_always_inline int dv_guess_dct_mode(DVVideoContext *s, uint8_t *data, - int linesize) + ptrdiff_t linesize) { if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) { int ps = s->ildct_cmp(NULL, data, NULL, linesize, 8) - 400; @@ -241,8 +241,8 @@ static const int dv_weight_248[64] = { }; static av_always_inline int dv_init_enc_block(EncBlockInfo *bi, uint8_t *data, - int linesize, DVVideoContext *s, - int bias) + ptrdiff_t linesize, + DVVideoContext *s, int bias) { const int *weight; const uint8_t *zigzag_scan; @@ -420,7 +420,8 @@ static int dv_encode_video_segment(AVCodecContext *avctx, void *arg) DVVideoContext *s = avctx->priv_data; DVwork_chunk *work_chunk = arg; int mb_index, i, j; - int mb_x, mb_y, c_offset, linesize, y_stride; + int mb_x, mb_y, c_offset; + ptrdiff_t linesize, y_stride; uint8_t *y_ptr; uint8_t *dif; LOCAL_ALIGNED_8(uint8_t, scratch, [128]); diff --git a/libavcodec/mips/pixblockdsp_mips.h b/libavcodec/mips/pixblockdsp_mips.h index 7f8cc96..a12b1a6 100644 --- a/libavcodec/mips/pixblockdsp_mips.h +++ b/libavcodec/mips/pixblockdsp_mips.h @@ -25,15 +25,15 @@ #include "../mpegvideo.h" void ff_diff_pixels_msa(int16_t *av_restrict block, const uint8_t *src1, - const uint8_t *src2, int stride); + const uint8_t *src2, ptrdiff_t stride); void ff_get_pixels_16_msa(int16_t *restrict dst, const uint8_t *src, ptrdiff_t stride); void ff_get_pixels_8_msa(int16_t *restrict dst, const uint8_t *src, ptrdiff_t stride); void ff_get_pixels_8_mmi(int16_t *av_restrict block, const uint8_t *pixels, - ptrdiff_t line_size); + ptrdiff_t stride); void ff_diff_pixels_mmi(int16_t *av_restrict block, const uint8_t *src1, - const uint8_t *src2, int stride); + const uint8_t *src2, ptrdiff_t stride); #endif // #ifndef AVCODEC_MIPS_PIXBLOCKDSP_MIPS_H diff --git a/libavcodec/mips/pixblockdsp_mmi.c b/libavcodec/mips/pixblockdsp_mmi.c index 9f2eac3..a915a3c 100644 --- a/libavcodec/mips/pixblockdsp_mmi.c +++ b/libavcodec/mips/pixblockdsp_mmi.c @@ -26,7 +26,7 @@ #include "libavutil/mips/mmiutils.h" void ff_get_pixels_8_mmi(int16_t *av_restrict block, const uint8_t *pixels, - ptrdiff_t line_size) + ptrdiff_t stride) { double ftmp[7]; DECLARE_VAR_ALL64; @@ -36,7 +36,7 @@ void ff_get_pixels_8_mmi(int16_t *av_restrict block, const uint8_t *pixels, "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" MMI_LDC1(%[ftmp1], %[pixels], 0x00) - MMI_LDXC1(%[ftmp2], %[pixels], %[line_size], 0x00) + MMI_LDXC1(%[ftmp2], %[pixels], %[stride], 0x00) "punpcklbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" "punpckhbh %[ftmp4], %[ftmp1], %[ftmp0] \n\t" "punpcklbh %[ftmp5], %[ftmp2], %[ftmp0] \n\t" @@ -45,10 +45,10 @@ void ff_get_pixels_8_mmi(int16_t *av_restrict block, const uint8_t *pixels, MMI_SDC1(%[ftmp4], %[block], 0x08) MMI_SDC1(%[ftmp5], %[block], 0x10) MMI_SDC1(%[ftmp6], %[block], 0x18) - PTR_ADDU "%[pixels], %[pixels], %[line_size_x2] \n\t" + PTR_ADDU "%[pixels], %[pixels], %[stride_x2] \n\t" MMI_LDC1(%[ftmp1], %[pixels], 0x00) - MMI_LDXC1(%[ftmp2], %[pixels], %[line_size], 0x00) + MMI_LDXC1(%[ftmp2], %[pixels], %[stride], 0x00) "punpcklbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" "punpckhbh %[ftmp4], %[ftmp1], %[ftmp0] \n\t" "punpcklbh %[ftmp5], %[ftmp2], %[ftmp0] \n\t" @@ -57,10 +57,10 @@ void ff_get_pixels_8_mmi(int16_t *av_restrict block, const uint8_t *pixels, MMI_SDC1(%[ftmp4], %[block], 0x28) MMI_SDC1(%[ftmp5], %[block], 0x30) MMI_SDC1(%[ftmp6], %[block], 0x38) - PTR_ADDU "%[pixels], %[pixels], %[line_size_x2] \n\t" + PTR_ADDU "%[pixels], %[pixels], %[stride_x2] \n\t" MMI_LDC1(%[ftmp1], %[pixels], 0x00) - MMI_LDXC1(%[ftmp2], %[pixels], %[line_size], 0x00) + MMI_LDXC1(%[ftmp2], %[pixels], %[stride], 0x00) "punpcklbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" "punpckhbh %[ftmp4], %[ftmp1], %[ftmp0] \n\t" "punpcklbh %[ftmp5], %[ftmp2], %[ftmp0] \n\t" @@ -69,10 +69,10 @@ void ff_get_pixels_8_mmi(int16_t *av_restrict block, const uint8_t *pixels, MMI_SDC1(%[ftmp4], %[block], 0x48) MMI_SDC1(%[ftmp5], %[block], 0x50) MMI_SDC1(%[ftmp6], %[block], 0x58) - PTR_ADDU "%[pixels], %[pixels], %[line_size_x2] \n\t" + PTR_ADDU "%[pixels], %[pixels], %[stride_x2] \n\t" MMI_LDC1(%[ftmp1], %[pixels], 0x00) - MMI_LDXC1(%[ftmp2], %[pixels], %[line_size], 0x00) + MMI_LDXC1(%[ftmp2], %[pixels], %[stride], 0x00) "punpcklbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" "punpckhbh %[ftmp4], %[ftmp1], %[ftmp0] \n\t" "punpcklbh %[ftmp5], %[ftmp2], %[ftmp0] \n\t" @@ -88,14 +88,14 @@ void ff_get_pixels_8_mmi(int16_t *av_restrict block, const uint8_t *pixels, RESTRICT_ASM_ALL64 RESTRICT_ASM_ADDRT [pixels]"+&r"(pixels) - : [block]"r"((mips_reg)block), [line_size]"r"((mips_reg)line_size), - [line_size_x2]"r"((mips_reg)(line_size<<1)) + : [block]"r"((mips_reg)block), [stride]"r"((mips_reg)stride), + [stride_x2]"r"((mips_reg)(stride<<1)) : "memory" ); } void ff_diff_pixels_mmi(int16_t *av_restrict block, const uint8_t *src1, - const uint8_t *src2, int stride) + const uint8_t *src2, ptrdiff_t stride) { double ftmp[5]; mips_reg tmp[1]; diff --git a/libavcodec/mips/pixblockdsp_msa.c b/libavcodec/mips/pixblockdsp_msa.c index 966e11a..86a4576 100644 --- a/libavcodec/mips/pixblockdsp_msa.c +++ b/libavcodec/mips/pixblockdsp_msa.c @@ -137,7 +137,7 @@ void ff_get_pixels_8_msa(int16_t *av_restrict dest, const uint8_t *src, } void ff_diff_pixels_msa(int16_t *av_restrict block, const uint8_t *src1, - const uint8_t *src2, int stride) + const uint8_t *src2, ptrdiff_t stride) { diff_pixels_msa(block, src1, src2, stride); } diff --git a/libavcodec/pixblockdsp.c b/libavcodec/pixblockdsp.c index f0883d3..417c944 100644 --- a/libavcodec/pixblockdsp.c +++ b/libavcodec/pixblockdsp.c @@ -25,20 +25,20 @@ #include "pixblockdsp.h" static void get_pixels_16_c(int16_t *av_restrict block, const uint8_t *pixels, - ptrdiff_t line_size) + ptrdiff_t stride) { - AV_COPY128U(block + 0 * 8, pixels + 0 * line_size); - AV_COPY128U(block + 1 * 8, pixels + 1 * line_size); - AV_COPY128U(block + 2 * 8, pixels + 2 * line_size); - AV_COPY128U(block + 3 * 8, pixels + 3 * line_size); - AV_COPY128U(block + 4 * 8, pixels + 4 * line_size); - AV_COPY128U(block + 5 * 8, pixels + 5 * line_size); - AV_COPY128U(block + 6 * 8, pixels + 6 * line_size); - AV_COPY128U(block + 7 * 8, pixels + 7 * line_size); + AV_COPY128U(block + 0 * 8, pixels + 0 * stride); + AV_COPY128U(block + 1 * 8, pixels + 1 * stride); + AV_COPY128U(block + 2 * 8, pixels + 2 * stride); + AV_COPY128U(block + 3 * 8, pixels + 3 * stride); + AV_COPY128U(block + 4 * 8, pixels + 4 * stride); + AV_COPY128U(block + 5 * 8, pixels + 5 * stride); + AV_COPY128U(block + 6 * 8, pixels + 6 * stride); + AV_COPY128U(block + 7 * 8, pixels + 7 * stride); } static void get_pixels_8_c(int16_t *av_restrict block, const uint8_t *pixels, - ptrdiff_t line_size) + ptrdiff_t stride) { int i; @@ -52,13 +52,13 @@ static void get_pixels_8_c(int16_t *av_restrict block, const uint8_t *pixels, block[5] = pixels[5]; block[6] = pixels[6]; block[7] = pixels[7]; - pixels += line_size; + pixels += stride; block += 8; } } static void diff_pixels_c(int16_t *av_restrict block, const uint8_t *s1, - const uint8_t *s2, int stride) + const uint8_t *s2, ptrdiff_t stride) { int i; diff --git a/libavcodec/pixblockdsp.h b/libavcodec/pixblockdsp.h index 79ed86c..278accb 100644 --- a/libavcodec/pixblockdsp.h +++ b/libavcodec/pixblockdsp.h @@ -26,11 +26,11 @@ typedef struct PixblockDSPContext { void (*get_pixels)(int16_t *block /* align 16 */, const uint8_t *pixels /* align 8 */, - ptrdiff_t line_size); + ptrdiff_t stride); void (*diff_pixels)(int16_t *block /* align 16 */, const uint8_t *s1 /* align 8 */, const uint8_t *s2 /* align 8 */, - int stride); + ptrdiff_t stride); } PixblockDSPContext; void ff_pixblockdsp_init(PixblockDSPContext *c, AVCodecContext *avctx); diff --git a/libavcodec/ppc/pixblockdsp.c b/libavcodec/ppc/pixblockdsp.c index f3a5050..f5ac850 100644 --- a/libavcodec/ppc/pixblockdsp.c +++ b/libavcodec/ppc/pixblockdsp.c @@ -37,7 +37,7 @@ #if HAVE_VSX static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels, - ptrdiff_t line_size) + ptrdiff_t stride) { int i; vector unsigned char perm = @@ -59,12 +59,12 @@ static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels, // Save the data to the block, we assume the block is 16-byte aligned. vec_vsx_st(shorts, i * 16, (vector signed short *) block); - pixels += line_size; + pixels += stride; } } #else static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels, - ptrdiff_t line_size) + ptrdiff_t stride) { int i; const vec_u8 zero = (const vec_u8)vec_splat_u8(0); @@ -84,7 +84,7 @@ static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels, // Save the data to the block, we assume the block is 16-byte aligned. vec_st(shorts, i * 16, (vec_s16 *)block); - pixels += line_size; + pixels += stride; } } @@ -92,7 +92,7 @@ static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels, #if HAVE_VSX static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1, - const uint8_t *s2, int stride) + const uint8_t *s2, ptrdiff_t stride) { int i; const vector unsigned char zero = @@ -154,7 +154,7 @@ static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1, } #else static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1, - const uint8_t *s2, int stride) + const uint8_t *s2, ptrdiff_t stride) { int i; vec_u8 perm; @@ -233,7 +233,7 @@ static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1, #if HAVE_VSX static void get_pixels_vsx(int16_t *restrict block, const uint8_t *pixels, - ptrdiff_t line_size) + ptrdiff_t stride) { int i; for (i = 0; i < 8; i++) { @@ -241,12 +241,12 @@ static void get_pixels_vsx(int16_t *restrict block, const uint8_t *pixels, vec_vsx_st(shorts, i * 16, block); - pixels += line_size; + pixels += stride; } } static void diff_pixels_vsx(int16_t *restrict block, const uint8_t *s1, - const uint8_t *s2, int stride) + const uint8_t *s2, ptrdiff_t stride) { int i; vec_s16 shorts1, shorts2; diff --git a/libavcodec/x86/pixblockdsp.asm b/libavcodec/x86/pixblockdsp.asm index 2864d0c..440fe29 100644 --- a/libavcodec/x86/pixblockdsp.asm +++ b/libavcodec/x86/pixblockdsp.asm @@ -26,7 +26,7 @@ SECTION .text INIT_MMX mmx -; void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, ptrdiff_t line_size) +; void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, ptrdiff_t stride) cglobal get_pixels, 3,4 add r0, 128 mov r3, -128 @@ -81,10 +81,9 @@ cglobal get_pixels, 3, 4, 5 RET ; void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2, -; int stride); +; ptrdiff_t stride); %macro DIFF_PIXELS 0 cglobal diff_pixels, 4,5,5 - movsxdifnidn r3, r3d pxor m4, m4 add r0, 128 mov r4, -128 diff --git a/libavcodec/x86/pixblockdsp_init.c b/libavcodec/x86/pixblockdsp_init.c index 4d06a44..fa9578a 100644 --- a/libavcodec/x86/pixblockdsp_init.c +++ b/libavcodec/x86/pixblockdsp_init.c @@ -23,12 +23,12 @@ #include "libavutil/x86/cpu.h" #include "libavcodec/pixblockdsp.h" -void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, ptrdiff_t line_size); -void ff_get_pixels_sse2(int16_t *block, const uint8_t *pixels, ptrdiff_t line_size); +void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, ptrdiff_t stride); +void ff_get_pixels_sse2(int16_t *block, const uint8_t *pixels, ptrdiff_t stride); void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2, - int stride); + ptrdiff_t stride); void ff_diff_pixels_sse2(int16_t *block, const uint8_t *s1, const uint8_t *s2, - int stride); + ptrdiff_t stride); av_cold void ff_pixblockdsp_init_x86(PixblockDSPContext *c, AVCodecContext *avctx, diff --git a/tests/checkasm/pixblockdsp.c b/tests/checkasm/pixblockdsp.c index 2b88e7d..e14b0a9 100644 --- a/tests/checkasm/pixblockdsp.c +++ b/tests/checkasm/pixblockdsp.c @@ -64,7 +64,7 @@ #define check_diff_pixels(type) \ do { \ int i; \ - declare_func_emms(AV_CPU_FLAG_MMX, void, int16_t *av_restrict block, const uint8_t *s1, const uint8_t *s2, int stride); \ + declare_func_emms(AV_CPU_FLAG_MMX, void, int16_t *av_restrict block, const uint8_t *s1, const uint8_t *s2, ptrdiff_t stride); \ \ for (i = 0; i < BUF_UNITS; i++) { \ int src_offset = i * 64 * sizeof(type) + 8 * i; /* Test various alignments */ \ ====================================================================== diff --cc libavcodec/alpha/pixblockdsp_alpha.c index 866b762,0000000..c2f1a1d mode 100644,000000..100644 --- a/libavcodec/alpha/pixblockdsp_alpha.c +++ b/libavcodec/alpha/pixblockdsp_alpha.c @@@ -1,78 -1,0 +1,79 @@@ +/* + * SIMD-optimized pixel operations + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/attributes.h" +#include "libavcodec/pixblockdsp.h" +#include "asm.h" + +static void get_pixels_mvi(int16_t *restrict block, - const uint8_t *restrict pixels, ptrdiff_t line_size) ++ const uint8_t *restrict pixels, ptrdiff_t stride) +{ + int h = 8; + + do { + uint64_t p; + + p = ldq(pixels); + stq(unpkbw(p), block); + stq(unpkbw(p >> 32), block + 4); + - pixels += line_size; ++ pixels += stride; + block += 8; + } while (--h); +} + +static void diff_pixels_mvi(int16_t *block, const uint8_t *s1, const uint8_t *s2, - int stride) { ++ ptrdiff_t stride) ++{ + int h = 8; + uint64_t mask = 0x4040; + + mask |= mask << 16; + mask |= mask << 32; + do { + uint64_t x, y, c, d, a; + uint64_t signs; + + x = ldq(s1); + y = ldq(s2); + c = cmpbge(x, y); + d = x - y; + a = zap(mask, c); /* We use 0x4040404040404040 here... */ + d += 4 * a; /* ...so we can use s4addq here. */ + signs = zap(-1, c); + + stq(unpkbw(d) | (unpkbw(signs) << 8), block); + stq(unpkbw(d >> 32) | (unpkbw(signs >> 32) << 8), block + 4); + + s1 += stride; + s2 += stride; + block += 8; + } while (--h); +} + +av_cold void ff_pixblockdsp_init_alpha(PixblockDSPContext *c, AVCodecContext *avctx, + unsigned high_bit_depth) +{ + if (amask(AMASK_MVI) == 0) { + if (!high_bit_depth) + c->get_pixels = get_pixels_mvi; + c->diff_pixels = diff_pixels_mvi; + } +} diff --cc libavcodec/mips/pixblockdsp_mips.h index 7f8cc96,0000000..a12b1a6 mode 100644,000000..100644 --- a/libavcodec/mips/pixblockdsp_mips.h +++ b/libavcodec/mips/pixblockdsp_mips.h @@@ -1,39 -1,0 +1,39 @@@ +/* + * Copyright (c) 2015 Shivraj Patil (shivraj.pa...@imgtec.com) + * Zhou Xiaoyong <zhouxiaoy...@loongson.cn> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_MIPS_PIXBLOCKDSP_MIPS_H +#define AVCODEC_MIPS_PIXBLOCKDSP_MIPS_H + +#include "../mpegvideo.h" + +void ff_diff_pixels_msa(int16_t *av_restrict block, const uint8_t *src1, - const uint8_t *src2, int stride); ++ const uint8_t *src2, ptrdiff_t stride); +void ff_get_pixels_16_msa(int16_t *restrict dst, const uint8_t *src, + ptrdiff_t stride); +void ff_get_pixels_8_msa(int16_t *restrict dst, const uint8_t *src, + ptrdiff_t stride); + +void ff_get_pixels_8_mmi(int16_t *av_restrict block, const uint8_t *pixels, - ptrdiff_t line_size); ++ ptrdiff_t stride); +void ff_diff_pixels_mmi(int16_t *av_restrict block, const uint8_t *src1, - const uint8_t *src2, int stride); ++ const uint8_t *src2, ptrdiff_t stride); + +#endif // #ifndef AVCODEC_MIPS_PIXBLOCKDSP_MIPS_H diff --cc libavcodec/mips/pixblockdsp_mmi.c index 9f2eac3,0000000..a915a3c mode 100644,000000..100644 --- a/libavcodec/mips/pixblockdsp_mmi.c +++ b/libavcodec/mips/pixblockdsp_mmi.c @@@ -1,135 -1,0 +1,135 @@@ +/* + * Loongson SIMD optimized pixblockdsp + * + * Copyright (c) 2015 Loongson Technology Corporation Limited + * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoy...@loongson.cn> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "pixblockdsp_mips.h" +#include "libavutil/mips/asmdefs.h" +#include "libavutil/mips/mmiutils.h" + +void ff_get_pixels_8_mmi(int16_t *av_restrict block, const uint8_t *pixels, - ptrdiff_t line_size) ++ ptrdiff_t stride) +{ + double ftmp[7]; + DECLARE_VAR_ALL64; + DECLARE_VAR_ADDRT; + + __asm__ volatile ( + "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" + + MMI_LDC1(%[ftmp1], %[pixels], 0x00) - MMI_LDXC1(%[ftmp2], %[pixels], %[line_size], 0x00) ++ MMI_LDXC1(%[ftmp2], %[pixels], %[stride], 0x00) + "punpcklbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" + "punpckhbh %[ftmp4], %[ftmp1], %[ftmp0] \n\t" + "punpcklbh %[ftmp5], %[ftmp2], %[ftmp0] \n\t" + "punpckhbh %[ftmp6], %[ftmp2], %[ftmp0] \n\t" + MMI_SDC1(%[ftmp3], %[block], 0x00) + MMI_SDC1(%[ftmp4], %[block], 0x08) + MMI_SDC1(%[ftmp5], %[block], 0x10) + MMI_SDC1(%[ftmp6], %[block], 0x18) - PTR_ADDU "%[pixels], %[pixels], %[line_size_x2] \n\t" ++ PTR_ADDU "%[pixels], %[pixels], %[stride_x2] \n\t" + + MMI_LDC1(%[ftmp1], %[pixels], 0x00) - MMI_LDXC1(%[ftmp2], %[pixels], %[line_size], 0x00) ++ MMI_LDXC1(%[ftmp2], %[pixels], %[stride], 0x00) + "punpcklbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" + "punpckhbh %[ftmp4], %[ftmp1], %[ftmp0] \n\t" + "punpcklbh %[ftmp5], %[ftmp2], %[ftmp0] \n\t" + "punpckhbh %[ftmp6], %[ftmp2], %[ftmp0] \n\t" + MMI_SDC1(%[ftmp3], %[block], 0x20) + MMI_SDC1(%[ftmp4], %[block], 0x28) + MMI_SDC1(%[ftmp5], %[block], 0x30) + MMI_SDC1(%[ftmp6], %[block], 0x38) - PTR_ADDU "%[pixels], %[pixels], %[line_size_x2] \n\t" ++ PTR_ADDU "%[pixels], %[pixels], %[stride_x2] \n\t" + + MMI_LDC1(%[ftmp1], %[pixels], 0x00) - MMI_LDXC1(%[ftmp2], %[pixels], %[line_size], 0x00) ++ MMI_LDXC1(%[ftmp2], %[pixels], %[stride], 0x00) + "punpcklbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" + "punpckhbh %[ftmp4], %[ftmp1], %[ftmp0] \n\t" + "punpcklbh %[ftmp5], %[ftmp2], %[ftmp0] \n\t" + "punpckhbh %[ftmp6], %[ftmp2], %[ftmp0] \n\t" + MMI_SDC1(%[ftmp3], %[block], 0x40) + MMI_SDC1(%[ftmp4], %[block], 0x48) + MMI_SDC1(%[ftmp5], %[block], 0x50) + MMI_SDC1(%[ftmp6], %[block], 0x58) - PTR_ADDU "%[pixels], %[pixels], %[line_size_x2] \n\t" ++ PTR_ADDU "%[pixels], %[pixels], %[stride_x2] \n\t" + + MMI_LDC1(%[ftmp1], %[pixels], 0x00) - MMI_LDXC1(%[ftmp2], %[pixels], %[line_size], 0x00) ++ MMI_LDXC1(%[ftmp2], %[pixels], %[stride], 0x00) + "punpcklbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" + "punpckhbh %[ftmp4], %[ftmp1], %[ftmp0] \n\t" + "punpcklbh %[ftmp5], %[ftmp2], %[ftmp0] \n\t" + "punpckhbh %[ftmp6], %[ftmp2], %[ftmp0] \n\t" + MMI_SDC1(%[ftmp3], %[block], 0x60) + MMI_SDC1(%[ftmp4], %[block], 0x68) + MMI_SDC1(%[ftmp5], %[block], 0x70) + MMI_SDC1(%[ftmp6], %[block], 0x78) + : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), + [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), + [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), + [ftmp6]"=&f"(ftmp[6]), + RESTRICT_ASM_ALL64 + RESTRICT_ASM_ADDRT + [pixels]"+&r"(pixels) - : [block]"r"((mips_reg)block), [line_size]"r"((mips_reg)line_size), - [line_size_x2]"r"((mips_reg)(line_size<<1)) ++ : [block]"r"((mips_reg)block), [stride]"r"((mips_reg)stride), ++ [stride_x2]"r"((mips_reg)(stride<<1)) + : "memory" + ); +} + +void ff_diff_pixels_mmi(int16_t *av_restrict block, const uint8_t *src1, - const uint8_t *src2, int stride) ++ const uint8_t *src2, ptrdiff_t stride) +{ + double ftmp[5]; + mips_reg tmp[1]; + DECLARE_VAR_ALL64; + + __asm__ volatile ( + "li %[tmp0], 0x08 \n\t" + "xor %[ftmp4], %[ftmp4], %[ftmp4] \n\t" + "1: \n\t" + MMI_LDC1(%[ftmp0], %[src1], 0x00) + "or %[ftmp1], %[ftmp0], %[ftmp0] \n\t" + MMI_LDC1(%[ftmp2], %[src2], 0x00) + "or %[ftmp3], %[ftmp2], %[ftmp2] \n\t" + "punpcklbh %[ftmp0], %[ftmp0], %[ftmp4] \n\t" + "punpckhbh %[ftmp1], %[ftmp1], %[ftmp4] \n\t" + "punpcklbh %[ftmp2], %[ftmp2], %[ftmp4] \n\t" + "punpckhbh %[ftmp3], %[ftmp3], %[ftmp4] \n\t" + "psubh %[ftmp0], %[ftmp0], %[ftmp2] \n\t" + "psubh %[ftmp1], %[ftmp1], %[ftmp3] \n\t" + MMI_SDC1(%[ftmp0], %[block], 0x00) + MMI_SDC1(%[ftmp1], %[block], 0x08) + PTR_ADDI "%[tmp0], %[tmp0], -0x01 \n\t" + PTR_ADDIU "%[block], %[block], 0x10 \n\t" + PTR_ADDU "%[src1], %[src1], %[stride] \n\t" + PTR_ADDU "%[src2], %[src2], %[stride] \n\t" + "bgtz %[tmp0], 1b \n\t" + : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), + [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), + [ftmp4]"=&f"(ftmp[4]), + [tmp0]"=&r"(tmp[0]), + RESTRICT_ASM_ALL64 + [block]"+&r"(block), [src1]"+&r"(src1), + [src2]"+&r"(src2) + : [stride]"r"((mips_reg)stride) + : "memory" + ); +} diff --cc libavcodec/mips/pixblockdsp_msa.c index 966e11a,0000000..86a4576 mode 100644,000000..100644 --- a/libavcodec/mips/pixblockdsp_msa.c +++ b/libavcodec/mips/pixblockdsp_msa.c @@@ -1,143 -1,0 +1,143 @@@ +/* + * Copyright (c) 2015 Shivraj Patil (shivraj.pa...@imgtec.com) + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/mips/generic_macros_msa.h" +#include "pixblockdsp_mips.h" + +static void diff_pixels_msa(int16_t *block, const uint8_t *src1, + const uint8_t *src2, int32_t stride) +{ + v16u8 in10, in11, in12, in13, in14, in15, in16, in17; + v16u8 in20, in21, in22, in23, in24, in25, in26, in27; + v8i16 out0, out1, out2, out3, out4, out5, out6, out7; + + LD_UB8(src1, stride, in10, in11, in12, in13, in14, in15, in16, in17); + LD_UB8(src2, stride, in20, in21, in22, in23, in24, in25, in26, in27); + ILVR_B4_SH(in10, in20, in11, in21, in12, in22, in13, in23, + out0, out1, out2, out3); + ILVR_B4_SH(in14, in24, in15, in25, in16, in26, in17, in27, + out4, out5, out6, out7); + HSUB_UB4_SH(out0, out1, out2, out3, out0, out1, out2, out3); + HSUB_UB4_SH(out4, out5, out6, out7, out4, out5, out6, out7); + ST_SH8(out0, out1, out2, out3, out4, out5, out6, out7, block, 8); +} + +static void copy_8bit_to_16bit_width8_msa(const uint8_t *src, int32_t src_stride, + int16_t *dst, int32_t dst_stride, + int32_t height) +{ + uint8_t *dst_ptr; + int32_t cnt; + v16u8 src0, src1, src2, src3; + v16i8 zero = { 0 }; + + dst_ptr = (uint8_t *) dst; + + for (cnt = (height >> 2); cnt--;) { + LD_UB4(src, src_stride, src0, src1, src2, src3); + src += (4 * src_stride); + + ILVR_B4_UB(zero, src0, zero, src1, zero, src2, zero, src3, + src0, src1, src2, src3); + + ST_UB4(src0, src1, src2, src3, dst_ptr, (dst_stride * 2)); + dst_ptr += (4 * 2 * dst_stride); + } +} + +static void copy_16multx8mult_msa(const uint8_t *src, int32_t src_stride, + uint8_t *dst, int32_t dst_stride, + int32_t height, int32_t width) +{ + int32_t cnt, loop_cnt; + const uint8_t *src_tmp; + uint8_t *dst_tmp; + v16u8 src0, src1, src2, src3, src4, src5, src6, src7; + + for (cnt = (width >> 4); cnt--;) { + src_tmp = src; + dst_tmp = dst; + + for (loop_cnt = (height >> 3); loop_cnt--;) { + LD_UB8(src_tmp, src_stride, + src0, src1, src2, src3, src4, src5, src6, src7); + src_tmp += (8 * src_stride); + + ST_UB8(src0, src1, src2, src3, src4, src5, src6, src7, + dst_tmp, dst_stride); + dst_tmp += (8 * dst_stride); + } + + src += 16; + dst += 16; + } +} + +static void copy_width16_msa(const uint8_t *src, int32_t src_stride, + uint8_t *dst, int32_t dst_stride, + int32_t height) +{ + int32_t cnt; + v16u8 src0, src1, src2, src3, src4, src5, src6, src7; + + if (0 == height % 12) { + for (cnt = (height / 12); cnt--;) { + LD_UB8(src, src_stride, + src0, src1, src2, src3, src4, src5, src6, src7); + src += (8 * src_stride); + ST_UB8(src0, src1, src2, src3, src4, src5, src6, src7, + dst, dst_stride); + dst += (8 * dst_stride); + + LD_UB4(src, src_stride, src0, src1, src2, src3); + src += (4 * src_stride); + ST_UB4(src0, src1, src2, src3, dst, dst_stride); + dst += (4 * dst_stride); + } + } else if (0 == height % 8) { + copy_16multx8mult_msa(src, src_stride, dst, dst_stride, height, 16); + } else if (0 == height % 4) { + for (cnt = (height >> 2); cnt--;) { + LD_UB4(src, src_stride, src0, src1, src2, src3); + src += (4 * src_stride); + + ST_UB4(src0, src1, src2, src3, dst, dst_stride); + dst += (4 * dst_stride); + } + } +} + +void ff_get_pixels_16_msa(int16_t *av_restrict dest, const uint8_t *src, + ptrdiff_t stride) +{ + copy_width16_msa(src, stride, (uint8_t *) dest, 16, 8); +} + +void ff_get_pixels_8_msa(int16_t *av_restrict dest, const uint8_t *src, + ptrdiff_t stride) +{ + copy_8bit_to_16bit_width8_msa(src, stride, dest, 8, 8); +} + +void ff_diff_pixels_msa(int16_t *av_restrict block, const uint8_t *src1, - const uint8_t *src2, int stride) ++ const uint8_t *src2, ptrdiff_t stride) +{ + diff_pixels_msa(block, src1, src2, stride); +} diff --cc libavcodec/pixblockdsp.c index f0883d3,9d68d26..417c944 --- a/libavcodec/pixblockdsp.c +++ b/libavcodec/pixblockdsp.c @@@ -24,41 -23,15 +24,41 @@@ #include "avcodec.h" #include "pixblockdsp.h" -#define BIT_DEPTH 16 -#include "pixblockdsp_template.c" -#undef BIT_DEPTH +static void get_pixels_16_c(int16_t *av_restrict block, const uint8_t *pixels, - ptrdiff_t line_size) ++ ptrdiff_t stride) +{ - AV_COPY128U(block + 0 * 8, pixels + 0 * line_size); - AV_COPY128U(block + 1 * 8, pixels + 1 * line_size); - AV_COPY128U(block + 2 * 8, pixels + 2 * line_size); - AV_COPY128U(block + 3 * 8, pixels + 3 * line_size); - AV_COPY128U(block + 4 * 8, pixels + 4 * line_size); - AV_COPY128U(block + 5 * 8, pixels + 5 * line_size); - AV_COPY128U(block + 6 * 8, pixels + 6 * line_size); - AV_COPY128U(block + 7 * 8, pixels + 7 * line_size); ++ AV_COPY128U(block + 0 * 8, pixels + 0 * stride); ++ AV_COPY128U(block + 1 * 8, pixels + 1 * stride); ++ AV_COPY128U(block + 2 * 8, pixels + 2 * stride); ++ AV_COPY128U(block + 3 * 8, pixels + 3 * stride); ++ AV_COPY128U(block + 4 * 8, pixels + 4 * stride); ++ AV_COPY128U(block + 5 * 8, pixels + 5 * stride); ++ AV_COPY128U(block + 6 * 8, pixels + 6 * stride); ++ AV_COPY128U(block + 7 * 8, pixels + 7 * stride); +} + +static void get_pixels_8_c(int16_t *av_restrict block, const uint8_t *pixels, - ptrdiff_t line_size) ++ ptrdiff_t stride) +{ + int i; -#define BIT_DEPTH 8 -#include "pixblockdsp_template.c" + /* read the pixels */ + for (i = 0; i < 8; i++) { + block[0] = pixels[0]; + block[1] = pixels[1]; + block[2] = pixels[2]; + block[3] = pixels[3]; + block[4] = pixels[4]; + block[5] = pixels[5]; + block[6] = pixels[6]; + block[7] = pixels[7]; - pixels += line_size; ++ pixels += stride; + block += 8; + } +} -static void diff_pixels_c(int16_t *restrict block, const uint8_t *s1, +static void diff_pixels_c(int16_t *av_restrict block, const uint8_t *s1, - const uint8_t *s2, int stride) + const uint8_t *s2, ptrdiff_t stride) { int i; diff --cc libavcodec/ppc/pixblockdsp.c index f3a5050,96e7024..f5ac850 --- a/libavcodec/ppc/pixblockdsp.c +++ b/libavcodec/ppc/pixblockdsp.c @@@ -33,40 -33,13 +33,40 @@@ #include "libavcodec/avcodec.h" #include "libavcodec/pixblockdsp.h" -#if HAVE_ALTIVEC && HAVE_BIGENDIAN +#if HAVE_ALTIVEC +#if HAVE_VSX +static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels, - ptrdiff_t line_size) ++ ptrdiff_t stride) +{ + int i; + vector unsigned char perm = + (vector unsigned char) {0x00,0x10, 0x01,0x11,0x02,0x12,0x03,0x13,\ + 0x04,0x14,0x05,0x15,0x06,0x16,0x07,0x17}; + const vector unsigned char zero = + (const vector unsigned char) vec_splat_u8(0); + + for (i = 0; i < 8; i++) { + /* Read potentially unaligned pixels. + * We're reading 16 pixels, and actually only want 8, + * but we simply ignore the extras. */ + vector unsigned char bytes = vec_vsx_ld(0, pixels); + + // Convert the bytes into shorts. + //vector signed short shorts = (vector signed short) vec_perm(zero, bytes, perm); + vector signed short shorts = (vector signed short) vec_perm(bytes, zero, perm); + + // Save the data to the block, we assume the block is 16-byte aligned. + vec_vsx_st(shorts, i * 16, (vector signed short *) block); + - pixels += line_size; ++ pixels += stride; + } +} +#else static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels, - ptrdiff_t line_size) + ptrdiff_t stride) { int i; - vec_u8 perm = vec_lvsl(0, pixels); const vec_u8 zero = (const vec_u8)vec_splat_u8(0); for (i = 0; i < 8; i++) { @@@ -88,76 -60,12 +88,76 @@@ } } +#endif /* HAVE_VSX */ + +#if HAVE_VSX +static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1, - const uint8_t *s2, int stride) ++ const uint8_t *s2, ptrdiff_t stride) +{ + int i; + const vector unsigned char zero = + (const vector unsigned char) vec_splat_u8(0); + vector signed short shorts1, shorts2; + + for (i = 0; i < 4; i++) { + /* Read potentially unaligned pixels. + * We're reading 16 pixels, and actually only want 8, + * but we simply ignore the extras. */ + vector unsigned char bytes = vec_vsx_ld(0, s1); + + // Convert the bytes into shorts. + shorts1 = (vector signed short) vec_mergeh(bytes, zero); + + // Do the same for the second block of pixels. + bytes =vec_vsx_ld(0, s2); + + // Convert the bytes into shorts. + shorts2 = (vector signed short) vec_mergeh(bytes, zero); + + // Do the subtraction. + shorts1 = vec_sub(shorts1, shorts2); + + // Save the data to the block, we assume the block is 16-byte aligned. + vec_vsx_st(shorts1, 0, (vector signed short *) block); + + s1 += stride; + s2 += stride; + block += 8; + + /* The code below is a copy of the code above... + * This is a manual unroll. */ + + /* Read potentially unaligned pixels. + * We're reading 16 pixels, and actually only want 8, + * but we simply ignore the extras. */ + bytes = vec_vsx_ld(0, s1); + + // Convert the bytes into shorts. + shorts1 = (vector signed short) vec_mergeh(bytes, zero); + + // Do the same for the second block of pixels. + bytes = vec_vsx_ld(0, s2); + + // Convert the bytes into shorts. + shorts2 = (vector signed short) vec_mergeh(bytes, zero); + + // Do the subtraction. + shorts1 = vec_sub(shorts1, shorts2); + + // Save the data to the block, we assume the block is 16-byte aligned. + vec_vsx_st(shorts1, 0, (vector signed short *) block); + + s1 += stride; + s2 += stride; + block += 8; + } +} +#else static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1, - const uint8_t *s2, int stride) + const uint8_t *s2, ptrdiff_t stride) { int i; - vec_u8 perm1 = vec_lvsl(0, s1); - vec_u8 perm2 = vec_lvsl(0, s2); + vec_u8 perm; const vec_u8 zero = (const vec_u8)vec_splat_u8(0); vec_s16 shorts1, shorts2; diff --cc libavcodec/x86/pixblockdsp.asm index 2864d0c,8712442..440fe29 --- a/libavcodec/x86/pixblockdsp.asm +++ b/libavcodec/x86/pixblockdsp.asm @@@ -80,12 -80,11 +80,11 @@@ cglobal get_pixels, 3, 4, mova [r0+0x70], m3 RET -INIT_MMX mmx ; void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2, - ; int stride); + ; ptrdiff_t stride); -cglobal diff_pixels, 4,5 - pxor m7, m7 +%macro DIFF_PIXELS 0 +cglobal diff_pixels, 4,5,5 - movsxdifnidn r3, r3d + pxor m4, m4 add r0, 128 mov r4, -128 .loop: diff --cc libavcodec/x86/pixblockdsp_init.c index 4d06a44,faa5141..fa9578a --- a/libavcodec/x86/pixblockdsp_init.c +++ b/libavcodec/x86/pixblockdsp_init.c @@@ -23,12 -23,10 +23,12 @@@ #include "libavutil/x86/cpu.h" #include "libavcodec/pixblockdsp.h" - void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, ptrdiff_t line_size); - void ff_get_pixels_sse2(int16_t *block, const uint8_t *pixels, ptrdiff_t line_size); + void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, ptrdiff_t stride); + void ff_get_pixels_sse2(int16_t *block, const uint8_t *pixels, ptrdiff_t stride); void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2, - int stride); + ptrdiff_t stride); +void ff_diff_pixels_sse2(int16_t *block, const uint8_t *s1, const uint8_t *s2, - int stride); ++ ptrdiff_t stride); av_cold void ff_pixblockdsp_init_x86(PixblockDSPContext *c, AVCodecContext *avctx, diff --cc tests/checkasm/pixblockdsp.c index 2b88e7d,0000000..e14b0a9 mode 100644,000000..100644 --- a/tests/checkasm/pixblockdsp.c +++ b/tests/checkasm/pixblockdsp.c @@@ -1,107 -1,0 +1,107 @@@ +/* + * Copyright (c) 2015 Tiancheng "Timothy" Gu + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with FFmpeg; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include <string.h> +#include "checkasm.h" +#include "libavcodec/pixblockdsp.h" +#include "libavutil/common.h" +#include "libavutil/internal.h" +#include "libavutil/intreadwrite.h" + +#define BUF_UNITS 8 +#define BUF_SIZE (BUF_UNITS * 128 + 8 * BUF_UNITS) + +#define randomize_buffers() \ + do { \ + int i; \ + for (i = 0; i < BUF_SIZE; i += 4) { \ + uint32_t r = rnd(); \ + AV_WN32A(src10 + i, r); \ + AV_WN32A(src11 + i, r); \ + r = rnd(); \ + AV_WN32A(src20 + i, r); \ + AV_WN32A(src21 + i, r); \ + r = rnd(); \ + AV_WN32A(dst0_ + i, r); \ + AV_WN32A(dst1_ + i, r); \ + } \ + } while (0) + +#define check_get_pixels(type) \ + do { \ + int i; \ + declare_func_emms(AV_CPU_FLAG_MMX, void, int16_t *block, const uint8_t *pixels, ptrdiff_t line_size); \ + \ + for (i = 0; i < BUF_UNITS; i++) { \ + int src_offset = i * 64 * sizeof(type) + 8 * i; /* Test various alignments */ \ + int dst_offset = i * 64; /* dst must be aligned */ \ + randomize_buffers(); \ + call_ref(dst0 + dst_offset, src10 + src_offset, 8); \ + call_new(dst1 + dst_offset, src11 + src_offset, 8); \ + if (memcmp(src10, src11, BUF_SIZE)|| memcmp(dst0, dst1, BUF_SIZE)) \ + fail(); \ + bench_new(dst1 + dst_offset, src11 + src_offset, 8); \ + } \ + } while (0) + +#define check_diff_pixels(type) \ + do { \ + int i; \ - declare_func_emms(AV_CPU_FLAG_MMX, void, int16_t *av_restrict block, const uint8_t *s1, const uint8_t *s2, int stride); \ ++ declare_func_emms(AV_CPU_FLAG_MMX, void, int16_t *av_restrict block, const uint8_t *s1, const uint8_t *s2, ptrdiff_t stride); \ + \ + for (i = 0; i < BUF_UNITS; i++) { \ + int src_offset = i * 64 * sizeof(type) + 8 * i; /* Test various alignments */ \ + int dst_offset = i * 64; /* dst must be aligned */ \ + randomize_buffers(); \ + call_ref(dst0 + dst_offset, src10 + src_offset, src20 + src_offset, 8); \ + call_new(dst1 + dst_offset, src11 + src_offset, src21 + src_offset, 8); \ + if (memcmp(src10, src11, BUF_SIZE) || memcmp(src20, src21, BUF_SIZE) || memcmp(dst0, dst1, BUF_SIZE)) \ + fail(); \ + bench_new(dst1 + dst_offset, src11 + src_offset, src21 + src_offset, 8); \ + } \ + } while (0) + +void checkasm_check_pixblockdsp(void) +{ + LOCAL_ALIGNED_16(uint8_t, src10, [BUF_SIZE]); + LOCAL_ALIGNED_16(uint8_t, src11, [BUF_SIZE]); + LOCAL_ALIGNED_16(uint8_t, src20, [BUF_SIZE]); + LOCAL_ALIGNED_16(uint8_t, src21, [BUF_SIZE]); + LOCAL_ALIGNED_16(uint8_t, dst0_, [BUF_SIZE]); + LOCAL_ALIGNED_16(uint8_t, dst1_, [BUF_SIZE]); + uint16_t *dst0 = (uint16_t *)dst0_; + uint16_t *dst1 = (uint16_t *)dst1_; + PixblockDSPContext h; + AVCodecContext avctx = { + .bits_per_raw_sample = 8, + }; + + ff_pixblockdsp_init(&h, &avctx); + + if (check_func(h.get_pixels, "get_pixels")) + check_get_pixels(uint8_t); + + report("get_pixels"); + + if (check_func(h.diff_pixels, "diff_pixels")) + check_diff_pixels(uint8_t); + + report("diff_pixels"); +} _______________________________________________ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog