This needs to be benchmarked; I do not have PPC hardware. On big endian, this is more similar to how the code was before commit 79e0255956bc8fcdb143f39b2e45db77144ac017.
Signed-off-by: Michael Niedermayer <michae...@gmx.at> --- libavcodec/ppc/hpeldsp_altivec.c | 30 ++++++++++-------------------- libavutil/ppc/util_altivec.h | 16 ++++++++++++++++ 2 files changed, 26 insertions(+), 20 deletions(-) diff --git a/libavcodec/ppc/hpeldsp_altivec.c b/libavcodec/ppc/hpeldsp_altivec.c index 87a1f05..05d8b81 100644 --- a/libavcodec/ppc/hpeldsp_altivec.c +++ b/libavcodec/ppc/hpeldsp_altivec.c @@ -123,8 +123,7 @@ static void put_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, ptrdi register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0); register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2); - pixelsv1 = VEC_LD(0, pixels); - pixelsv2 = VEC_LD(1, pixels); + VEC_LD2(pixelsv1, pixelsv2, 0, pixels); pixelsv1 = VEC_MERGEH(vczero, pixelsv1); pixelsv2 = VEC_MERGEH(vczero, pixelsv2); @@ -136,8 +135,7 @@ static void put_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, ptrdi int rightside = ((unsigned long)block & 0x0000000F); blockv = vec_ld(0, block); - pixelsv1 = unaligned_load(line_size, pixels); - pixelsv2 = unaligned_load(line_size+1, pixels); + VEC_LD2(pixelsv1, pixelsv2, line_size, pixels); pixelsv1 = VEC_MERGEH(vczero, pixelsv1); pixelsv2 = VEC_MERGEH(vczero, pixelsv2); pixelssum2 = vec_add((vector unsigned short)pixelsv1, @@ -171,8 +169,7 @@ static void put_no_rnd_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels register const vector unsigned short vcone = (const vector unsigned short)vec_splat_u16(1); register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2); - pixelsv1 = VEC_LD(0, pixels); - pixelsv2 = VEC_LD(1, pixels); + VEC_LD2(pixelsv1, pixelsv2, 0, pixels); pixelsv1 = VEC_MERGEH(vczero, pixelsv1); pixelsv2 = VEC_MERGEH(vczero, pixelsv2); pixelssum1 = vec_add((vector unsigned short)pixelsv1, @@ -183,8 +180,7 @@ static void put_no_rnd_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels int rightside = 
((unsigned long)block & 0x0000000F); blockv = vec_ld(0, block); - pixelsv1 = unaligned_load(line_size, pixels); - pixelsv2 = unaligned_load(line_size+1, pixels); + VEC_LD2(pixelsv1, pixelsv2, line_size, pixels); pixelsv1 = VEC_MERGEH(vczero, pixelsv1); pixelsv2 = VEC_MERGEH(vczero, pixelsv2); pixelssum2 = vec_add((vector unsigned short)pixelsv1, @@ -218,8 +214,7 @@ static void put_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, pt register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0); register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2); - pixelsv1 = VEC_LD(0, pixels); - pixelsv2 = VEC_LD(1, pixels); + VEC_LD2(pixelsv1, pixelsv2, 0, pixels); pixelsv3 = VEC_MERGEL(vczero, pixelsv1); pixelsv4 = VEC_MERGEL(vczero, pixelsv2); pixelsv1 = VEC_MERGEH(vczero, pixelsv1); @@ -234,8 +229,7 @@ static void put_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, pt for (i = 0; i < h ; i++) { blockv = vec_ld(0, block); - pixelsv1 = unaligned_load(line_size, pixels); - pixelsv2 = unaligned_load(line_size+1, pixels); + VEC_LD2(pixelsv1, pixelsv2, line_size, pixels); pixelsv3 = VEC_MERGEL(vczero, pixelsv1); pixelsv4 = VEC_MERGEL(vczero, pixelsv2); @@ -274,8 +268,7 @@ static void put_no_rnd_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pix register const vector unsigned short vcone = (const vector unsigned short)vec_splat_u16(1); register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2); - pixelsv1 = VEC_LD(0, pixels); - pixelsv2 = VEC_LD(1, pixels); + VEC_LD2(pixelsv1, pixelsv2, 0, pixels); pixelsv3 = VEC_MERGEL(vczero, pixelsv1); pixelsv4 = VEC_MERGEL(vczero, pixelsv2); pixelsv1 = VEC_MERGEH(vczero, pixelsv1); @@ -288,8 +281,7 @@ static void put_no_rnd_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pix pixelssum1 = vec_add(pixelssum1, vcone); for (i = 0; i < h ; i++) { - pixelsv1 = unaligned_load(line_size, pixels); - pixelsv2 = 
unaligned_load(line_size+1, pixels); + VEC_LD2(pixelsv1, pixelsv2, line_size, pixels); pixelsv3 = VEC_MERGEL(vczero, pixelsv1); pixelsv4 = VEC_MERGEL(vczero, pixelsv2); @@ -329,8 +321,7 @@ static void avg_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, ptrdi register const vector unsigned short vctwo = (const vector unsigned short) vec_splat_u16(2); - pixelsv1 = VEC_LD(0, pixels); - pixelsv2 = VEC_LD(1, pixels); + VEC_LD2(pixelsv1, pixelsv2, 0, pixels); pixelsv1 = VEC_MERGEH(vczero, pixelsv1); pixelsv2 = VEC_MERGEH(vczero, pixelsv2); pixelssum1 = vec_add((vector unsigned short)pixelsv1, @@ -341,8 +332,7 @@ static void avg_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, ptrdi int rightside = ((unsigned long)block & 0x0000000F); blockv = vec_ld(0, block); - pixelsv1 = unaligned_load(line_size, pixels); - pixelsv2 = unaligned_load(line_size+1, pixels); + VEC_LD2(pixelsv1, pixelsv2, line_size, pixels); pixelsv1 = VEC_MERGEH(vczero, pixelsv1); pixelsv2 = VEC_MERGEH(vczero, pixelsv2); diff --git a/libavutil/ppc/util_altivec.h b/libavutil/ppc/util_altivec.h index fd3bfd3..9fda566 100644 --- a/libavutil/ppc/util_altivec.h +++ b/libavutil/ppc/util_altivec.h @@ -88,9 +88,25 @@ do { \ #if HAVE_BIGENDIAN #define VEC_LD(offset,b) \ vec_perm(vec_ld(offset, b), vec_ld(offset+15, b), vec_lvsl(offset, b)) + +#define VEC_LD2(dst1, dst2, offset, b) do { \ + register vector unsigned char temp1 = vec_ld(offset , b); \ + register vector unsigned char temp2 = vec_ld((offset) + 16, b); \ + (dst1) = vec_perm(temp1, temp2, vec_lvsl(offset, b)); \ + if ((((unsigned long)(b + (offset))) & 0x0000000F) == 0x0000000F) { \ + (dst2) = temp2; \ + } else { \ + (dst2) = vec_perm(temp1, temp2, vec_lvsl((offset)+1, b)); \ + } \ + } while(0) #else #define VEC_LD(offset,b) \ vec_vsx_ld(offset, b) + +#define VEC_LD2(dst1, dst2, offset, b) do { \ + (dst1) = VEC_LD(offset ,b); \ + (dst2) = VEC_LD((offset)+1,b); \ + } while(0) #endif /** @brief loads unaligned vector @a *src with offset 
@a offset -- 1.7.9.5 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel