On Fri, Oct 31, 2014 at 01:03:58PM +0100, Michael Niedermayer wrote: > On Fri, Oct 31, 2014 at 06:45:51PM +0800, rongyan wrote: > > Hi, > > There are 3 patches to fix bugs for POWER8 little endian. I will send 3 > > patches in 3 different email. This is the first, functions sad8_altivec() > > sse8_altivec(), hadamard8_diff8x8_altivec(), hadamard8_diff16x8_altivec() > > are fixed. > > The fate test result after merge these 3 patches can be found on website > > by searching "ibmcrl", also attached in the below to facilitate the review. > > The passed test cases change from 1679/2182 to 2202/2235. > > > > Thanks. > > > > Rong Yan > > ------------------ > > The world has enough for everyone's need, but not enough for everyone's > > greed. > > > > me_cmp.c | 460 > > +++++++++++++++++++++++++++++++++++++++++++++++++++++++-------- > > 1 file changed, 408 insertions(+), 52 deletions(-) > > dd8fe6d196e1330633e22dd0395b5c010e5585df > > 0001-libavcodec-ppc-me_cmp.c-fix-sad8_altivec-sse8_altive.patch > > From a1bd28eb6d775e41defca51fc156a23e4d3f16ed Mon Sep 17 00:00:00 2001 > > From: Rong Yan <rongyan...@gmail.com> > > Date: Fri, 31 Oct 2014 10:18:01 +0000 > > Subject: [PATCH 1/3] libavcodec/ppc/me_cmp.c : fix sad8_altivec() > > sse8_altivec() hadamard8_diff8x8_altivec() > > hadamard8_diff16x8_altivec() for POWER LE > > > > --- > > libavcodec/ppc/me_cmp.c | 460 > > ++++++++++++++++++++++++++++++++++++++++++------ > > 1 file changed, 408 insertions(+), 52 deletions(-) > > > > diff --git a/libavcodec/ppc/me_cmp.c b/libavcodec/ppc/me_cmp.c > > index 8ff8193..9759b17 100644 > > --- a/libavcodec/ppc/me_cmp.c > > +++ b/libavcodec/ppc/me_cmp.c > > @@ -34,16 +34,18 @@ > > #include "libavcodec/mpegvideo.h" > > #include "libavcodec/me_cmp.h" > > > > +#include <stdio.h> > > #if HAVE_ALTIVEC > > -#if HAVE_VSX > > + > > +#if !HAVE_BIGENDIAN > > static int sad16_x2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t > > *pix2, > > int line_size, int > > h) > > { > > int i, s = 0; > > - const vector unsigned char zero = > > + const vector unsigned char __attribute__((aligned(16))) zero = > > (const vector unsigned char) vec_splat_u8(0); > > - vector unsigned int sad = (vector unsigned int) vec_splat_u32(0); > > - vector signed int sumdiffs; > > + vector unsigned int __attribute__((aligned(16))) sad = (vector > > unsigned int) vec_splat_u32(0); > > + vector signed int __attribute__((aligned(16))) sumdiffs; > > adding alignment directives should be a sepearte patch > > > > > > for (i = 0; i < h; i++) { > > /* Read unaligned pixels into our vectors. The vectors are as > > follows: > > @@ -69,10 +71,11 @@ static int sad16_x2_altivec(MpegEncContext *v, uint8_t > > *pix1, uint8_t *pix2, > > /* Sum up the four partial sums, and put the result into s. */ > > sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero); > > sumdiffs = vec_splat(sumdiffs, 3); > > - vec_vsx_st(sumdiffs, 0, &s); > > + //vec_vsx_st(sumdiffs, 0, &s); > > + vec_ste(sumdiffs, 0, &s); > > return s; > > } > > -#else > > +#else /* else of #if !HAVE_BIGENDIAN */ > > static int sad16_x2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t > > *pix2, > > int line_size, int h) > > { > > @@ -114,19 +117,19 @@ static int sad16_x2_altivec(MpegEncContext *v, > > uint8_t *pix1, uint8_t *pix2, > > > > return s; > > } > > > -#endif /* HAVE_VSX */ > > +#endif /* end of #if !HAVE_BIGENDIAN */ > > this is inconsistent with the rest of the codebase > its also a cosmetic change and cosmetic changes must not be in the > same patch as functional changes > > > > > > -#if HAVE_VSX > > +#if !HAVE_BIGENDIAN > > static int sad16_y2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t > > *pix2, > > int line_size, int > > h) > > { > > int i, s = 0; > > - const vector unsigned char zero = > > + const vector unsigned char __attribute__((aligned(16)))zero = > > (const vector unsigned char) vec_splat_u8(0); > > - vector unsigned char perm = vec_lvsl(0, pix2); > > - vector unsigned char pix1v, pix3v, avgv, t5; > > - vector unsigned int sad = (vector unsigned int) vec_splat_u32(0); > > - vector signed int sumdiffs; > > + vector unsigned char __attribute__((aligned(16))) perm = vec_lvsl(0, > > pix2); > > + vector unsigned char __attribute__((aligned(16))) pix1v, pix3v, avgv, t5; > > + vector unsigned int __attribute__((aligned(16))) sad = (vector unsigned > > int) vec_splat_u32(0); > > + vector signed int __attribute__((aligned(16))) sumdiffs; > > uint8_t *pix3 = pix2 + line_size; > > > > /* Due to the fact that pix3 = pix2 + line_size, the pix3 of one > > @@ -162,10 +165,11 @@ static int sad16_y2_altivec(MpegEncContext *v, > > uint8_t *pix1, uint8_t *pix2, > > /* Sum up the four partial sums, and put the result into s. */ > > sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero); > > sumdiffs = vec_splat(sumdiffs, 3); > > - vec_vsx_st(sumdiffs, 0, &s); > > + //vec_vsx_st(sumdiffs, 0, &s); > > + vec_ste(sumdiffs, 0, &s); > > return s; > > } > > -#else > > +#else /* else of #if !HAVE_BIGENDIAN */ > > static int sad16_y2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t > > *pix2, > > int line_size, int h) > > { > > @@ -219,24 +223,24 @@ static int sad16_y2_altivec(MpegEncContext *v, > > uint8_t *pix1, uint8_t *pix2, > > vec_ste(sumdiffs, 0, &s); > > return s; > > } > > -#endif /* HAVE_VSX */ > > +#endif /* end of #if !HAVE_BIGENDIAN */ > > > > -#if HAVE_VSX > > +#if !HAVE_BIGENDIAN > > static int sad16_xy2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t > > *pix2, > > int line_size, int h) > > { > > int i, s = 0; > > uint8_t *pix3 = pix2 + line_size; > > - const vector unsigned char zero = > > + const vector unsigned char __attribute__((aligned(16))) zero = > > (const vector unsigned char) vec_splat_u8(0); > > - const vector unsigned short two = > > + const vector unsigned short __attribute__((aligned(16))) two = > > (const vector unsigned short) vec_splat_u16(2); > > - vector unsigned char avgv, t5; > > - vector unsigned char pix1v, pix3v, pix3iv; > > - vector unsigned short pix3lv, pix3hv, pix3ilv, pix3ihv; > > - vector unsigned short avghv, avglv; > > - vector unsigned int sad = (vector unsigned int) vec_splat_u32(0); > > - vector signed int sumdiffs; > > + vector unsigned char __attribute__((aligned(16))) avgv, t5; > > + vector unsigned char __attribute__((aligned(16))) pix1v, pix3v, pix3iv; > > + vector unsigned short __attribute__((aligned(16))) pix3lv, pix3hv, > > pix3ilv, pix3ihv; > > + vector unsigned short __attribute__((aligned(16))) avghv, avglv; > > + vector unsigned int __attribute__((aligned(16))) sad = (vector unsigned > > int) vec_splat_u32(0); > > + vector signed int __attribute__((aligned(16))) sumdiffs; > > > > /* Due to the fact that pix3 = pix2 + line_size, the pix3 of one > > * iteration becomes pix2 in the next iteration. We can use this > > @@ -305,10 +309,11 @@ static int sad16_xy2_altivec(MpegEncContext *v, > > uint8_t *pix1, uint8_t *pix2, > > /* Sum up the four partial sums, and put the result into s. */ > > sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero); > > sumdiffs = vec_splat(sumdiffs, 3); > > - vec_vsx_st(sumdiffs, 0, &s); > > > + //vec_vsx_st(sumdiffs, 0, &s); > > what purpose does this line have ? > > > [...] > > > +#if !HAVE_BIGENDIAN > > +/* Sum of Squared Errors for an 8x8 block, AltiVec-enhanced. > > + * It's the sad8_altivec code above w/ squaring added. */ > > +static int sse8_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, > > + int line_size, int h) > [...] > > + return s; > > +} > > +#else /* else of #if !HAVE_BIGENDIAN */ > > /* Sum of Squared Errors for an 8x8 block, AltiVec-enhanced. > > * It's the sad8_altivec code above w/ squaring added. */ > > static int sse8_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, > > @@ -617,18 +670,17 @@ static int sse8_altivec(MpegEncContext *v, uint8_t > > *pix1, uint8_t *pix2, > > > > return s; > > } > > +#endif /* end of #if !HAVE_BIGENDIAN */ > > Why do you keep sending patches which duplicate code? > > Maybe someone can translate this to a language you understand > better then english: > > Code duplication is not allowed > > #if A > foo > this > bar > #else > foo > that > bar > #endif > > always must be replaced by > > foo > #if A > this > #else > that > #endif > bar > > > also the then remaining difference (this vs that) should be avoided > as well by using appropriate macros and inline functions > > there is already some duplicated code in there, this was a mistake. > No more duplicated code should be added.
For reference the difference between the if and else /* Sum of Squared Errors for an 8x8 block, AltiVec-enhanced. * It's the sad8_altivec code above w/ squaring added. */ static int sse8_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) { int i, s; - const vector unsigned int zero = + const vector unsigned int __attribute__((aligned(16))) zero = (const vector unsigned int) vec_splat_u32(0); - const vector unsigned char permclear = + const vector unsigned char __attribute__((aligned(16))) permclear = (vector unsigned char) { 255, 255, 255, 255, 255, 255, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0 }; - vector unsigned char perm1 = vec_lvsl(0, pix1); - vector unsigned char perm2 = vec_lvsl(0, pix2); - vector unsigned int sum = (vector unsigned int) vec_splat_u32(0); - vector signed int sumsqr; + vector unsigned int __attribute__((aligned(16))) sum = (vector unsigned int) vec_splat_u32(0); + vector signed int __attribute__((aligned(16))) sumsqr; for (i = 0; i < h; i++) { /* Read potentially unaligned pixels into t1 and t2. * Since we're reading 16 pixels, and actually only want 8, * mask out the last 8 pixels. The 0s don't change the sum. */ - vector unsigned char pix1l = vec_ld(0, pix1); - vector unsigned char pix1r = vec_ld(7, pix1); - vector unsigned char pix2l = vec_ld(0, pix2); - vector unsigned char pix2r = vec_ld(7, pix2); - vector unsigned char t1 = vec_and(vec_perm(pix1l, pix1r, perm1), - permclear); - vector unsigned char t2 = vec_and(vec_perm(pix2l, pix2r, perm2), - permclear); + vector unsigned char t1 = vec_and(vec_vsx_ld(0, pix1), permclear); + vector unsigned char t2 = vec_and(vec_vsx_ld(0, pix2), permclear); /* Since we want to use unsigned chars, we can take advantage * of the fact that abs(a - b) ^ 2 = (a - b) ^ 2. */ @@ -613,22 +614,21 @@ static int sse8_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, /* Sum up the four partial sums, and put the result into s. */ sumsqr = vec_sums((vector signed int) sum, (vector signed int) zero); sumsqr = vec_splat(sumsqr, 3); + //vec_vsx_st(sumsqr, 0, &s); vec_ste(sumsqr, 0, &s); - return s; } this is just a difference in alignment, why is the alignment difference needed ? and a difference of how data is loaded, which belongs in a seperate macro [...] -- Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB Those who are best at talking, realize last or never when they are wrong.
signature.asc
Description: Digital signature
_______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel