me_cmp.c

Michael Niedermayer Fri, 31 Oct 2014 05:28:10 -0700

On Fri, Oct 31, 2014 at 01:03:58PM +0100, Michael Niedermayer wrote:
> On Fri, Oct 31, 2014 at 06:45:51PM +0800, rongyan wrote:
> > Hi,
> > There are 3 patches to fix bugs for POWER8 little endian. I will send 3 
> > patches in 3 different email. This is the first, functions sad8_altivec() 
> > sse8_altivec(), hadamard8_diff8x8_altivec(), hadamard8_diff16x8_altivec() 
> > are fixed.
> >  The fate test result after merge these 3 patches can be found on website 
> > by searching "ibmcrl", also attached in the below to facilitate the review. 
> > The passed test cases change from  1679/2182 to 2202/2235.
> >  
> >  Thanks.
> >   
> >  Rong Yan
> >   ------------------
> >   The world has enough for everyone's need, but not enough for everyone's 
> > greed.
> 
> 
> >  me_cmp.c |  460 
> > +++++++++++++++++++++++++++++++++++++++++++++++++++++++--------
> >  1 file changed, 408 insertions(+), 52 deletions(-)
> > dd8fe6d196e1330633e22dd0395b5c010e5585df  
> > 0001-libavcodec-ppc-me_cmp.c-fix-sad8_altivec-sse8_altive.patch
> > From a1bd28eb6d775e41defca51fc156a23e4d3f16ed Mon Sep 17 00:00:00 2001
> > From: Rong Yan <rongyan...@gmail.com>
> > Date: Fri, 31 Oct 2014 10:18:01 +0000
> > Subject: [PATCH 1/3] libavcodec/ppc/me_cmp.c : fix sad8_altivec()
> >  sse8_altivec()     hadamard8_diff8x8_altivec()    
> >  hadamard8_diff16x8_altivec()     for POWER LE
> > 
> > ---
> >  libavcodec/ppc/me_cmp.c | 460 
> > ++++++++++++++++++++++++++++++++++++++++++------
> >  1 file changed, 408 insertions(+), 52 deletions(-)
> > 
> > diff --git a/libavcodec/ppc/me_cmp.c b/libavcodec/ppc/me_cmp.c
> > index 8ff8193..9759b17 100644
> > --- a/libavcodec/ppc/me_cmp.c
> > +++ b/libavcodec/ppc/me_cmp.c
> > @@ -34,16 +34,18 @@
> >  #include "libavcodec/mpegvideo.h"
> >  #include "libavcodec/me_cmp.h"
> >  
> > +#include <stdio.h>
> >  #if HAVE_ALTIVEC
> > -#if HAVE_VSX
> > +
> > +#if !HAVE_BIGENDIAN
> >  static int sad16_x2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t 
> > *pix2,
> >                                                          int line_size, int 
> > h)
> >  {
> >      int i, s = 0;
> > -    const vector unsigned char zero =
> > +    const vector unsigned char __attribute__((aligned(16))) zero =
> >          (const vector unsigned char) vec_splat_u8(0);
> > -    vector unsigned int sad = (vector unsigned int) vec_splat_u32(0);
> > -    vector signed int sumdiffs;
> > +    vector unsigned int __attribute__((aligned(16))) sad = (vector 
> > unsigned int) vec_splat_u32(0);
> > +    vector signed int __attribute__((aligned(16))) sumdiffs;
> 
> adding alignment directives should be a sepearte patch
> 
> 
> >  
> >      for (i = 0; i < h; i++) {
> >          /* Read unaligned pixels into our vectors. The vectors are as 
> > follows:
> > @@ -69,10 +71,11 @@ static int sad16_x2_altivec(MpegEncContext *v, uint8_t 
> > *pix1, uint8_t *pix2,
> >      /* Sum up the four partial sums, and put the result into s. */
> >      sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
> >      sumdiffs = vec_splat(sumdiffs, 3);
> > -    vec_vsx_st(sumdiffs, 0, &s);
> > +    //vec_vsx_st(sumdiffs, 0, &s);
> > +    vec_ste(sumdiffs, 0, &s);
> >      return s;
> >  }
> > -#else
> > +#else /* else of #if !HAVE_BIGENDIAN */
> >  static int sad16_x2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t 
> > *pix2,
> >                              int line_size, int h)
> >  {
> > @@ -114,19 +117,19 @@ static int sad16_x2_altivec(MpegEncContext *v, 
> > uint8_t *pix1, uint8_t *pix2,
> >  
> >      return s;
> >  }
> 
> > -#endif /* HAVE_VSX */
> > +#endif /* end of #if !HAVE_BIGENDIAN */
> 
> this is inconsistent with the rest of the codebase
> its also a cosmetic change and cosmetic changes must not be in the
> same patch as functional changes
> 
> 
> >  
> > -#if HAVE_VSX
> > +#if !HAVE_BIGENDIAN
> >  static int sad16_y2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t 
> > *pix2,
> >                                                          int line_size, int 
> > h)
> >  {
> >    int i, s = 0;
> > -  const vector unsigned char zero =
> > +  const vector unsigned char __attribute__((aligned(16)))zero =
> >      (const vector unsigned char) vec_splat_u8(0);
> > -  vector unsigned char perm = vec_lvsl(0, pix2);
> > -  vector unsigned char pix1v, pix3v, avgv, t5;
> > -  vector unsigned int sad = (vector unsigned int) vec_splat_u32(0);
> > -  vector signed int sumdiffs;
> > +  vector unsigned char __attribute__((aligned(16))) perm = vec_lvsl(0, 
> > pix2);
> > +  vector unsigned char __attribute__((aligned(16))) pix1v, pix3v, avgv, t5;
> > +  vector unsigned int __attribute__((aligned(16))) sad = (vector unsigned 
> > int) vec_splat_u32(0);
> > +  vector signed int __attribute__((aligned(16))) sumdiffs;
> >    uint8_t *pix3 = pix2 + line_size;
> >  
> >    /* Due to the fact that pix3 = pix2 + line_size, the pix3 of one
> > @@ -162,10 +165,11 @@ static int sad16_y2_altivec(MpegEncContext *v, 
> > uint8_t *pix1, uint8_t *pix2,
> >    /* Sum up the four partial sums, and put the result into s. */
> >    sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
> >    sumdiffs = vec_splat(sumdiffs, 3);
> > -  vec_vsx_st(sumdiffs, 0, &s);
> > +  //vec_vsx_st(sumdiffs, 0, &s);
> > +  vec_ste(sumdiffs, 0, &s);
> >    return s;
> >  }
> > -#else
> > +#else /* else of #if !HAVE_BIGENDIAN */
> >  static int sad16_y2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t 
> > *pix2,
> >                              int line_size, int h)
> >  {
> > @@ -219,24 +223,24 @@ static int sad16_y2_altivec(MpegEncContext *v, 
> > uint8_t *pix1, uint8_t *pix2,
> >      vec_ste(sumdiffs, 0, &s);
> >      return s;
> >  }
> > -#endif /* HAVE_VSX */
> > +#endif /* end of #if !HAVE_BIGENDIAN */
> >  
> > -#if HAVE_VSX
> > +#if !HAVE_BIGENDIAN
> >  static int sad16_xy2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t 
> > *pix2,
> >                               int line_size, int h)
> >  {
> >    int i, s = 0;
> >    uint8_t *pix3 = pix2 + line_size;
> > -  const vector unsigned char zero =
> > +  const vector unsigned char __attribute__((aligned(16))) zero =
> >      (const vector unsigned char) vec_splat_u8(0);
> > -  const vector unsigned short two =
> > +  const vector unsigned short __attribute__((aligned(16))) two =
> >      (const vector unsigned short) vec_splat_u16(2);
> > -  vector unsigned char avgv, t5;
> > -  vector unsigned char pix1v, pix3v, pix3iv;
> > -  vector unsigned short pix3lv, pix3hv, pix3ilv, pix3ihv;
> > -  vector unsigned short avghv, avglv;
> > -  vector unsigned int sad = (vector unsigned int) vec_splat_u32(0);
> > -  vector signed int sumdiffs;
> > +  vector unsigned char __attribute__((aligned(16))) avgv, t5;
> > +  vector unsigned char __attribute__((aligned(16))) pix1v, pix3v, pix3iv;
> > +  vector unsigned short __attribute__((aligned(16))) pix3lv, pix3hv, 
> > pix3ilv, pix3ihv;
> > +  vector unsigned short __attribute__((aligned(16))) avghv, avglv;
> > +  vector unsigned int __attribute__((aligned(16))) sad = (vector unsigned 
> > int) vec_splat_u32(0);
> > +  vector signed int __attribute__((aligned(16))) sumdiffs;
> >  
> >    /* Due to the fact that pix3 = pix2 + line_size, the pix3 of one
> >     * iteration becomes pix2 in the next iteration. We can use this
> > @@ -305,10 +309,11 @@ static int sad16_xy2_altivec(MpegEncContext *v, 
> > uint8_t *pix1, uint8_t *pix2,
> >    /* Sum up the four partial sums, and put the result into s. */
> >    sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
> >    sumdiffs = vec_splat(sumdiffs, 3);
> > -  vec_vsx_st(sumdiffs, 0, &s);
> 
> > +  //vec_vsx_st(sumdiffs, 0, &s);
> 
> what purpose does this line have ?
> 
> 
> [...]
> 
> > +#if !HAVE_BIGENDIAN
> > +/* Sum of Squared Errors for an 8x8 block, AltiVec-enhanced.
> > + * It's the sad8_altivec code above w/ squaring added. */
> > +static int sse8_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
> > +                        int line_size, int h)
> [...]
> > +    return s;
> > +}
> > +#else /* else of #if !HAVE_BIGENDIAN */
> >  /* Sum of Squared Errors for an 8x8 block, AltiVec-enhanced.
> >   * It's the sad8_altivec code above w/ squaring added. */
> >  static int sse8_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
> > @@ -617,18 +670,17 @@ static int sse8_altivec(MpegEncContext *v, uint8_t 
> > *pix1, uint8_t *pix2,
> >  
> >      return s;
> >  }
> > +#endif /* end of #if !HAVE_BIGENDIAN */
> 
> Why do you keep sending patches which duplicate code?
> 
> Maybe someone can translate this to a language you understand
> better then english:
> 
> Code duplication is not allowed
> 
> #if A
>  foo
>  this
>  bar
> #else
>  foo
>  that
>  bar
> #endif
> 
> always must be replaced by
> 
>  foo
> #if A
>  this
> #else
>  that
> #endif
>  bar
> 
> 
> also the then remaining difference (this vs that) should be avoided
> as well by using appropriate macros and inline functions
> 
> there is already some duplicated code in there, this was a mistake.
> No more duplicated code should be added.


For reference the difference between the if and else
 /* Sum of Squared Errors for an 8x8 block, AltiVec-enhanced.
  * It's the sad8_altivec code above w/ squaring added. */
 static int sse8_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
                         int line_size, int h)
 {
     int i, s;
-    const vector unsigned int zero =
+    const vector unsigned int __attribute__((aligned(16)))  zero =
         (const vector unsigned int) vec_splat_u32(0);
-    const vector unsigned char permclear =
+    const vector unsigned char __attribute__((aligned(16)))  permclear =
         (vector unsigned char)
         { 255, 255, 255, 255, 255, 255, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0 };
-    vector unsigned char perm1 = vec_lvsl(0, pix1);
-    vector unsigned char perm2 = vec_lvsl(0, pix2);
-    vector unsigned int sum = (vector unsigned int) vec_splat_u32(0);
-    vector signed int sumsqr;
+    vector unsigned int __attribute__((aligned(16)))  sum = (vector unsigned 
int) vec_splat_u32(0);
+    vector signed int __attribute__((aligned(16)))  sumsqr;

     for (i = 0; i < h; i++) {
         /* Read potentially unaligned pixels into t1 and t2.
          * Since we're reading 16 pixels, and actually only want 8,
          * mask out the last 8 pixels. The 0s don't change the sum. */
-        vector unsigned char pix1l = vec_ld(0, pix1);
-        vector unsigned char pix1r = vec_ld(7, pix1);
-        vector unsigned char pix2l = vec_ld(0, pix2);
-        vector unsigned char pix2r = vec_ld(7, pix2);
-        vector unsigned char t1 = vec_and(vec_perm(pix1l, pix1r, perm1),
-                                          permclear);
-        vector unsigned char t2 = vec_and(vec_perm(pix2l, pix2r, perm2),
-                                          permclear);
+        vector unsigned char t1 = vec_and(vec_vsx_ld(0, pix1), permclear);
+        vector unsigned char t2 = vec_and(vec_vsx_ld(0, pix2), permclear);

         /* Since we want to use unsigned chars, we can take advantage
          * of the fact that abs(a - b) ^ 2 = (a - b) ^ 2. */
@@ -613,22 +614,21 @@ static int sse8_altivec(MpegEncContext *v, uint8_t *pix1, 
uint8_t *pix2,
     /* Sum up the four partial sums, and put the result into s. */
     sumsqr = vec_sums((vector signed int) sum, (vector signed int) zero);
     sumsqr = vec_splat(sumsqr, 3);
+    //vec_vsx_st(sumsqr, 0, &s);
     vec_ste(sumsqr, 0, &s);
-
     return s;
 }

this is just a difference in alignment, why is the alignment
difference needed ?

and a difference of how data is loaded, which belongs in a seperate
macro

[...]

-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Those who are best at talking, realize last or never when they are wrong.

signature.asc
Description: Digital signature

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel

Re: [FFmpeg-devel] [patch 1/3]Fix bug for POWER LE: /libavcodec/ppc/me_cmp.c

Reply via email to