From c6874ecb050d7d539b8306f90d6603167415e1ea Mon Sep 17 00:00:00 2001
From: Rong Yan <rongyan236@gmail.com>
Date: Thu, 27 Nov 2014 05:53:08 +0000
Subject: [PATCH 5/5] libavcodec/ppc/h264qpel.c: fix put_pixels16_l2_altivec()
 and avg_pixels16_l2_altivec(), add macros put_unaligned_store() and
 avg_unaligned_store() for little-endian POWER

---
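Notes for reviewers (below the "---", so not part of the commit message):

The big-endian put_unaligned_store() keeps the classic AltiVec
read-modify-write sequence for storing 16 bytes to a possibly unaligned
address: vec_st() silently truncates its address to a 16-byte boundary,
so the two aligned blocks covering dest[0..15] are loaded, the bytes
outside that range are preserved with vec_perm(), and both blocks are
written back. A minimal standalone sketch of the same technique (the
function name and locals are illustrative, not taken from this patch):

  #include <altivec.h>

  /* Big-endian AltiVec: store 16 bytes of s to a possibly unaligned
   * dest; vec_st() alone would round the address down to 16 bytes. */
  static void store16_unaligned(vector unsigned char s, unsigned char *dest)
  {
      vector unsigned char lo    = vec_ld(0, dest);    /* block holding dest[0]  */
      vector unsigned char hi    = vec_ld(15, dest);   /* block holding dest[15] */
      vector unsigned char mask  = vec_lvsl(0, dest);  /* left-shift permute     */
      vector unsigned char align = vec_lvsr(0, dest);  /* right-shift permute    */
      /* Gather the neighbouring bytes that must survive the store. */
      vector unsigned char edges = vec_perm(hi, lo, mask);
      /* Rotate s into place and splice the preserved edges back in. */
      vec_st(vec_perm(edges, s, align), 0, dest);
      vec_st(vec_perm(s, edges, align), 15, dest);
  }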
 libavcodec/ppc/h264qpel.c | 111 ++++++++++++++++++++++------------------------
 1 file changed, 53 insertions(+), 58 deletions(-)
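On little-endian POWER8 the permute sequence is unnecessary because the
VSX intrinsics vec_vsx_ld()/vec_vsx_st() accept arbitrary byte addresses;
that is all the #else branches of the two macros do. Sketched standalone
under the same caveat (names are illustrative):

  #include <altivec.h>

  /* LE/VSX: an unaligned 16-byte store is a single instruction. */
  static void store16_unaligned_vsx(vector unsigned char s, unsigned char *dest)
  {
      vec_vsx_st(s, 0, dest);
  }

  /* LE/VSX counterpart of avg_unaligned_store(): average the new pixels
   * with what is already at dest, then store the result. */
  static void avg_store16_unaligned_vsx(vector unsigned char s, unsigned char *dest)
  {
      vec_vsx_st(vec_avg(vec_vsx_ld(0, dest), s), 0, dest);
  }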

diff --git a/libavcodec/ppc/h264qpel.c b/libavcodec/ppc/h264qpel.c
index 4a01f17..3112333 100644
--- a/libavcodec/ppc/h264qpel.c
+++ b/libavcodec/ppc/h264qpel.c
@@ -191,86 +191,81 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## CODETYPE(uint8_t *dst, cons
     OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZE, SIZE);\
 }\
 
+
+#if HAVE_BIGENDIAN
+#define put_unaligned_store(s, dest) {\
+    tmp1 = vec_ld(0, dest);\
+    mask = vec_lvsl(0, dest);\
+    tmp2 = vec_ld(15, dest);\
+    edges = vec_perm(tmp2, tmp1, mask);\
+    align = vec_lvsr(0, dest);\
+    tmp2 = vec_perm(s, edges, align);\
+    tmp1 = vec_perm(edges, s, align);\
+    vec_st(tmp2, 15, dest);\
+    vec_st(tmp1, 0, dest);\
+}
+#else /* HAVE_BIGENDIAN */
+#define put_unaligned_store(s, dest) {\
+    vec_vsx_st(s, 0, dest);\
+}
+#endif /* HAVE_BIGENDIAN */
 static inline void put_pixels16_l2_altivec( uint8_t * dst, const uint8_t * src1,
                                     const uint8_t * src2, int dst_stride,
                                     int src_stride1, int h)
 {
     int i;
-    vec_u8 a, b, d, tmp1, tmp2, mask, mask_, edges, align;
-
+    vec_u8 a, b, d, mask_;
+#if HAVE_BIGENDIAN
+    vec_u8 tmp1, tmp2, mask, edges, align;
     mask_ = vec_lvsl(0, src2);
+#endif
 
     for (i = 0; i < h; i++) {
-
-        tmp1 = vec_ld(i * src_stride1, src1);
-        mask = vec_lvsl(i * src_stride1, src1);
-        tmp2 = vec_ld(i * src_stride1 + 15, src1);
-
-        a = vec_perm(tmp1, tmp2, mask);
-
-        tmp1 = vec_ld(i * 16, src2);
-        tmp2 = vec_ld(i * 16 + 15, src2);
-
-        b = vec_perm(tmp1, tmp2, mask_);
-
-        tmp1 = vec_ld(0, dst);
-        mask = vec_lvsl(0, dst);
-        tmp2 = vec_ld(15, dst);
-
+        a = unaligned_load(i * src_stride1, src1);
+        b = load_with_perm_vec(i * 16, src2, mask_);
         d = vec_avg(a, b);
-
-        edges = vec_perm(tmp2, tmp1, mask);
-
-        align = vec_lvsr(0, dst);
-
-        tmp2 = vec_perm(d, edges, align);
-        tmp1 = vec_perm(edges, d, align);
-
-        vec_st(tmp2, 15, dst);
-        vec_st(tmp1, 0 , dst);
-
+        put_unaligned_store(d, dst);
         dst += dst_stride;
     }
 }
 
+#if HAVE_BIGENDIAN
+#define avg_unaligned_store(s, dest) {\
+    tmp1 = vec_ld(0, dest);\
+    mask = vec_lvsl(0, dest);\
+    tmp2 = vec_ld(15, dest);\
+    a = vec_avg(vec_perm(tmp1, tmp2, mask), s);\
+    edges = vec_perm(tmp2, tmp1, mask);\
+    align = vec_lvsr(0, dest);\
+    tmp2 = vec_perm(a, edges, align);\
+    tmp1 = vec_perm(edges, a, align);\
+    vec_st(tmp2, 15, dest);\
+    vec_st(tmp1, 0, dest);\
+}
+#else /* HAVE_BIGENDIAN */
+#define avg_unaligned_store(s, dest) {\
+    a = vec_avg(vec_vsx_ld(0, dest), s);\
+    vec_vsx_st(a, 0, dest);\
+}
+#endif /* HAVE_BIGENDIAN */
+
 static inline void avg_pixels16_l2_altivec( uint8_t * dst, const uint8_t * src1,
                                     const uint8_t * src2, int dst_stride,
                                     int src_stride1, int h)
 {
     int i;
-    vec_u8 a, b, d, tmp1, tmp2, mask, mask_, edges, align;
+    vec_u8 a, b, d, mask_;
 
+#if HAVE_BIGENDIAN
+    vec_u8 tmp1, tmp2, mask, edges, align;
     mask_ = vec_lvsl(0, src2);
+#endif
 
     for (i = 0; i < h; i++) {
-
-        tmp1 = vec_ld(i * src_stride1, src1);
-        mask = vec_lvsl(i * src_stride1, src1);
-        tmp2 = vec_ld(i * src_stride1 + 15, src1);
-
-        a = vec_perm(tmp1, tmp2, mask);
-
-        tmp1 = vec_ld(i * 16, src2);
-        tmp2 = vec_ld(i * 16 + 15, src2);
-
-        b = vec_perm(tmp1, tmp2, mask_);
-
-        tmp1 = vec_ld(0, dst);
-        mask = vec_lvsl(0, dst);
-        tmp2 = vec_ld(15, dst);
-
-        d = vec_avg(vec_perm(tmp1, tmp2, mask), vec_avg(a, b));
-
-        edges = vec_perm(tmp2, tmp1, mask);
-
-        align = vec_lvsr(0, dst);
-
-        tmp2 = vec_perm(d, edges, align);
-        tmp1 = vec_perm(edges, d, align);
-
-        vec_st(tmp2, 15, dst);
-        vec_st(tmp1, 0 , dst);
-
+        a = unaligned_load(i * src_stride1, src1);
+        b = load_with_perm_vec(i * 16, src2, mask_);
+        d = vec_avg(a, b);
+        avg_unaligned_store(d, dst);
         dst += dst_stride;
     }
 }
-- 
1.9.1

