[PATCH, rs6000] Correct programmer access to vperm for little endian

Bill Schmidt Fri, 15 Nov 2013 09:45:51 -0800

Hi,

A previous patch of mine was misguided.  It modified the altivec_vperm_*
patterns to use the little endian conversion trick of reversing the
input operands and complementing the permute control vector.


Looking at the Altivec manual, we really can't do this.  These patterns
need to be direct pass-throughs to the vperm instruction, as shown in
Figure 4-95 on page 130 of
http://www.freescale.com/files/32bit/doc/ref_manual/ALTIVECPIM.pdf.
Section 4.2 on page 49 confirms that big-endian byte ordering is to be
used with the Altivec instruction descriptions.

This patch reverts that specific change, revises the associated
commentary for vec_realign_load_<mode> in config/rs6000/vector.md, and
modifies the one test case affected by the change.  gcc.dg/vmx/3b-15.c
now performs the argument reversal and control vector complementing in
the source code, as all users will need to do when porting code
containing vec_perm calls to little endian.

Bootstrapped and tested on powerpc64{,le}-unknown-linux-gnu with no
regressions.  Is this ok for trunk?

Thanks,
Bill


gcc:

2013-11-15  Bill Schmidt  <wschm...@linux.vnet.ibm.com>

        * config/rs6000/altivec.md (UNSPEC_VPERM_X, UNSPEC_VPERM_UNS_X):
        Remove.
        (altivec_vperm_<mode>): Revert earlier little endian change.
        (*altivec_vperm_<mode>_internal): Remove.
        (altivec_vperm_<mode>_uns): Revert earlier little endian change.
        (*altivec_vperm_<mode>_uns_internal): Remove.
        * config/rs6000/vector.md (vec_realign_load_<mode>): Revise
        commentary.

gcc/testsuite:

2013-11-15  Bill Schmidt  <wschm...@linux.vnet.ibm.com>

        * gcc.dg/vmx/3b-15.c: Revise for little endian.


Index: gcc/testsuite/gcc.dg/vmx/3b-15.c
===================================================================
--- gcc/testsuite/gcc.dg/vmx/3b-15.c    (revision 204792)
+++ gcc/testsuite/gcc.dg/vmx/3b-15.c    (working copy)
@@ -3,7 +3,11 @@
 vector unsigned char
 f (vector unsigned char a, vector unsigned char b, vector unsigned char c)
 {
+#ifdef __BIG_ENDIAN__
   return vec_perm(a,b,c); 
+#else
+  return vec_perm(b,a,c);
+#endif
 }
 
 static void test()
@@ -12,8 +16,13 @@ static void test()
                                            8,9,10,11,12,13,14,15}),
                     ((vector unsigned char){70,71,72,73,74,75,76,77,
                                            78,79,80,81,82,83,84,85}),
+#ifdef __BIG_ENDIAN__
                     ((vector unsigned char){0x1,0x14,0x18,0x10,0x16,0x15,0x19,0x1a,
                                            0x1c,0x1c,0x1c,0x12,0x8,0x1d,0x1b,0xe})),
+#else
+                     ((vector unsigned char){0x1e,0xb,0x7,0xf,0x9,0xa,0x6,0x5,
+                                            0x3,0x3,0x3,0xd,0x17,0x2,0x4,0x11})),
+#endif
                   ((vector unsigned char){1,74,78,70,76,75,79,80,82,82,82,72,8,83,81,14})),
        "f");
 }
Index: gcc/config/rs6000/vector.md
===================================================================
--- gcc/config/rs6000/vector.md (revision 204792)
+++ gcc/config/rs6000/vector.md (working copy)
@@ -966,8 +966,8 @@
                                         operands[2], operands[3]));
   else
     {
-      /* Avoid the "subtract from splat31" workaround for vperm since
-         we have changed lvsr to lvsl instead.  */
+      /* We have changed lvsr to lvsl, so to complete the transformation
+         of vperm for LE, we must swap the inputs.  */
       rtx unspec = gen_rtx_UNSPEC (<MODE>mode,
                                    gen_rtvec (3, operands[2],
                                               operands[1], operands[3]),
Index: gcc/config/rs6000/altivec.md
===================================================================
--- gcc/config/rs6000/altivec.md        (revision 204792)
+++ gcc/config/rs6000/altivec.md        (working copy)
@@ -59,8 +59,6 @@
    UNSPEC_VSUMSWS
    UNSPEC_VPERM
    UNSPEC_VPERM_UNS
-   UNSPEC_VPERM_X
-   UNSPEC_VPERM_UNS_X
    UNSPEC_VRFIN
    UNSPEC_VCFUX
    UNSPEC_VCFSX
@@ -1393,91 +1391,21 @@
   "vrfiz %0,%1"
   [(set_attr "type" "vecfloat")])
 
-(define_insn_and_split "altivec_vperm_<mode>"
+(define_insn "altivec_vperm_<mode>"
   [(set (match_operand:VM 0 "register_operand" "=v")
        (unspec:VM [(match_operand:VM 1 "register_operand" "v")
                    (match_operand:VM 2 "register_operand" "v")
                    (match_operand:V16QI 3 "register_operand" "v")]
-                  UNSPEC_VPERM_X))]
-  "TARGET_ALTIVEC"
-  "#"
-  "!reload_in_progress && !reload_completed"
-  [(set (match_dup 0) (match_dup 4))]
-{
-  if (BYTES_BIG_ENDIAN)
-    operands[4] = gen_rtx_UNSPEC (<MODE>mode,
-                                  gen_rtvec (3, operands[1],
-                                            operands[2], operands[3]),
-                                  UNSPEC_VPERM);
-  else
-    {
-      /* We want to subtract from 31, but we can't vspltisb 31 since
-         it's out of range.  -1 works as well because only the low-order
-         five bits of the permute control vector elements are used.  */
-      rtx splat = gen_rtx_VEC_DUPLICATE (V16QImode,
-                                         gen_rtx_CONST_INT (QImode, -1));
-      rtx tmp = gen_reg_rtx (V16QImode);
-      emit_move_insn (tmp, splat);
-      rtx sel = gen_rtx_MINUS (V16QImode, tmp, operands[3]);
-      emit_move_insn (tmp, sel);
-      operands[4] = gen_rtx_UNSPEC (<MODE>mode,
-                                    gen_rtvec (3, operands[2],
-                                              operands[1], tmp),
-                                   UNSPEC_VPERM);
-    }
-}
-  [(set_attr "type" "vecperm")])
-
-(define_insn "*altivec_vperm_<mode>_internal"
-  [(set (match_operand:VM 0 "register_operand" "=v")
-       (unspec:VM [(match_operand:VM 1 "register_operand" "v")
-                   (match_operand:VM 2 "register_operand" "v")
-                   (match_operand:V16QI 3 "register_operand" "+v")]
                   UNSPEC_VPERM))]
   "TARGET_ALTIVEC"
   "vperm %0,%1,%2,%3"
   [(set_attr "type" "vecperm")])
 
-(define_insn_and_split "altivec_vperm_<mode>_uns"
+(define_insn "altivec_vperm_<mode>_uns"
   [(set (match_operand:VM 0 "register_operand" "=v")
        (unspec:VM [(match_operand:VM 1 "register_operand" "v")
                    (match_operand:VM 2 "register_operand" "v")
                    (match_operand:V16QI 3 "register_operand" "v")]
-                  UNSPEC_VPERM_UNS_X))]
-  "TARGET_ALTIVEC"
-  "#"
-  "!reload_in_progress && !reload_completed"
-  [(set (match_dup 0) (match_dup 4))]
-{
-  if (BYTES_BIG_ENDIAN)
-    operands[4] = gen_rtx_UNSPEC (<MODE>mode,
-                                  gen_rtvec (3, operands[1],
-                                            operands[2], operands[3]),
-                                  UNSPEC_VPERM_UNS);
-  else
-    {
-      /* We want to subtract from 31, but we can't vspltisb 31 since
-         it's out of range.  -1 works as well because only the low-order
-         five bits of the permute control vector elements are used.  */
-      rtx splat = gen_rtx_VEC_DUPLICATE (V16QImode,
-                                         gen_rtx_CONST_INT (QImode, -1));
-      rtx tmp = gen_reg_rtx (V16QImode);
-      emit_move_insn (tmp, splat);
-      rtx sel = gen_rtx_MINUS (V16QImode, tmp, operands[3]);
-      emit_move_insn (tmp, sel);
-      operands[4] = gen_rtx_UNSPEC (<MODE>mode,
-                                    gen_rtvec (3, operands[2],
-                                              operands[1], tmp),
-                                   UNSPEC_VPERM_UNS);
-    }
-}
-  [(set_attr "type" "vecperm")])
-
-(define_insn "*altivec_vperm_<mode>_uns_internal"
-  [(set (match_operand:VM 0 "register_operand" "=v")
-       (unspec:VM [(match_operand:VM 1 "register_operand" "v")
-                   (match_operand:VM 2 "register_operand" "v")
-                   (match_operand:V16QI 3 "register_operand" "+v")]
                   UNSPEC_VPERM_UNS))]
   "TARGET_ALTIVEC"
   "vperm %0,%1,%2,%3"

[PATCH, rs6000] Correct programmer access to vperm for little endian

Reply via email to