This patch adds support for the ISA 3.0 XXPERM instruction, which is like
VPERM, except that it can operate on any VSX register.  Since XXPERM is a 3
operand instruction (RT and RA must be the same), I made it so that VPERM is
preferred.  I also added XXPERM fusion support, where an XXLOR move instruction
immediately before the XXPERM instruction is fused with it.

I have bootstrapped and done make check on a big endian power7 and a little
endian power8 system.  In addition, I built all of Spec 2006 with power9
support enabled, and all of the tests that previously built now build with
XXPERM being generated (the OMNETPP benchmark currently does not build on
little endian for either power8 or power9).  Are these patches ok to check in?

[gcc]
2015-12-31  Michael Meissner  <meiss...@linux.vnet.ibm.com>

        * config/rs6000/constraints.md (wo constraint): New constraint for
        ISA 3.0 (power9).

        * config/rs6000/rs6000.c (rs6000_debug_reg_global): Add support
        for wo constraint.
        (rs6000_init_hard_regno_mode_ok): Likewise.

        * config/rs6000/rs6000.h (r6000_reg_class_enum): Add support for
        wo constraint.

        * config/rs6000/altivec.md (altivec_vperm_<mode>): Clean up vperm
        expanders not to have constraints.  Add support for ISA 3.0 xxperm
        instruction.  Add support for fusing xxlor with xxperm.
        (altivec_vperm_<mode>_internal): Likewise.
        (altivec_vperm_v8hiv16qi): Likewise.
        (altivec_vperm_<mode>_uns): Likewise.
        (altivec_vperm_<mode>_uns_internal): Likewise.
        (vperm_v8hiv4si): Likewise.
        (vperm_v16qiv8hi): Likewise.

        * doc/md.texi (RS/6000 constraints): Document wo constraint.

[gcc/testsuite]
2015-12-31  Michael Meissner  <meiss...@linux.vnet.ibm.com>

        * gcc.target/powerpc/p9-permute.c: New test for xxperm code
        generation.

-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.vnet.ibm.com, phone: +1 (978) 899-4797
Index: gcc/config/rs6000/constraints.md
===================================================================
--- gcc/config/rs6000/constraints.md    (revision 232008)
+++ gcc/config/rs6000/constraints.md    (working copy)
@@ -99,7 +99,8 @@ (define_register_constraint "wm" "rs6000
 ;; There is a mode_attr that resolves to wm for SDmode and wn for SFmode
 (define_register_constraint "wn" "NO_REGS" "No register (NO_REGS).")
 
-;; wo is not currently used
+(define_register_constraint "wo" "rs6000_constraints[RS6000_CONSTRAINT_wo]"
+  "VSX register if the -mpower9-vector option was used or NO_REGS.")
 
 (define_register_constraint "wp" "rs6000_constraints[RS6000_CONSTRAINT_wp]"
   "VSX register to use for IEEE 128-bit fp TFmode, or NO_REGS.")
Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c  (revision 232008)
+++ gcc/config/rs6000/rs6000.c  (working copy)
@@ -2284,6 +2284,7 @@ rs6000_debug_reg_global (void)
           "wk reg_class = %s\n"
           "wl reg_class = %s\n"
           "wm reg_class = %s\n"
+          "wo reg_class = %s\n"
           "wp reg_class = %s\n"
           "wq reg_class = %s\n"
           "wr reg_class = %s\n"
@@ -2311,6 +2312,7 @@ rs6000_debug_reg_global (void)
           reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wk]],
           reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wl]],
           reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wm]],
+          reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wo]],
           reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wp]],
           reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wq]],
           reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
@@ -3019,7 +3021,11 @@ rs6000_init_hard_regno_mode_ok (bool glo
   if (TARGET_P9_DFORM)
     rs6000_constraints[RS6000_CONSTRAINT_wb] = ALTIVEC_REGS;
 
-  /* Support for new direct moves.  */
+  /* Support for ISA 3.0 (power9) vectors.  */
+  if (TARGET_P9_VECTOR)
+    rs6000_constraints[RS6000_CONSTRAINT_wo] = VSX_REGS;
+
+  /* Support for new direct moves (ISA 3.0 + 64bit).  */
   if (TARGET_DIRECT_MOVE_128)
     rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
 
Index: gcc/config/rs6000/rs6000.h
===================================================================
--- gcc/config/rs6000/rs6000.h  (revision 232008)
+++ gcc/config/rs6000/rs6000.h  (working copy)
@@ -1535,6 +1535,7 @@ enum r6000_reg_class_enum {
  RS6000_CONSTRAINT_wk,                /* FPR/VSX register for DFmode direct moves. */
   RS6000_CONSTRAINT_wl,                /* FPR register for LFIWAX */
   RS6000_CONSTRAINT_wm,                /* VSX register for direct move */
+  RS6000_CONSTRAINT_wo,                /* VSX register for power9 vector.  */
  RS6000_CONSTRAINT_wp,                /* VSX reg for IEEE 128-bit fp TFmode. */
  RS6000_CONSTRAINT_wq,                /* VSX reg for IEEE 128-bit fp KFmode.  */
   RS6000_CONSTRAINT_wr,                /* GPR register if 64-bit  */
Index: gcc/config/rs6000/altivec.md
===================================================================
--- gcc/config/rs6000/altivec.md        (revision 232008)
+++ gcc/config/rs6000/altivec.md        (working copy)
@@ -1933,10 +1933,10 @@ (define_insn "*altivec_vrfiz"
   [(set_attr "type" "vecfloat")])
 
 (define_expand "altivec_vperm_<mode>"
-  [(set (match_operand:VM 0 "register_operand" "=v")
-       (unspec:VM [(match_operand:VM 1 "register_operand" "v")
-                   (match_operand:VM 2 "register_operand" "v")
-                   (match_operand:V16QI 3 "register_operand" "v")]
+  [(set (match_operand:VM 0 "register_operand" "")
+       (unspec:VM [(match_operand:VM 1 "register_operand" "")
+                   (match_operand:VM 2 "register_operand" "")
+                   (match_operand:V16QI 3 "register_operand" "")]
                   UNSPEC_VPERM))]
   "TARGET_ALTIVEC"
 {
@@ -1947,31 +1947,40 @@ (define_expand "altivec_vperm_<mode>"
     }
 })
 
+;; Slightly prefer vperm, since the target does not overlap the source
 (define_insn "*altivec_vperm_<mode>_internal"
-  [(set (match_operand:VM 0 "register_operand" "=v")
-       (unspec:VM [(match_operand:VM 1 "register_operand" "v")
-                   (match_operand:VM 2 "register_operand" "v")
-                   (match_operand:V16QI 3 "register_operand" "v")]
+  [(set (match_operand:VM 0 "register_operand" "=v,?wo,?&wo")
+       (unspec:VM [(match_operand:VM 1 "register_operand" "v,0,wo")
+                   (match_operand:VM 2 "register_operand" "v,wo,wo")
+                   (match_operand:V16QI 3 "register_operand" "v,wo,wo")]
                   UNSPEC_VPERM))]
   "TARGET_ALTIVEC"
-  "vperm %0,%1,%2,%3"
-  [(set_attr "type" "vecperm")])
+  "@
+   vperm %0,%1,%2,%3
+   xxperm %x0,%x2,%x3
+   xxlor %x0,%x1,%x1\t\t# xxperm fusion\;xxperm %x0,%x2,%x3"
+  [(set_attr "type" "vecperm")
+   (set_attr "length" "4,4,8")])
 
 (define_insn "altivec_vperm_v8hiv16qi"
-  [(set (match_operand:V16QI 0 "register_operand" "=v")
-       (unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v")
-                      (match_operand:V8HI 2 "register_operand" "v")
-                      (match_operand:V16QI 3 "register_operand" "v")]
+  [(set (match_operand:V16QI 0 "register_operand" "=v,?wo,?&wo")
+       (unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v,0,wo")
+                      (match_operand:V8HI 2 "register_operand" "v,wo,wo")
+                      (match_operand:V16QI 3 "register_operand" "v,wo,wo")]
                   UNSPEC_VPERM))]
   "TARGET_ALTIVEC"
-  "vperm %0,%1,%2,%3"
-  [(set_attr "type" "vecperm")])
+  "@
+   vperm %0,%1,%2,%3
+   xxperm %x0,%x2,%x3
+   xxlor %x0,%x1,%x1\t\t# xxperm fusion\;xxperm %x0,%x2,%x3"
+  [(set_attr "type" "vecperm")
+   (set_attr "length" "4,4,8")])
 
 (define_expand "altivec_vperm_<mode>_uns"
-  [(set (match_operand:VM 0 "register_operand" "=v")
-       (unspec:VM [(match_operand:VM 1 "register_operand" "v")
-                   (match_operand:VM 2 "register_operand" "v")
-                   (match_operand:V16QI 3 "register_operand" "v")]
+  [(set (match_operand:VM 0 "register_operand" "")
+       (unspec:VM [(match_operand:VM 1 "register_operand" "")
+                   (match_operand:VM 2 "register_operand" "")
+                   (match_operand:V16QI 3 "register_operand" "")]
                   UNSPEC_VPERM_UNS))]
   "TARGET_ALTIVEC"
 {
@@ -1983,14 +1992,18 @@ (define_expand "altivec_vperm_<mode>_uns
 })
 
 (define_insn "*altivec_vperm_<mode>_uns_internal"
-  [(set (match_operand:VM 0 "register_operand" "=v")
-       (unspec:VM [(match_operand:VM 1 "register_operand" "v")
-                   (match_operand:VM 2 "register_operand" "v")
-                   (match_operand:V16QI 3 "register_operand" "v")]
+  [(set (match_operand:VM 0 "register_operand" "=v,?wo,?&wo")
+       (unspec:VM [(match_operand:VM 1 "register_operand" "v,0,wo")
+                   (match_operand:VM 2 "register_operand" "v,wo,wo")
+                   (match_operand:V16QI 3 "register_operand" "v,wo,wo")]
                   UNSPEC_VPERM_UNS))]
   "TARGET_ALTIVEC"
-  "vperm %0,%1,%2,%3"
-  [(set_attr "type" "vecperm")])
+  "@
+   vperm %0,%1,%2,%3
+   xxperm %x0,%x2,%x3
+   xxlor %x0,%x1,%x1\t\t# xxperm fusion\;xxperm %x0,%x2,%x3"
+  [(set_attr "type" "vecperm")
+   (set_attr "length" "4,4,8")])
 
 (define_expand "vec_permv16qi"
   [(set (match_operand:V16QI 0 "register_operand" "")
@@ -2778,24 +2791,32 @@ (define_expand "vec_unpacks_lo_<VP_small
   "")
 
 (define_insn "vperm_v8hiv4si"
-  [(set (match_operand:V4SI 0 "register_operand" "=v")
-        (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")
-                   (match_operand:V4SI 2 "register_operand" "v")
-                   (match_operand:V16QI 3 "register_operand" "v")]
+  [(set (match_operand:V4SI 0 "register_operand" "=v,?wo,?&wo")
+        (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v,0,wo")
+                     (match_operand:V4SI 2 "register_operand" "v,wo,wo")
+                     (match_operand:V16QI 3 "register_operand" "v,wo,wo")]
                   UNSPEC_VPERMSI))]
   "TARGET_ALTIVEC"
-  "vperm %0,%1,%2,%3"
-  [(set_attr "type" "vecperm")])
+  "@
+   vperm %0,%1,%2,%3
+   xxperm %x0,%x2,%x3
+   xxlor %x0,%x1,%x1\t\t# xxperm fusion\;xxperm %x0,%x2,%x3"
+  [(set_attr "type" "vecperm")
+   (set_attr "length" "4,4,8")])
 
 (define_insn "vperm_v16qiv8hi"
-  [(set (match_operand:V8HI 0 "register_operand" "=v")
-        (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")
-                   (match_operand:V8HI 2 "register_operand" "v")
-                   (match_operand:V16QI 3 "register_operand" "v")]
+  [(set (match_operand:V8HI 0 "register_operand" "=v,?wo,?&wo")
+        (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v,0,wo")
+                     (match_operand:V8HI 2 "register_operand" "v,wo,wo")
+                     (match_operand:V16QI 3 "register_operand" "v,wo,wo")]
                   UNSPEC_VPERMHI))]
   "TARGET_ALTIVEC"
-  "vperm %0,%1,%2,%3"
-  [(set_attr "type" "vecperm")])
+  "@
+   vperm %0,%1,%2,%3
+   xxperm %x0,%x2,%x3
+   xxlor %x0,%x1,%x1\t\t# xxperm fusion\;xxperm %x0,%x2,%x3"
+  [(set_attr "type" "vecperm")
+   (set_attr "length" "4,4,8")])
 
 
 (define_expand "vec_unpacku_hi_v16qi"
Index: gcc/doc/md.texi
===================================================================
--- gcc/doc/md.texi     (revision 232008)
+++ gcc/doc/md.texi     (working copy)
@@ -3102,8 +3102,8 @@ Any VSX register if the -mvsx option was
 
 When using any of the register constraints (@code{wa}, @code{wd},
 @code{wf}, @code{wg}, @code{wh}, @code{wi}, @code{wj}, @code{wk},
-@code{wl}, @code{wm}, @code{wp}, @code{wq}, @code{ws}, @code{wt},
-@code{wu}, @code{wv}, @code{ww}, or @code{wy})
+@code{wl}, @code{wm}, @code{wo}, @code{wp}, @code{wq}, @code{ws},
+@code{wt}, @code{wu}, @code{wv}, @code{ww}, or @code{wy})
 that take VSX registers, you must use @code{%x<n>} in the template so
 that the correct register is used.  Otherwise the register number
 output in the assembly file will be incorrect if an Altivec register
@@ -3175,6 +3175,9 @@ VSX register if direct move instructions
 @item wn
 No register (NO_REGS).
 
+@item wo
+VSX register to use for ISA 3.0 vector instructions, or NO_REGS.
+
 @item wp
 VSX register to use for IEEE 128-bit floating point TFmode, or NO_REGS.
 
Index: gcc/testsuite/gcc.target/powerpc/p9-permute.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/p9-permute.c       (revision 0)
+++ gcc/testsuite/gcc.target/powerpc/p9-permute.c       (revision 0)
@@ -0,0 +1,20 @@
+/* { dg-do compile { target { powerpc64le-*-* } } } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
+/* { dg-options "-mcpu=power9 -O2" } */
+
+#include <altivec.h>
+
+vector long long
+permute (vector long long *p, vector long long *q, vector unsigned char mask)
+{
+  vector long long a = *p;
+  vector long long b = *q;
+
+  /* Force a, b to be in FPR registers.  */
+  __asm__ (" # a: %x0, b: %x1" : "+d" (a), "+d" (b));
+
+  return vec_perm (a, b, mask);
+}
+
+/* { dg-final { scan-assembler    "xxperm" } } */
+/* { dg-final { scan-assembler-not "vperm"  } } */

Reply via email to