This patch adds support for the ISA 3.0 XXPERM instruction, which is like VPERM, except that it can operate on any VSX register. Since XXPERM is a 3 operand instruction (the target register is also the first source, i.e. RT and RA must be the same), I made it so that VPERM is preferred. I also added XXPERM fusion support, where an XXLOR move instruction immediately before the XXPERM instruction is fused with it.
I have bootstrapped and done make check on a big endian power7 and a little endian power8 system. In addition, I built all of Spec 2006 with power9 support enabled, and all of the tests that previously built now build with XXPERM being generated (the OMNETPP benchmark currently does not build on little endian for either power8 or power9). Are these patches ok to check in? [gcc] 2015-12-31 Michael Meissner <meiss...@linux.vnet.ibm.com> * config/rs6000/constraints.md (wo constraint): New constraint for ISA 3.0 (power9). * config/rs6000/rs6000.c (rs6000_debug_reg_global): Add support for wo constraint. (rs6000_init_hard_regno_mode_ok): Likewise. * config/rs6000/rs6000.h (r6000_reg_class_enum): Add support for wo constraint. * config/rs6000/altivec.md (altivec_vperm_<mode>): Clean up vperm expanders not to have constraints. Add support for ISA 3.0 xxperm instruction. Add support for fusing xxlor with xxperm. (altivec_vperm_<mode>_internal): Likewise. (altivec_vperm_v8hiv16qi): Likewise. (altivec_vperm_<mode>v16q): Likewise. (altivec_vperm_<mode>_uns): Likewise. (vperm_v8hiv4si): Likewise. (vperm_v16qiv8hi): Likewise. * doc/md.texi (RS/6000 constraints): Document wo constraint. [gcc/testsuite] 2015-12-31 Michael Meissner <meiss...@linux.vnet.ibm.com> * gcc.target/powerpc/p9-permute.c: New test for xxperm code generation. -- Michael Meissner, IBM IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA email: meiss...@linux.vnet.ibm.com, phone: +1 (978) 899-4797
Index: gcc/config/rs6000/constraints.md =================================================================== --- gcc/config/rs6000/constraints.md (revision 232008) +++ gcc/config/rs6000/constraints.md (working copy) @@ -99,7 +99,8 @@ (define_register_constraint "wm" "rs6000 ;; There is a mode_attr that resolves to wm for SDmode and wn for SFmode (define_register_constraint "wn" "NO_REGS" "No register (NO_REGS).") -;; wo is not currently used +(define_register_constraint "wo" "rs6000_constraints[RS6000_CONSTRAINT_wo]" + "VSX register if the -mpower9-vector option was used or NO_REGS.") (define_register_constraint "wp" "rs6000_constraints[RS6000_CONSTRAINT_wp]" "VSX register to use for IEEE 128-bit fp TFmode, or NO_REGS.") Index: gcc/config/rs6000/rs6000.c =================================================================== --- gcc/config/rs6000/rs6000.c (revision 232008) +++ gcc/config/rs6000/rs6000.c (working copy) @@ -2284,6 +2284,7 @@ rs6000_debug_reg_global (void) "wk reg_class = %s\n" "wl reg_class = %s\n" "wm reg_class = %s\n" + "wo reg_class = %s\n" "wp reg_class = %s\n" "wq reg_class = %s\n" "wr reg_class = %s\n" @@ -2311,6 +2312,7 @@ rs6000_debug_reg_global (void) reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wk]], reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wl]], reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wm]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wo]], reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wp]], reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wq]], reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]], @@ -3019,7 +3021,11 @@ rs6000_init_hard_regno_mode_ok (bool glo if (TARGET_P9_DFORM) rs6000_constraints[RS6000_CONSTRAINT_wb] = ALTIVEC_REGS; - /* Support for new direct moves. */ + /* Support for ISA 3.0 (power9) vectors. */ + if (TARGET_P9_VECTOR) + rs6000_constraints[RS6000_CONSTRAINT_wo] = VSX_REGS; + + /* Support for new direct moves (ISA 3.0 + 64bit). 
*/ if (TARGET_DIRECT_MOVE_128) rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS; Index: gcc/config/rs6000/rs6000.h =================================================================== --- gcc/config/rs6000/rs6000.h (revision 232008) +++ gcc/config/rs6000/rs6000.h (working copy) @@ -1535,6 +1535,7 @@ enum r6000_reg_class_enum { RS6000_CONSTRAINT_wk, /* FPR/VSX register for DFmode direct moves. */ RS6000_CONSTRAINT_wl, /* FPR register for LFIWAX */ RS6000_CONSTRAINT_wm, /* VSX register for direct move */ + RS6000_CONSTRAINT_wo, /* VSX register for power9 vector. */ RS6000_CONSTRAINT_wp, /* VSX reg for IEEE 128-bit fp TFmode. */ RS6000_CONSTRAINT_wq, /* VSX reg for IEEE 128-bit fp KFmode. */ RS6000_CONSTRAINT_wr, /* GPR register if 64-bit */ Index: gcc/config/rs6000/altivec.md =================================================================== --- gcc/config/rs6000/altivec.md (revision 232008) +++ gcc/config/rs6000/altivec.md (working copy) @@ -1933,10 +1933,10 @@ (define_insn "*altivec_vrfiz" [(set_attr "type" "vecfloat")]) (define_expand "altivec_vperm_<mode>" - [(set (match_operand:VM 0 "register_operand" "=v") - (unspec:VM [(match_operand:VM 1 "register_operand" "v") - (match_operand:VM 2 "register_operand" "v") - (match_operand:V16QI 3 "register_operand" "v")] + [(set (match_operand:VM 0 "register_operand" "") + (unspec:VM [(match_operand:VM 1 "register_operand" "") + (match_operand:VM 2 "register_operand" "") + (match_operand:V16QI 3 "register_operand" "")] UNSPEC_VPERM))] "TARGET_ALTIVEC" { @@ -1947,31 +1947,40 @@ (define_expand "altivec_vperm_<mode>" } }) +;; Slightly prefer vperm, since the target does not overlap the source (define_insn "*altivec_vperm_<mode>_internal" - [(set (match_operand:VM 0 "register_operand" "=v") - (unspec:VM [(match_operand:VM 1 "register_operand" "v") - (match_operand:VM 2 "register_operand" "v") - (match_operand:V16QI 3 "register_operand" "v")] + [(set (match_operand:VM 0 "register_operand" "=v,?wo,?&wo") + (unspec:VM 
[(match_operand:VM 1 "register_operand" "v,0,wo") + (match_operand:VM 2 "register_operand" "v,wo,wo") + (match_operand:V16QI 3 "register_operand" "v,wo,wo")] UNSPEC_VPERM))] "TARGET_ALTIVEC" - "vperm %0,%1,%2,%3" - [(set_attr "type" "vecperm")]) + "@ + vperm %0,%1,%2,%3 + xxperm %x0,%x2,%x3 + xxlor %x0,%x1,%x1\t\t# xxperm fusion\;xxperm %x0,%x2,%x3" + [(set_attr "type" "vecperm") + (set_attr "length" "4,4,8")]) (define_insn "altivec_vperm_v8hiv16qi" - [(set (match_operand:V16QI 0 "register_operand" "=v") - (unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v") - (match_operand:V8HI 2 "register_operand" "v") - (match_operand:V16QI 3 "register_operand" "v")] + [(set (match_operand:V16QI 0 "register_operand" "=v,?wo,?&wo") + (unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v,0,wo") + (match_operand:V8HI 2 "register_operand" "v,wo,wo") + (match_operand:V16QI 3 "register_operand" "v,wo,wo")] UNSPEC_VPERM))] "TARGET_ALTIVEC" - "vperm %0,%1,%2,%3" - [(set_attr "type" "vecperm")]) + "@ + vperm %0,%1,%2,%3 + xxperm %x0,%x2,%x3 + xxlor %x0,%x1,%x1\t\t# xxperm fusion\;xxperm %x0,%x2,%x3" + [(set_attr "type" "vecperm") + (set_attr "length" "4,4,8")]) (define_expand "altivec_vperm_<mode>_uns" - [(set (match_operand:VM 0 "register_operand" "=v") - (unspec:VM [(match_operand:VM 1 "register_operand" "v") - (match_operand:VM 2 "register_operand" "v") - (match_operand:V16QI 3 "register_operand" "v")] + [(set (match_operand:VM 0 "register_operand" "") + (unspec:VM [(match_operand:VM 1 "register_operand" "") + (match_operand:VM 2 "register_operand" "") + (match_operand:V16QI 3 "register_operand" "")] UNSPEC_VPERM_UNS))] "TARGET_ALTIVEC" { @@ -1983,14 +1992,18 @@ (define_expand "altivec_vperm_<mode>_uns }) (define_insn "*altivec_vperm_<mode>_uns_internal" - [(set (match_operand:VM 0 "register_operand" "=v") - (unspec:VM [(match_operand:VM 1 "register_operand" "v") - (match_operand:VM 2 "register_operand" "v") - (match_operand:V16QI 3 "register_operand" "v")] + [(set 
(match_operand:VM 0 "register_operand" "=v,?wo,?&wo") + (unspec:VM [(match_operand:VM 1 "register_operand" "v,0,wo") + (match_operand:VM 2 "register_operand" "v,wo,wo") + (match_operand:V16QI 3 "register_operand" "v,wo,wo")] UNSPEC_VPERM_UNS))] "TARGET_ALTIVEC" - "vperm %0,%1,%2,%3" - [(set_attr "type" "vecperm")]) + "@ + vperm %0,%1,%2,%3 + xxperm %x0,%x2,%x3 + xxlor %x0,%x1,%x1\t\t# xxperm fusion\;xxperm %x0,%x2,%x3" + [(set_attr "type" "vecperm") + (set_attr "length" "4,4,8")]) (define_expand "vec_permv16qi" [(set (match_operand:V16QI 0 "register_operand" "") @@ -2778,24 +2791,32 @@ (define_expand "vec_unpacks_lo_<VP_small "") (define_insn "vperm_v8hiv4si" - [(set (match_operand:V4SI 0 "register_operand" "=v") - (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v") - (match_operand:V4SI 2 "register_operand" "v") - (match_operand:V16QI 3 "register_operand" "v")] + [(set (match_operand:V4SI 0 "register_operand" "=v,?wo,?&wo") + (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v,0,wo") + (match_operand:V4SI 2 "register_operand" "v,wo,wo") + (match_operand:V16QI 3 "register_operand" "v,wo,wo")] UNSPEC_VPERMSI))] "TARGET_ALTIVEC" - "vperm %0,%1,%2,%3" - [(set_attr "type" "vecperm")]) + "@ + vperm %0,%1,%2,%3 + xxperm %x0,%x2,%x3 + xxlor %x0,%x1,%x1\t\t# xxperm fusion\;xxperm %x0,%x2,%x3" + [(set_attr "type" "vecperm") + (set_attr "length" "4,4,8")]) (define_insn "vperm_v16qiv8hi" - [(set (match_operand:V8HI 0 "register_operand" "=v") - (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v") - (match_operand:V8HI 2 "register_operand" "v") - (match_operand:V16QI 3 "register_operand" "v")] + [(set (match_operand:V8HI 0 "register_operand" "=v,?wo,?&wo") + (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v,0,wo") + (match_operand:V8HI 2 "register_operand" "v,wo,wo") + (match_operand:V16QI 3 "register_operand" "v,wo,wo")] UNSPEC_VPERMHI))] "TARGET_ALTIVEC" - "vperm %0,%1,%2,%3" - [(set_attr "type" "vecperm")]) + "@ + vperm %0,%1,%2,%3 + xxperm 
%x0,%x2,%x3 + xxlor %x0,%x1,%x1\t\t# xxperm fusion\;xxperm %x0,%x2,%x3" + [(set_attr "type" "vecperm") + (set_attr "length" "4,4,8")]) (define_expand "vec_unpacku_hi_v16qi" Index: gcc/doc/md.texi =================================================================== --- gcc/doc/md.texi (revision 232008) +++ gcc/doc/md.texi (working copy) @@ -3102,8 +3102,8 @@ Any VSX register if the -mvsx option was When using any of the register constraints (@code{wa}, @code{wd}, @code{wf}, @code{wg}, @code{wh}, @code{wi}, @code{wj}, @code{wk}, -@code{wl}, @code{wm}, @code{wp}, @code{wq}, @code{ws}, @code{wt}, -@code{wu}, @code{wv}, @code{ww}, or @code{wy}) +@code{wl}, @code{wm}, @code{wo}, @code{wp}, @code{wq}, @code{ws}, +@code{wt}, @code{wu}, @code{wv}, @code{ww}, or @code{wy}) that take VSX registers, you must use @code{%x<n>} in the template so that the correct register is used. Otherwise the register number output in the assembly file will be incorrect if an Altivec register @@ -3175,6 +3175,9 @@ VSX register if direct move instructions @item wn No register (NO_REGS). +@item wo +VSX register to use for ISA 3.0 vector instructions, or NO_REGS. + @item wp VSX register to use for IEEE 128-bit floating point TFmode, or NO_REGS. Index: gcc/testsuite/gcc.target/powerpc/p9-permute.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/p9-permute.c (revision 0) +++ gcc/testsuite/gcc.target/powerpc/p9-permute.c (revision 0) @@ -0,0 +1,20 @@ +/* { dg-do compile { target { powerpc64le-*-* } } } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */ +/* { dg-options "-mcpu=power9 -O2" } */ + +#include <altivec.h> + +vector long long +permute (vector long long *p, vector long long *q, vector unsigned char mask) +{ + vector long long a = *p; + vector long long b = *q; + + /* Force a, b to be in FPR registers. 
*/ + __asm__ (" # a: %x0, b: %x1" : "+d" (a), "+d" (b)); + + return vec_perm (a, b, mask); +} + +/* { dg-final { scan-assembler "xxperm" } } */ +/* { dg-final { scan-assembler-not "vperm" } } */