This patch adds support for two new Power9 instructions, xxpermr and vpermr, providing more efficient vector permutation operations on little-endian configurations. These new instructions are described in the Power ISA 3.0 document. Selection of the new instructions is conditioned upon TARGET_P9_VECTOR and !VECTOR_ELT_ORDER_BIG.
The patch has bootstrapped and tested on powerpc64le-unknown-linux-gnu and powerpc64-unknown-linux-gnu with no regressions. Is this ok for GCC 7 when stage 1 opens?
Thanks. -- Kelvin Nilsen, Ph.D. kdnil...@linux.vnet.ibm.com home office: 801-756-4821, cell: 520-991-6727 IBM Linux Technology Center - PPC Toolchain
gcc/ChangeLog: 2016-03-07 Kelvin Nilsen <kel...@gcc.gnu.org> * config/rs6000/rs6000.c (rs6000_expand_vector_set): If !BYTES_BIG_ENDIAN and TARGET_P9_VECTOR, expand using template that translates into new xxpermr or vpermr instructions. (altivec_expand_vec_perm_le): If TARGET_P9_VECTOR, expand using template that translates into new xxpermr or vpermr instructions. * config/rs6000/altivec.md (UNSPEC_VPERMR): New unspec constant. (*altivec_vpermr_<mode>_internal): New insn. gcc/testsuite/ChangeLog: 2016-03-07 Kelvin Nilsen <kel...@gcc.gnu.org> * gcc.target/powerpc/p9-permute.c: Generalize test to run on big-endian Power9 in addition to little-endian Power9. * gcc.target/powerpc/p9-vpermr.c: New test.
Index: gcc/config/rs6000/rs6000.c =================================================================== --- gcc/config/rs6000/rs6000.c (revision 233539) +++ gcc/config/rs6000/rs6000.c (working copy) @@ -6553,19 +6553,27 @@ rs6000_expand_vector_set (rtx target, rtx val, int UNSPEC_VPERM); else { - /* Invert selector. We prefer to generate VNAND on P8 so - that future fusion opportunities can kick in, but must - generate VNOR elsewhere. */ - rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x)); - rtx iorx = (TARGET_P8_VECTOR - ? gen_rtx_IOR (V16QImode, notx, notx) - : gen_rtx_AND (V16QImode, notx, notx)); - rtx tmp = gen_reg_rtx (V16QImode); - emit_insn (gen_rtx_SET (tmp, iorx)); - - /* Permute with operands reversed and adjusted selector. */ - x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp), - UNSPEC_VPERM); + if (TARGET_P9_VECTOR) + x = gen_rtx_UNSPEC (mode, + gen_rtvec (3, target, reg, + force_reg (V16QImode, x)), + UNSPEC_VPERMR); + else + { + /* Invert selector. We prefer to generate VNAND on P8 so + that future fusion opportunities can kick in, but must + generate VNOR elsewhere. */ + rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x)); + rtx iorx = (TARGET_P8_VECTOR + ? gen_rtx_IOR (V16QImode, notx, notx) + : gen_rtx_AND (V16QImode, notx, notx)); + rtx tmp = gen_reg_rtx (V16QImode); + emit_insn (gen_rtx_SET (tmp, iorx)); + + /* Permute with operands reversed and adjusted selector. */ + x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp), + UNSPEC_VPERM); + } } emit_insn (gen_rtx_SET (target, x)); @@ -33421,18 +33429,26 @@ altivec_expand_vec_perm_le (rtx operands[4]) if (!REG_P (target)) tmp = gen_reg_rtx (mode); - /* Invert the selector with a VNAND if available, else a VNOR. - The VNAND is preferred for future fusion opportunities. */ - notx = gen_rtx_NOT (V16QImode, sel); - iorx = (TARGET_P8_VECTOR - ? 
gen_rtx_IOR (V16QImode, notx, notx) - : gen_rtx_AND (V16QImode, notx, notx)); - emit_insn (gen_rtx_SET (norreg, iorx)); + if (TARGET_P9_VECTOR) + { + unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op0, op1, sel), + UNSPEC_VPERMR); + } + else + { + /* Invert the selector with a VNAND if available, else a VNOR. + The VNAND is preferred for future fusion opportunities. */ + notx = gen_rtx_NOT (V16QImode, sel); + iorx = (TARGET_P8_VECTOR + ? gen_rtx_IOR (V16QImode, notx, notx) + : gen_rtx_AND (V16QImode, notx, notx)); + emit_insn (gen_rtx_SET (norreg, iorx)); + + /* Permute with operands reversed and adjusted selector. */ + unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg), + UNSPEC_VPERM); + } - /* Permute with operands reversed and adjusted selector. */ - unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg), - UNSPEC_VPERM); - /* Copy into target, possibly by way of a register. */ if (!REG_P (target)) { Index: gcc/config/rs6000/altivec.md =================================================================== --- gcc/config/rs6000/altivec.md (revision 233539) +++ gcc/config/rs6000/altivec.md (working copy) @@ -58,6 +58,7 @@ UNSPEC_VSUM2SWS UNSPEC_VSUMSWS UNSPEC_VPERM + UNSPEC_VPERMR UNSPEC_VPERM_UNS UNSPEC_VRFIN UNSPEC_VCFUX @@ -1962,6 +1963,20 @@ [(set_attr "type" "vecperm") (set_attr "length" "4,4,8")]) +(define_insn "*altivec_vpermr_<mode>_internal" + [(set (match_operand:VM 0 "register_operand" "=v,?wo,?&wo") + (unspec:VM [(match_operand:VM 1 "register_operand" "v,0,wo") + (match_operand:VM 2 "register_operand" "v,wo,wo") + (match_operand:V16QI 3 "register_operand" "v,wo,wo")] + UNSPEC_VPERMR))] + "TARGET_P9_VECTOR" + "@ + vpermr %0,%1,%2,%3 + xxpermr %x0,%x2,%x3 + xxlor %x0,%x1,%x1\t\t# xxpermr fusion\;xxpermr %x0,%x2,%x3" + [(set_attr "type" "vecperm") + (set_attr "length" "4,4,8")]) + (define_insn "altivec_vperm_v8hiv16qi" [(set (match_operand:V16QI 0 "register_operand" "=v,?wo,?&wo") (unspec:V16QI [(match_operand:V8HI 1 "register_operand" 
"v,0,wo") Index: gcc/testsuite/gcc.target/powerpc/p9-permute.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/p9-permute.c (revision 233539) +++ gcc/testsuite/gcc.target/powerpc/p9-permute.c (working copy) @@ -1,4 +1,4 @@ -/* { dg-do compile { target { powerpc64le-*-* } } } */ +/* { dg-do compile { target { powerpc64*-*-* } } } */ /* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */ /* { dg-options "-mcpu=power9 -O2" } */ @@ -16,5 +16,6 @@ permute (vector long long *p, vector long long *q, return vec_perm (a, b, mask); } +/* expect xxpermr on little-endian, xxperm on big-endian */ /* { dg-final { scan-assembler "xxperm" } } */ /* { dg-final { scan-assembler-not "vperm" } } */ Index: gcc/testsuite/gcc.target/powerpc/p9-vpermr.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/p9-vpermr.c (revision 0) +++ gcc/testsuite/gcc.target/powerpc/p9-vpermr.c (revision 234051) @@ -0,0 +1,19 @@ +/* { dg-do compile { target { powerpc64le-*-* } } } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */ +/* { dg-options "-mcpu=power9 -O2" } */ + +#include <altivec.h> + +vector long long +permute (vector long long *p, vector long long *q, vector unsigned char mask) +{ + vector long long a = *p; + vector long long b = *q; + + /* Force a, b to be in altivec registers to select vpermr insn. */ + __asm__ (" # a: %x0, b: %x1" : "+v" (a), "+v" (b)); + + return vec_perm (a, b, mask); +} + +/* { dg-final { scan-assembler "vpermr" } } */