Power10: Add BRH, BRW, BRD support. The power10 processor adds 3 new instructions (BRH, BRW, BRD) that byte swap half-words, words, and double-words within a GPR. This patch adds support for these instructions. I have applied the suggestions from the previous submissions of this patch. I have done bootstrap builds on a Linux power8 system. I have run the regression tests, and there were no regressions, and the 3 new tests pass. Can I check this into the master branch?
gcc/ 2020-08-04 Michael Meissner <meiss...@linux.ibm.com> * config/rs6000/rs6000.md (bswaphi2_reg): Generate the BRH instruction on ISA 3.1. (bswapsi2_reg): Generate the BRW instruction on ISA 3.1. (bswapdi2): Rename bswapdi2_xxbrd to bswapdi2_brd. (bswapdi2_brd): Rename from bswapdi2_xxbrd. Generate the BRD instruction on ISA 3.1. gcc/testsuite/ 2020-08-04 Michael Meissner <meiss...@linux.ibm.com> * gcc.target/powerpc/bswap-brd.c: New test. * gcc.target/powerpc/bswap-brw.c: New test. * gcc.target/powerpc/bswap-brh.c: New test. --- gcc/config/rs6000/rs6000.md | 44 +++++++++++++++------------- gcc/testsuite/gcc.target/powerpc/bswap-brd.c | 23 +++++++++++++++ gcc/testsuite/gcc.target/powerpc/bswap-brh.c | 11 +++++++ gcc/testsuite/gcc.target/powerpc/bswap-brw.c | 22 ++++++++++++++ 4 files changed, 80 insertions(+), 20 deletions(-) create mode 100644 gcc/testsuite/gcc.target/powerpc/bswap-brd.c create mode 100644 gcc/testsuite/gcc.target/powerpc/bswap-brh.c create mode 100644 gcc/testsuite/gcc.target/powerpc/bswap-brw.c diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 48f1f1c..43b620a 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -2591,15 +2591,16 @@ (define_insn "bswap<mode>2_store" [(set_attr "type" "store")]) (define_insn_and_split "bswaphi2_reg" - [(set (match_operand:HI 0 "gpc_reg_operand" "=&r,wa") + [(set (match_operand:HI 0 "gpc_reg_operand" "=r,&r,wa") (bswap:HI - (match_operand:HI 1 "gpc_reg_operand" "r,wa"))) - (clobber (match_scratch:SI 2 "=&r,X"))] + (match_operand:HI 1 "gpc_reg_operand" "r,r,wa"))) + (clobber (match_scratch:SI 2 "=X,&r,X"))] "" "@ + brh %0,%1 # xxbrh %x0,%x1" - "reload_completed && int_reg_operand (operands[0], HImode)" + "reload_completed && !TARGET_POWER10 && int_reg_operand (operands[0], HImode)" [(set (match_dup 3) (and:SI (lshiftrt:SI (match_dup 4) (const_int 8)) @@ -2615,21 +2616,22 @@ (define_insn_and_split "bswaphi2_reg" operands[3] = simplify_gen_subreg (SImode, 
operands[0], HImode, 0); operands[4] = simplify_gen_subreg (SImode, operands[1], HImode, 0); } - [(set_attr "length" "12,4") - (set_attr "type" "*,vecperm") - (set_attr "isa" "*,p9v")]) + [(set_attr "length" "*,12,*") + (set_attr "type" "shift,*,vecperm") + (set_attr "isa" "p10,*,p9v")]) ;; We are always BITS_BIG_ENDIAN, so the bit positions below in ;; zero_extract insns do not change for -mlittle. (define_insn_and_split "bswapsi2_reg" - [(set (match_operand:SI 0 "gpc_reg_operand" "=&r,wa") + [(set (match_operand:SI 0 "gpc_reg_operand" "=r,&r,wa") (bswap:SI - (match_operand:SI 1 "gpc_reg_operand" "r,wa")))] + (match_operand:SI 1 "gpc_reg_operand" "r,r,wa")))] "" "@ + brw %0,%1 # xxbrw %x0,%x1" - "reload_completed && int_reg_operand (operands[0], SImode)" + "reload_completed && !TARGET_POWER10 && int_reg_operand (operands[0], SImode)" [(set (match_dup 0) ; DABC (rotate:SI (match_dup 1) (const_int 24))) @@ -2646,9 +2648,9 @@ (define_insn_and_split "bswapsi2_reg" (and:SI (match_dup 0) (const_int -256))))] "" - [(set_attr "length" "12,4") - (set_attr "type" "*,vecperm") - (set_attr "isa" "*,p9v")]) + [(set_attr "length" "4,12,4") + (set_attr "type" "shift,*,vecperm") + (set_attr "isa" "p10,*,p9v")]) ;; On systems with LDBRX/STDBRX generate the loads/stores directly, just like ;; we do for L{H,W}BRX and ST{H,W}BRX above. 
If not, we have to generate more @@ -2681,7 +2683,7 @@ (define_expand "bswapdi2" emit_insn (gen_bswapdi2_store (dest, src)); } else if (TARGET_P9_VECTOR) - emit_insn (gen_bswapdi2_xxbrd (dest, src)); + emit_insn (gen_bswapdi2_brd (dest, src)); else emit_insn (gen_bswapdi2_reg (dest, src)); DONE; @@ -2712,13 +2714,15 @@ (define_insn "bswapdi2_store" "stdbrx %1,%y0" [(set_attr "type" "store")]) -(define_insn "bswapdi2_xxbrd" - [(set (match_operand:DI 0 "gpc_reg_operand" "=wa") - (bswap:DI (match_operand:DI 1 "gpc_reg_operand" "wa")))] +(define_insn "bswapdi2_brd" + [(set (match_operand:DI 0 "gpc_reg_operand" "=r,wa") + (bswap:DI (match_operand:DI 1 "gpc_reg_operand" "r,wa")))] "TARGET_P9_VECTOR" - "xxbrd %x0,%x1" - [(set_attr "type" "vecperm") - (set_attr "isa" "p9v")]) + "@ + brd %0,%1 + xxbrd %x0,%x1" + [(set_attr "type" "shift,vecperm") + (set_attr "isa" "p10,p9v")]) (define_insn "bswapdi2_reg" [(set (match_operand:DI 0 "gpc_reg_operand" "=&r") diff --git a/gcc/testsuite/gcc.target/powerpc/bswap-brd.c b/gcc/testsuite/gcc.target/powerpc/bswap-brd.c new file mode 100644 index 0000000..876129e --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/bswap-brd.c @@ -0,0 +1,23 @@ +/* { dg-do compile { target { lp64 } } } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-O2 -mdejagnu-cpu=power10" } */ + +/* This tests whether GCC generates the ISA 3.1 BRD byte swap instruction for + GPR data, but generates XXBRD for data in a vector register. */ + +unsigned long long +bswap_ll (unsigned long long a) +{ + return __builtin_bswap64 (a); /* { dg-final { scan-assembler {\mbrd\M} } } */ +} + +double +bswap_ll_dbl (unsigned long long a) +{ + unsigned int b = a; + /* Force the value to be loaded into a vector register. 
*/ + __asm__ (" # %x0" : "+wa" (b)); + + /* { dg-final { scan-assembler {\mxxbrd\M} } } */ + return (double) __builtin_bswap64 (b); +} diff --git a/gcc/testsuite/gcc.target/powerpc/bswap-brh.c b/gcc/testsuite/gcc.target/powerpc/bswap-brh.c new file mode 100644 index 0000000..4dbab12 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/bswap-brh.c @@ -0,0 +1,11 @@ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-O2 -mdejagnu-cpu=power10" } */ + +/* This tests whether GCC generates the ISA 3.1 16-bit byte swap + instruction BRH. */ + +unsigned short +bswap_short (unsigned short a) +{ + return __builtin_bswap16 (a); /* { dg-final { scan-assembler {\mbrh\M} } } */ +} diff --git a/gcc/testsuite/gcc.target/powerpc/bswap-brw.c b/gcc/testsuite/gcc.target/powerpc/bswap-brw.c new file mode 100644 index 0000000..b3f923e --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/bswap-brw.c @@ -0,0 +1,22 @@ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-O2 -mdejagnu-cpu=power10" } */ + +/* This tests whether GCC generates the ISA 3.1 BRW byte swap instruction for + GPR data, but generates XXBRW for data in a vector register. */ + +unsigned int +bswap_int (unsigned int a) +{ + return __builtin_bswap32 (a); /* { dg-final { scan-assembler {\mbrw\M} } } */ +} + +double +bswap_int_dbl (unsigned int a) +{ + unsigned int b = a; + /* Force the value to be loaded into a vector register. */ + __asm__ (" # %x0" : "+wa" (b)); + + /* { dg-final { scan-assembler {\mxxbrw\M} } } */ + return (double) __builtin_bswap32 (b); +} -- 1.8.3.1 -- Michael Meissner, IBM IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA email: meiss...@linux.ibm.com, phone: +1 (978) 899-4797