This is a revised version of the patch posted at https://gcc.gnu.org/pipermail/gcc-patches/2020-March/542718.html, resend this since this is a quite high priority performance issue for Power.
Don't split code from add<mode>3 for SDI to allow a later pass to split. This allows later logic to hoist out constant load in add instructions. In loop, lis+ori could be hoisted out to improve performance compared with previous addis+addi (About 15% on typical case), weak point is one more register is used and one more instruction is generated. i.e.: addis 3,3,0x6765 addi 3,3,0x4321 => lis 9,0x6765 ori 9,9,0x4321 add 3,3,9 Likewise, paddi is replaced with pli+add for Power10. No obvious performance and binary size change to SPEC2017. gcc/ChangeLog: 2020-10-21 Xiong Hu Luo <luo...@linux.ibm.com> * config/rs6000/rs6000.md (add<mode>3 for SDI): Don't split before reload, move constant to temp register for add. gcc/testsuite/ChangeLog: 2020-10-21 Xiong Hu Luo <luo...@linux.ibm.com> * gcc.target/powerpc/prefix-add.c: Check pli instead of paddi. * gcc.target/powerpc/prefix-no-update.c: Likewise. * gcc.target/powerpc/add-const.c: New test. --- gcc/config/rs6000/predicates.md | 3 +- gcc/config/rs6000/rs6000.md | 54 +++++++++++-------- gcc/testsuite/gcc.target/powerpc/add-const.c | 18 +++++++ gcc/testsuite/gcc.target/powerpc/prefix-add.c | 4 +- .../gcc.target/powerpc/prefix-no-update.c | 2 +- 5 files changed, 54 insertions(+), 27 deletions(-) create mode 100644 gcc/testsuite/gcc.target/powerpc/add-const.c diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index 4c2fe7fa312..af577da669e 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -859,8 +859,7 @@ (define_special_predicate "indexed_address_mem" (define_predicate "add_operand" (if_then_else (match_code "const_int") (match_test "satisfies_constraint_I (op) - || satisfies_constraint_L (op) - || satisfies_constraint_eI (op)") + || satisfies_constraint_L (op)") (match_operand 0 "gpc_reg_operand"))) ;; Return 1 if the operand is either a non-special register, or 0, or -1. diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 779bfd11237..facf6e12114 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -1750,34 +1750,44 @@ (define_expand "add<mode>3" if (CONST_INT_P (operands[2]) && !add_operand (operands[2], <MODE>mode)) { - rtx tmp = ((!can_create_pseudo_p () - || rtx_equal_p (operands[0], operands[1])) - ? operands[0] : gen_reg_rtx (<MODE>mode)); + bool reg0 = (reg_or_subregno (operands[0]) == 0); + if (can_create_pseudo_p () || reg0) + { + + rtx tmp = (!can_create_pseudo_p () + || rtx_equal_p (operands[0], operands[1])) + ? operands[0] : gen_reg_rtx (<MODE>mode); /* Adding a constant to r0 is not a valid insn, so use a different - strategy in that case. */ - if (reg_or_subregno (operands[1]) == 0 || reg_or_subregno (tmp) == 0) - { - if (operands[0] == operands[1]) - FAIL; - rs6000_emit_move (operands[0], operands[2], <MODE>mode); - emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[0])); + strategy in that case. See stack-limit.c, need generate + "24: %0:DI=0x20fa0; 25: %0:DI=%14:DI+%0:DI" in pro_and_epilogue + when can_create_pseudo_p is false. */ + if (reg0 == 0 || reg_or_subregno (tmp) == 0) + { + if (operands[0] == operands[1]) + FAIL; + } + + rs6000_emit_move (tmp, operands[2], <MODE>mode); + emit_insn (gen_add<mode>3 (operands[0], operands[1], tmp)); DONE; } + else + { + HOST_WIDE_INT val = INTVAL (operands[2]); + HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000; + HOST_WIDE_INT rest = trunc_int_for_mode (val - low, <MODE>mode); - HOST_WIDE_INT val = INTVAL (operands[2]); - HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000; - HOST_WIDE_INT rest = trunc_int_for_mode (val - low, <MODE>mode); - - if (<MODE>mode == DImode && !satisfies_constraint_L (GEN_INT (rest))) - FAIL; + if (<MODE>mode == DImode && !satisfies_constraint_L (GEN_INT (rest))) + FAIL; - /* The ordering here is important for the prolog expander. - When space is allocated from the stack, adding 'low' first may - produce a temporary deallocation (which would be bad). */ - emit_insn (gen_add<mode>3 (tmp, operands[1], GEN_INT (rest))); - emit_insn (gen_add<mode>3 (operands[0], tmp, GEN_INT (low))); - DONE; + /* The ordering here is important for the prolog expander. + When space is allocated from the stack, adding 'low' first may + produce a temporary deallocation (which would be bad). */ + emit_insn (gen_add<mode>3 (operands[0], operands[1], GEN_INT (rest))); + emit_insn (gen_add<mode>3 (operands[0], operands[0], GEN_INT (low))); + DONE; + } } }) diff --git a/gcc/testsuite/gcc.target/powerpc/add-const.c b/gcc/testsuite/gcc.target/powerpc/add-const.c new file mode 100644 index 00000000000..eeeed091032 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/add-const.c @@ -0,0 +1,18 @@ +/* { dg-do compile { target { lp64 } } } */ +/* { dg-options "-O3 -fno-unroll-loops" } */ + +/* Ensure the lis,ori are generated, which indicates they have + been hoisted outside of the loop. */ + +typedef unsigned long ulong; +ulong +foo (ulong n, ulong h) +{ + int i; + for (i = 0; i < n; i++) + h = ((h + 8) | h) + 0x67654321; + return h; +} + +/* { dg-final { scan-assembler-times {\mlis\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mori\M} 1 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/prefix-add.c b/gcc/testsuite/gcc.target/powerpc/prefix-add.c index 0027406e457..13af39e51ec 100644 --- a/gcc/testsuite/gcc.target/powerpc/prefix-add.c +++ b/gcc/testsuite/gcc.target/powerpc/prefix-add.c @@ -2,13 +2,13 @@ /* { dg-require-effective-target powerpc_prefixed_addr } */ /* { dg-options "-O2 -mdejagnu-cpu=power10" } */ -/* Test that PADDI is generated to add a large constant. */ +/* Test that PLI is generated to add a large constant. */ unsigned long add (unsigned long a) { return a + 0x12345U; } -/* { dg-final { scan-assembler {\mpaddi\M} } } */ +/* { dg-final { scan-assembler {\mpli\M} } } */ /* { dg-final { scan-assembler-not {\maddi\M} } } */ /* { dg-final { scan-assembler-not {\maddis\M} } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/prefix-no-update.c b/gcc/testsuite/gcc.target/powerpc/prefix-no-update.c index 837fcd77c0b..26221d3eec5 100644 --- a/gcc/testsuite/gcc.target/powerpc/prefix-no-update.c +++ b/gcc/testsuite/gcc.target/powerpc/prefix-no-update.c @@ -42,7 +42,7 @@ struct foo *dec_store (struct foo *p, unsigned int *q) /* { dg-final { scan-assembler-times {\mlwz\M} 2 } } */ /* { dg-final { scan-assembler-times {\mstw\M} 2 } } */ -/* { dg-final { scan-assembler-times {\mpaddi\M} 4 } } */ +/* { dg-final { scan-assembler-times {\mpli\M} 4 } } */ /* { dg-final { scan-assembler-times {\mplwz\M} 2 } } */ /* { dg-final { scan-assembler-times {\mpstw\M} 2 } } */ /* { dg-final { scan-assembler-not {\mplwzu\M} } } */ -- 2.25.1