Shifting by more than the size of a SUBREG_REG doesn't work, so we either need to disable splits if an input is paradoxical, or else we need to generate a clean temporary for intermediate results.
This was tested with rv32i/newlib and rv64gc/linux cross builds and checks. There were no regressions. The new pr91635.c testcase works with the patch and fails without it. Also, code size for libc and libstdc++ was checked and is smaller or equal sized with the patch, ignoring alignment padding. The shift-shift-4.c testcase gives better code size with this patch. The shift-shift-5.c testcase gave worse code size with a draft version of this patch and is OK with the final version of this patch. Jakub wrote the first version of this patch, so gets primary credit for it. Committed. Jim gcc/ PR target/91635 * config/riscv/riscv.md (zero_extendsidi2, zero_extendhi<GPR:mode>2, extend<SHORT:mode><SUPERQI:mode>2): Don't split if paradoxical_subreg_p (operands[0]). (*lshrsi3_zero_extend_3+1, *lshrsi3_zero_extend_3+2): Add clobber and use as intermediate value. gcc/testsuite/ PR target/91635 * gcc.c-torture/execute/pr91635.c: New test. * gcc.target/riscv/shift-shift-4.c: New test. * gcc.target/riscv/shift-shift-5.c: New test. --- gcc/config/riscv/riscv.md | 30 +++++++--- gcc/testsuite/gcc.c-torture/execute/pr91635.c | 57 +++++++++++++++++++ .../gcc.target/riscv/shift-shift-4.c | 13 +++++ .../gcc.target/riscv/shift-shift-5.c | 16 ++++++ 4 files changed, 107 insertions(+), 9 deletions(-) create mode 100644 gcc/testsuite/gcc.c-torture/execute/pr91635.c create mode 100644 gcc/testsuite/gcc.target/riscv/shift-shift-4.c create mode 100644 gcc/testsuite/gcc.target/riscv/shift-shift-5.c diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md index 78260fcf6fd..744a027a1b7 100644 --- a/gcc/config/riscv/riscv.md +++ b/gcc/config/riscv/riscv.md @@ -1051,7 +1051,9 @@ "@ # lwu\t%0,%1" - "&& reload_completed && REG_P (operands[1])" + "&& reload_completed + && REG_P (operands[1]) + && !paradoxical_subreg_p (operands[0])" [(set (match_dup 0) (ashift:DI (match_dup 1) (const_int 32))) (set (match_dup 0) @@ -1068,7 +1070,9 @@ "@ # lhu\t%0,%1" - "&& reload_completed && REG_P (operands[1])" + "&& reload_completed + && REG_P (operands[1]) + && !paradoxical_subreg_p (operands[0])" [(set (match_dup 0) (ashift:GPR (match_dup 1) (match_dup 2))) (set (match_dup 0) @@ -1117,7 +1121,9 @@ "@ # l<SHORT:size>\t%0,%1" - "&& reload_completed && REG_P (operands[1])" + "&& reload_completed + && REG_P (operands[1]) + && !paradoxical_subreg_p (operands[0])" [(set (match_dup 0) (ashift:SI (match_dup 1) (match_dup 2))) (set (match_dup 0) (ashiftrt:SI (match_dup 0) (match_dup 2)))] { @@ -1766,15 +1772,20 @@ ;; Handle AND with 2^N-1 for N from 12 to XLEN. This can be split into ;; two logical shifts. Otherwise it requires 3 instructions: lui, ;; xor/addi/srli, and. + +;; Generating a temporary for the shift output gives better combiner results; +;; and also fixes a problem where op0 could be a paradoxical reg and shifting +;; by amounts larger than the size of the SUBREG_REG doesn't work. (define_split [(set (match_operand:GPR 0 "register_operand") (and:GPR (match_operand:GPR 1 "register_operand") - (match_operand:GPR 2 "p2m1_shift_operand")))] + (match_operand:GPR 2 "p2m1_shift_operand"))) + (clobber (match_operand:GPR 3 "register_operand"))] "" - [(set (match_dup 0) + [(set (match_dup 3) (ashift:GPR (match_dup 1) (match_dup 2))) (set (match_dup 0) - (lshiftrt:GPR (match_dup 0) (match_dup 2)))] + (lshiftrt:GPR (match_dup 3) (match_dup 2)))] { /* Op2 is a VOIDmode constant, so get the mode size from op1. */ operands[2] = GEN_INT (GET_MODE_BITSIZE (GET_MODE (operands[1])) @@ -1786,12 +1797,13 @@ (define_split [(set (match_operand:DI 0 "register_operand") (and:DI (match_operand:DI 1 "register_operand") - (match_operand:DI 2 "high_mask_shift_operand")))] + (match_operand:DI 2 "high_mask_shift_operand"))) + (clobber (match_operand:DI 3 "register_operand"))] "TARGET_64BIT" - [(set (match_dup 0) + [(set (match_dup 3) (lshiftrt:DI (match_dup 1) (match_dup 2))) (set (match_dup 0) - (ashift:DI (match_dup 0) (match_dup 2)))] + (ashift:DI (match_dup 3) (match_dup 2)))] { operands[2] = GEN_INT (ctz_hwi (INTVAL (operands[2]))); }) diff --git a/gcc/testsuite/gcc.c-torture/execute/pr91635.c b/gcc/testsuite/gcc.c-torture/execute/pr91635.c new file mode 100644 index 00000000000..878a491fc36 --- /dev/null +++ b/gcc/testsuite/gcc.c-torture/execute/pr91635.c @@ -0,0 +1,57 @@ +/* PR target/91635 */ + +#if __CHAR_BIT__ == 8 && __SIZEOF_SHORT__ == 2 \ + && __SIZEOF_INT__ == 4 && __SIZEOF_LONG_LONG__ == 8 +unsigned short b, c; +int u, v, w, x; + +__attribute__ ((noipa)) int +foo (unsigned short c) +{ + c <<= __builtin_add_overflow (-c, -1, &b); + c >>= 1; + return c; +} + +__attribute__ ((noipa)) int +bar (unsigned short b) +{ + b <<= -14 & 15; + b = b >> -~1; + return b; +} + +__attribute__ ((noipa)) int +baz (unsigned short e) +{ + e <<= 1; + e >>= __builtin_add_overflow (8719476735, u, &v); + return e; +} + +__attribute__ ((noipa)) int +qux (unsigned int e) +{ + c = ~1; + c *= e; + c = c >> (-15 & 5); + return c + w + x; +} +#endif + +int +main () +{ +#if __CHAR_BIT__ == 8 && __SIZEOF_SHORT__ == 2 \ + && __SIZEOF_INT__ == 4 && __SIZEOF_LONG_LONG__ == 8 + if (foo (0xffff) != 0x7fff) + __builtin_abort (); + if (bar (5) != 5) + __builtin_abort (); + if (baz (~0) != 0x7fff) + __builtin_abort (); + if (qux (2) != 0x7ffe) + __builtin_abort (); +#endif + return 0; +} diff --git a/gcc/testsuite/gcc.target/riscv/shift-shift-4.c b/gcc/testsuite/gcc.target/riscv/shift-shift-4.c new file mode 100644 index 00000000000..72a45ee87ae --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/shift-shift-4.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32i -mabi=ilp32 -O2" } */ + +/* One zero-extend shift can be eliminated by modifying the constant in the + greater than test. Started working after modifying the splitter + lshrsi3_zero_extend_3+1 to use a temporary reg for the first split dest. */ +int +sub (int i) +{ + i &= 0x7fffffff; + return i > 0x7f800000; +} +/* { dg-final { scan-assembler-not "srli" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/shift-shift-5.c b/gcc/testsuite/gcc.target/riscv/shift-shift-5.c new file mode 100644 index 00000000000..5b2ae89a471 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/shift-shift-5.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O2" } */ + +/* Fails if lshrsi3_zero_extend_3+1 uses a temp reg which has no REG_DEST + note. */ +unsigned long +sub (long l) +{ + union u { + struct s { int a : 19; unsigned int b : 13; int x; } s; + long l; + } u; + u.l = l; + return u.s.b; +} +/* { dg-final { scan-assembler "srliw" } } */ -- 2.17.1