Hi Jakub, Richard: This commit is fixing wrong code gen for RISC-V, does it OK to backport to GCC 9 branch?
On Fri, Sep 6, 2019 at 4:34 AM Jim Wilson <j...@sifive.com> wrote: > > Shifting by more than the size of a SUBREG_REG doesn't work, so we either > need to disable splits if an input is paradoxical, or else we need to > generate a clean temporary for intermediate results. > > This was tested with rv32i/newlib and rv64gc/linux cross builds and checks. > There were no regressions. The new pr91635.c testcase works with the patch > and fails without it. Also, code size for libc and libstdc++ was checked > and is smaller or equal sized with the patch, ignoring alignment padding. > The shift-shift-4.c testcase gives better code size with this patch. > The shift-shift-5.c testcase gave worse code size with a draft version > of this patch and is OK with the final version of this patch. > > Jakub wrote the first version of this patch, so gets primary credit for it. > > Committed. > > Jim > > gcc/ > PR target/91635 > * config/riscv/riscv.md (zero_extendsidi2, zero_extendhi<GPR:mode>2, > extend<SHORT:mode><SUPERQI:mode>2): Don't split if > paradoxical_subreg_p (operands[0]). > (*lshrsi3_zero_extend_3+1, *lshrsi3_zero_extend_3+2): Add clobber and > use as intermediate value. > > gcc/testsuite/ > PR target/91635 > * gcc.c-torture/execute/pr91635.c: New test. > * gcc.target/riscv/shift-shift-4.c: New test. > * gcc.target/riscv/shift-shift-5.c: New test. > --- > gcc/config/riscv/riscv.md | 30 +++++++--- > gcc/testsuite/gcc.c-torture/execute/pr91635.c | 57 +++++++++++++++++++ > .../gcc.target/riscv/shift-shift-4.c | 13 +++++ > .../gcc.target/riscv/shift-shift-5.c | 16 ++++++ > 4 files changed, 107 insertions(+), 9 deletions(-) > create mode 100644 gcc/testsuite/gcc.c-torture/execute/pr91635.c > create mode 100644 gcc/testsuite/gcc.target/riscv/shift-shift-4.c > create mode 100644 gcc/testsuite/gcc.target/riscv/shift-shift-5.c > > diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md > index 78260fcf6fd..744a027a1b7 100644 > --- a/gcc/config/riscv/riscv.md > +++ b/gcc/config/riscv/riscv.md > @@ -1051,7 +1051,9 @@ > "@ > # > lwu\t%0,%1" > - "&& reload_completed && REG_P (operands[1])" > + "&& reload_completed > + && REG_P (operands[1]) > + && !paradoxical_subreg_p (operands[0])" > [(set (match_dup 0) > (ashift:DI (match_dup 1) (const_int 32))) > (set (match_dup 0) > @@ -1068,7 +1070,9 @@ > "@ > # > lhu\t%0,%1" > - "&& reload_completed && REG_P (operands[1])" > + "&& reload_completed > + && REG_P (operands[1]) > + && !paradoxical_subreg_p (operands[0])" > [(set (match_dup 0) > (ashift:GPR (match_dup 1) (match_dup 2))) > (set (match_dup 0) > @@ -1117,7 +1121,9 @@ > "@ > # > l<SHORT:size>\t%0,%1" > - "&& reload_completed && REG_P (operands[1])" > + "&& reload_completed > + && REG_P (operands[1]) > + && !paradoxical_subreg_p (operands[0])" > [(set (match_dup 0) (ashift:SI (match_dup 1) (match_dup 2))) > (set (match_dup 0) (ashiftrt:SI (match_dup 0) (match_dup 2)))] > { > @@ -1766,15 +1772,20 @@ > ;; Handle AND with 2^N-1 for N from 12 to XLEN. This can be split into > ;; two logical shifts. Otherwise it requires 3 instructions: lui, > ;; xor/addi/srli, and. > + > +;; Generating a temporary for the shift output gives better combiner results; > +;; and also fixes a problem where op0 could be a paradoxical reg and shifting > +;; by amounts larger than the size of the SUBREG_REG doesn't work. > (define_split > [(set (match_operand:GPR 0 "register_operand") > (and:GPR (match_operand:GPR 1 "register_operand") > - (match_operand:GPR 2 "p2m1_shift_operand")))] > + (match_operand:GPR 2 "p2m1_shift_operand"))) > + (clobber (match_operand:GPR 3 "register_operand"))] > "" > - [(set (match_dup 0) > + [(set (match_dup 3) > (ashift:GPR (match_dup 1) (match_dup 2))) > (set (match_dup 0) > - (lshiftrt:GPR (match_dup 0) (match_dup 2)))] > + (lshiftrt:GPR (match_dup 3) (match_dup 2)))] > { > /* Op2 is a VOIDmode constant, so get the mode size from op1. */ > operands[2] = GEN_INT (GET_MODE_BITSIZE (GET_MODE (operands[1])) > @@ -1786,12 +1797,13 @@ > (define_split > [(set (match_operand:DI 0 "register_operand") > (and:DI (match_operand:DI 1 "register_operand") > - (match_operand:DI 2 "high_mask_shift_operand")))] > + (match_operand:DI 2 "high_mask_shift_operand"))) > + (clobber (match_operand:DI 3 "register_operand"))] > "TARGET_64BIT" > - [(set (match_dup 0) > + [(set (match_dup 3) > (lshiftrt:DI (match_dup 1) (match_dup 2))) > (set (match_dup 0) > - (ashift:DI (match_dup 0) (match_dup 2)))] > + (ashift:DI (match_dup 3) (match_dup 2)))] > { > operands[2] = GEN_INT (ctz_hwi (INTVAL (operands[2]))); > }) > diff --git a/gcc/testsuite/gcc.c-torture/execute/pr91635.c > b/gcc/testsuite/gcc.c-torture/execute/pr91635.c > new file mode 100644 > index 00000000000..878a491fc36 > --- /dev/null > +++ b/gcc/testsuite/gcc.c-torture/execute/pr91635.c > @@ -0,0 +1,57 @@ > +/* PR target/91635 */ > + > +#if __CHAR_BIT__ == 8 && __SIZEOF_SHORT__ == 2 \ > + && __SIZEOF_INT__ == 4 && __SIZEOF_LONG_LONG__ == 8 > +unsigned short b, c; > +int u, v, w, x; > + > +__attribute__ ((noipa)) int > +foo (unsigned short c) > +{ > + c <<= __builtin_add_overflow (-c, -1, &b); > + c >>= 1; > + return c; > +} > + > +__attribute__ ((noipa)) int > +bar (unsigned short b) > +{ > + b <<= -14 & 15; > + b = b >> -~1; > + return b; > +} > + > +__attribute__ ((noipa)) int > +baz (unsigned short e) > +{ > + e <<= 1; > + e >>= __builtin_add_overflow (8719476735, u, &v); > + return e; > +} > + > +__attribute__ ((noipa)) int > +qux (unsigned int e) > +{ > + c = ~1; > + c *= e; > + c = c >> (-15 & 5); > + return c + w + x; > +} > +#endif > + > +int > +main () > +{ > +#if __CHAR_BIT__ == 8 && __SIZEOF_SHORT__ == 2 \ > + && __SIZEOF_INT__ == 4 && __SIZEOF_LONG_LONG__ == 8 > + if (foo (0xffff) != 0x7fff) > + __builtin_abort (); > + if (bar (5) != 5) > + __builtin_abort (); > + if (baz (~0) != 0x7fff) > + __builtin_abort (); > + if (qux (2) != 0x7ffe) > + __builtin_abort (); > +#endif > + return 0; > +} > diff --git a/gcc/testsuite/gcc.target/riscv/shift-shift-4.c > b/gcc/testsuite/gcc.target/riscv/shift-shift-4.c > new file mode 100644 > index 00000000000..72a45ee87ae > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/shift-shift-4.c > @@ -0,0 +1,13 @@ > +/* { dg-do compile } */ > +/* { dg-options "-march=rv32i -mabi=ilp32 -O2" } */ > + > +/* One zero-extend shift can be eliminated by modifying the constant in the > + greater than test. Started working after modifying the splitter > + lshrsi3_zero_extend_3+1 to use a temporary reg for the first split dest. > */ > +int > +sub (int i) > +{ > + i &= 0x7fffffff; > + return i > 0x7f800000; > +} > +/* { dg-final { scan-assembler-not "srli" } } */ > diff --git a/gcc/testsuite/gcc.target/riscv/shift-shift-5.c > b/gcc/testsuite/gcc.target/riscv/shift-shift-5.c > new file mode 100644 > index 00000000000..5b2ae89a471 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/shift-shift-5.c > @@ -0,0 +1,16 @@ > +/* { dg-do compile } */ > +/* { dg-options "-march=rv64gc -mabi=lp64d -O2" } */ > + > +/* Fails if lshrsi3_zero_extend_3+1 uses a temp reg which has no REG_DEST > + note. */ > +unsigned long > +sub (long l) > +{ > + union u { > + struct s { int a : 19; unsigned int b : 13; int x; } s; > + long l; > + } u; > + u.l = l; > + return u.s.b; > +} > +/* { dg-final { scan-assembler "srliw" } } */ > -- > 2.17.1 >