Hi Jakub, Richard:

This commit is fixing wrong code gen for RISC-V, does it OK to
backport to GCC 9 branch?

On Fri, Sep 6, 2019 at 4:34 AM Jim Wilson <j...@sifive.com> wrote:
>
> Shifting by more than the size of a SUBREG_REG doesn't work, so we either
> need to disable splits if an input is paradoxical, or else we need to
> generate a clean temporary for intermediate results.
>
> This was tested with rv32i/newlib and rv64gc/linux cross builds and checks.
> There were no regressions.  The new pr91635.c testcase works with the patch
> and fails without it.  Also, code size for libc and libstdc++ was checked
> and is smaller or equal sized with the patch, ignoring alignment padding.
> The shift-shift-4.c testcase gives better code size with this patch.
> The shift-shift-5.c testcase gave worse code size with a draft version
> of this patch and is OK with the final version of this patch.
>
> Jakub wrote the first version of this patch, so gets primary credit for it.
>
> Committed.
>
> Jim
>
>         gcc/
>         PR target/91635
>         * config/riscv/riscv.md (zero_extendsidi2, zero_extendhi<GPR:mode>2,
>         extend<SHORT:mode><SUPERQI:mode>2): Don't split if
>         paradoxical_subreg_p (operands[0]).
>         (*lshrsi3_zero_extend_3+1, *lshrsi3_zero_extend_3+2): Add clobber and
>         use as intermediate value.
>
>         gcc/testsuite/
>         PR target/91635
>         * gcc.c-torture/execute/pr91635.c: New test.
>         * gcc.target/riscv/shift-shift-4.c: New test.
>         * gcc.target/riscv/shift-shift-5.c: New test.
> ---
>  gcc/config/riscv/riscv.md                     | 30 +++++++---
>  gcc/testsuite/gcc.c-torture/execute/pr91635.c | 57 +++++++++++++++++++
>  .../gcc.target/riscv/shift-shift-4.c          | 13 +++++
>  .../gcc.target/riscv/shift-shift-5.c          | 16 ++++++
>  4 files changed, 107 insertions(+), 9 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.c-torture/execute/pr91635.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/shift-shift-4.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/shift-shift-5.c
>
> diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
> index 78260fcf6fd..744a027a1b7 100644
> --- a/gcc/config/riscv/riscv.md
> +++ b/gcc/config/riscv/riscv.md
> @@ -1051,7 +1051,9 @@
>    "@
>     #
>     lwu\t%0,%1"
> -  "&& reload_completed && REG_P (operands[1])"
> +  "&& reload_completed
> +   && REG_P (operands[1])
> +   && !paradoxical_subreg_p (operands[0])"
>    [(set (match_dup 0)
>         (ashift:DI (match_dup 1) (const_int 32)))
>     (set (match_dup 0)
> @@ -1068,7 +1070,9 @@
>    "@
>     #
>     lhu\t%0,%1"
> -  "&& reload_completed && REG_P (operands[1])"
> +  "&& reload_completed
> +   && REG_P (operands[1])
> +   && !paradoxical_subreg_p (operands[0])"
>    [(set (match_dup 0)
>         (ashift:GPR (match_dup 1) (match_dup 2)))
>     (set (match_dup 0)
> @@ -1117,7 +1121,9 @@
>    "@
>     #
>     l<SHORT:size>\t%0,%1"
> -  "&& reload_completed && REG_P (operands[1])"
> +  "&& reload_completed
> +   && REG_P (operands[1])
> +   && !paradoxical_subreg_p (operands[0])"
>    [(set (match_dup 0) (ashift:SI (match_dup 1) (match_dup 2)))
>     (set (match_dup 0) (ashiftrt:SI (match_dup 0) (match_dup 2)))]
>  {
> @@ -1766,15 +1772,20 @@
>  ;; Handle AND with 2^N-1 for N from 12 to XLEN.  This can be split into
>  ;; two logical shifts.  Otherwise it requires 3 instructions: lui,
>  ;; xor/addi/srli, and.
> +
> +;; Generating a temporary for the shift output gives better combiner results;
> +;; and also fixes a problem where op0 could be a paradoxical reg and shifting
> +;; by amounts larger than the size of the SUBREG_REG doesn't work.
>  (define_split
>    [(set (match_operand:GPR 0 "register_operand")
>         (and:GPR (match_operand:GPR 1 "register_operand")
> -                (match_operand:GPR 2 "p2m1_shift_operand")))]
> +                (match_operand:GPR 2 "p2m1_shift_operand")))
> +   (clobber (match_operand:GPR 3 "register_operand"))]
>    ""
> - [(set (match_dup 0)
> + [(set (match_dup 3)
>         (ashift:GPR (match_dup 1) (match_dup 2)))
>    (set (match_dup 0)
> -       (lshiftrt:GPR (match_dup 0) (match_dup 2)))]
> +       (lshiftrt:GPR (match_dup 3) (match_dup 2)))]
>  {
>    /* Op2 is a VOIDmode constant, so get the mode size from op1.  */
>    operands[2] = GEN_INT (GET_MODE_BITSIZE (GET_MODE (operands[1]))
> @@ -1786,12 +1797,13 @@
>  (define_split
>    [(set (match_operand:DI 0 "register_operand")
>         (and:DI (match_operand:DI 1 "register_operand")
> -               (match_operand:DI 2 "high_mask_shift_operand")))]
> +               (match_operand:DI 2 "high_mask_shift_operand")))
> +   (clobber (match_operand:DI 3 "register_operand"))]
>    "TARGET_64BIT"
> -  [(set (match_dup 0)
> +  [(set (match_dup 3)
>         (lshiftrt:DI (match_dup 1) (match_dup 2)))
>     (set (match_dup 0)
> -       (ashift:DI (match_dup 0) (match_dup 2)))]
> +       (ashift:DI (match_dup 3) (match_dup 2)))]
>  {
>    operands[2] = GEN_INT (ctz_hwi (INTVAL (operands[2])));
>  })
> diff --git a/gcc/testsuite/gcc.c-torture/execute/pr91635.c 
> b/gcc/testsuite/gcc.c-torture/execute/pr91635.c
> new file mode 100644
> index 00000000000..878a491fc36
> --- /dev/null
> +++ b/gcc/testsuite/gcc.c-torture/execute/pr91635.c
> @@ -0,0 +1,57 @@
> +/* PR target/91635 */
> +
> +#if __CHAR_BIT__ == 8 && __SIZEOF_SHORT__ == 2 \
> +    && __SIZEOF_INT__ == 4 && __SIZEOF_LONG_LONG__ == 8
> +unsigned short b, c;
> +int u, v, w, x;
> +
> +__attribute__ ((noipa)) int
> +foo (unsigned short c)
> +{
> +  c <<= __builtin_add_overflow (-c, -1, &b);
> +  c >>= 1;
> +  return c;
> +}
> +
> +__attribute__ ((noipa)) int
> +bar (unsigned short b)
> +{
> +  b <<= -14 & 15;
> +  b = b >> -~1;
> +  return b;
> +}
> +
> +__attribute__ ((noipa)) int
> +baz (unsigned short e)
> +{
> +  e <<= 1;
> +  e >>= __builtin_add_overflow (8719476735, u, &v);
> +  return e;
> +}
> +
> +__attribute__ ((noipa)) int
> +qux (unsigned int e)
> +{
> +  c = ~1;
> +  c *= e;
> +  c = c >> (-15 & 5);
> +  return c + w + x;
> +}
> +#endif
> +
> +int
> +main ()
> +{
> +#if __CHAR_BIT__ == 8 && __SIZEOF_SHORT__ == 2 \
> +    && __SIZEOF_INT__ == 4 && __SIZEOF_LONG_LONG__ == 8
> +  if (foo (0xffff) != 0x7fff)
> +    __builtin_abort ();
> +  if (bar (5) != 5)
> +    __builtin_abort ();
> +  if (baz (~0) != 0x7fff)
> +    __builtin_abort ();
> +  if (qux (2) != 0x7ffe)
> +    __builtin_abort ();
> +#endif
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.target/riscv/shift-shift-4.c 
> b/gcc/testsuite/gcc.target/riscv/shift-shift-4.c
> new file mode 100644
> index 00000000000..72a45ee87ae
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/shift-shift-4.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv32i -mabi=ilp32 -O2" } */
> +
> +/* One zero-extend shift can be eliminated by modifying the constant in the
> +   greater than test.  Started working after modifying the splitter
> +   lshrsi3_zero_extend_3+1 to use a temporary reg for the first split dest.  
> */
> +int
> +sub (int i)
> +{
> +  i &= 0x7fffffff;
> +  return i > 0x7f800000;
> +}
> +/* { dg-final { scan-assembler-not "srli" } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/shift-shift-5.c 
> b/gcc/testsuite/gcc.target/riscv/shift-shift-5.c
> new file mode 100644
> index 00000000000..5b2ae89a471
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/shift-shift-5.c
> @@ -0,0 +1,16 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gc -mabi=lp64d -O2" } */
> +
> +/* Fails if lshrsi3_zero_extend_3+1 uses a temp reg which has no REG_DEST
> +   note.  */
> +unsigned long
> +sub (long l)
> +{
> +  union u {
> +    struct s { int a : 19; unsigned int b : 13; int x; } s;
> +    long l;
> +  } u;
> +  u.l = l;
> +  return u.s.b;
> +}
> +/* { dg-final { scan-assembler "srliw" } } */
> --
> 2.17.1
>

Reply via email to