On Mon, 23 Sep 2024, Artemiy Volkov wrote:

> Whenever C1 and C2 are integer constants, X is of a wrapping type, and
> cmp is a relational operator, the expression X +- C1 cmp C2 can be
> simplified in the following cases:
> 
> (a) If cmp is <= and C2 -+ C1 == +INF(1), we can transform the initial
> comparison in the following way:
>    X +- C1 <= C2
>    -INF <= X +- C1 <= C2 (add left hand side which holds for any X, C1)
>    -INF -+ C1 <= X <= C2 -+ C1 (add -+C1 to all 3 expressions)
>    -INF -+ C1 <= X <= +INF (due to (1))
>    -INF -+ C1 <= X (eliminate the right hand side since it holds for any X)
> 
> (b) By analogy, if cmp if >= and C2 -+ C1 == -INF(1), use the following
> sequence of transformations:
> 
>    X +- C1 >= C2
>    +INF >= X +- C1 >= C2 (add left hand side which holds for any X, C1)
>    +INF -+ C1 >= X >= C2 -+ C1 (add -+C1 to all 3 expressions)
>    +INF -+ C1 >= X >= -INF (due to (1))
>    +INF -+ C1 >= X (eliminate the right hand side since it holds for any X)
> 
> (c) The > and < cases are negations of (a) and (b), respectively.
> 
> This transformation allows to occasionally save add / sub instructions,
> for instance the expression
> 
> 3 + (uint32_t)f() < 2
> 
> compiles to
> 
> cmn     w0, #4
> cset    w0, ls
> 
> instead of
> 
> add     w0, w0, 3
> cmp     w0, 2
> cset    w0, ls
> 
> on aarch64.
> 
> Testcases that go together with this patch have been split into two
> separate files, one containing testcases for unsigned variables and the
> other for wrapping signed ones (and thus compiled with -fwrapv).
> Additionally, one aarch64 test has been adjusted since the patch has
> caused the generated code to change from
> 
> cmn     w0, #2
> csinc   w0, w1, wzr, cc   (x < -2)
> 
> to
> 
> cmn     w0, #3
> csinc   w0, w1, wzr, cs   (x <= -3)
> 
> This patch has been bootstrapped and regtested on aarch64, x86_64, and
> i386, and additionally regtested on riscv32.
> 
> gcc/ChangeLog:
> 
>         PR tree-optimization/116024
>         * match.pd: New transformation around integer comparison.
> 
> gcc/testsuite/ChangeLog:
> 
>         * gcc.dg/tree-ssa/pr116024-2.c: New test.
>         * gcc.dg/tree-ssa/pr116024-2-fwrapv.c: Ditto.
>         * gcc.target/aarch64/gtu_to_ltu_cmp_1.c: Adjust.
> 
> Signed-off-by: Artemiy Volkov <arte...@synopsys.com>
> ---
>  gcc/match.pd                                  | 44 ++++++++++++++++++-
>  .../gcc.dg/tree-ssa/pr116024-2-fwrapv.c       | 38 ++++++++++++++++
>  gcc/testsuite/gcc.dg/tree-ssa/pr116024-2.c    | 38 ++++++++++++++++
>  .../gcc.target/aarch64/gtu_to_ltu_cmp_1.c     |  2 +-
>  4 files changed, 120 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr116024-2-fwrapv.c
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr116024-2.c
> 
> diff --git a/gcc/match.pd b/gcc/match.pd
> index bf3b4a2e3fe..3275a69252f 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -8896,6 +8896,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>         (cmp @0 { TREE_OVERFLOW (res)
>                ? drop_tree_overflow (res) : res; }))))))))
>  (for cmp (lt le gt ge)
> +     rcmp (gt ge lt le)
>   (for op (plus minus)
>        rop (minus plus)
>    (simplify
> @@ -8923,7 +8924,48 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>                                 "X cmp C2 -+ C1"),
>                                WARN_STRICT_OVERFLOW_COMPARISON);
>       }
> -     (cmp @0 { res; })))))))))
> +     (cmp @0 { res; })))))
> +/* For wrapping types, simplify the following cases of X +- C1 CMP C2:
> +
> +   (a) If CMP is <= and C2 -+ C1 == +INF (1), simplify to X >= -INF -+ C1
> +       by observing the following:
> +
> +     X +- C1 <= C2
> +  ==>  -INF <= X +- C1 <= C2 (add left hand side which holds for any X, C1)
> +  ==>  -INF -+ C1 <= X <= C2 -+ C1 (add -+C1 to all 3 expressions)
> +  ==>  -INF -+ C1 <= X <= +INF (due to (1))
> +  ==>  -INF -+ C1 <= X (eliminate the right hand side since it holds for any 
> X)
> +
> +    (b) Similarly, if CMP is >= and C2 -+ C1 == -INF (1):
> +
> +     X +- C1 >= C2
> +  ==>  +INF >= X +- C1 >= C2 (add left hand side which holds for any X, C1)
> +  ==>  +INF -+ C1 >= X >= C2 -+ C1 (add -+C1 to all 3 expressions)
> +  ==>  +INF -+ C1 >= X >= -INF (due to (1))
> +  ==>  +INF -+ C1 >= X (eliminate the right hand side since it holds for any 
> X)
> +
> +    (c) The > and < cases are negations of (a) and (b), respectively.  */
> +   (if (TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0)))

This only works for signed types, right?  So add a !TYPE_UNSIGNED check.

> +     (with
> +       {
> +     wide_int max = wi::max_value (TREE_TYPE (@0));
> +     wide_int min = wi::min_value (TREE_TYPE (@0));
> +
> +     wide_int c2 = rop == PLUS_EXPR
> +                           ? wi::add (wi::to_wide (@2), wi::to_wide (@1))
> +                           : wi::sub (wi::to_wide (@2), wi::to_wide (@1));
> +     }
> +     (if (((cmp == LE_EXPR || cmp == GT_EXPR) && wi::eq_p (c2, max))
> +         || ((cmp == LT_EXPR || cmp == GE_EXPR) && wi::eq_p (c2, min)))
> +       (with
> +        {
> +          wide_int c1 = rop == PLUS_EXPR
> +                        ? wi::add (wi::bit_not (c2), wi::to_wide (@1))
> +                        : wi::sub (wi::bit_not (c2), wi::to_wide (@1));
> +          tree c1_cst = build_uniform_cst (TREE_TYPE (@0),
> +                             wide_int_to_tree (TREE_TYPE (@0), c1));

You can drop build_uniform_cst, we're not dealing with vectors here.

OK with those changes.

Thanks,
Richard.

> +        }
> +        (rcmp @0 { c1_cst; })))))))))
>  
>  /* Invert sign of X in comparisons of the form C1 - X CMP C2.  */
>  
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr116024-2-fwrapv.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/pr116024-2-fwrapv.c
> new file mode 100644
> index 00000000000..b9e88ba79fc
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr116024-2-fwrapv.c
> @@ -0,0 +1,38 @@
> +/* PR tree-optimization/116024 */
> +/* { dg-do compile } */
> +/* { dg-options "-O1 -fdump-tree-forwprop1-details -fwrapv" } */
> +
> +#include <stdint.h>
> +#include <limits.h>
> +
> +uint32_t f(void);
> +
> +int32_t i3(void)
> +{
> +  int32_t l = -10 + (int32_t)f();
> +  return l <= INT32_MAX - 10; // f() >= INT32_MIN + 10
> +}
> +
> +int32_t i3a(void)
> +{
> +  int32_t l = -20 + (int32_t)f();
> +  return l < INT32_MAX - 19; // f() > INT32_MAX + 20
> +}
> +
> +int32_t i3b(void)
> +{
> +  int32_t l = 30 + (int32_t)f();
> +  return l >= INT32_MIN + 30; // f() <= INT32_MAX - 30
> +}
> +
> +int32_t i3c(void)
> +{
> +  int32_t l = 40 + (int32_t)f();
> +  return l > INT32_MIN + 39; // f() < INT32_MIN - 40
> +}
> +
> +/* { dg-final { scan-tree-dump-times "Removing dead stmt:.*? \\+" 4 
> "forwprop1" } } */
> +/* { dg-final { scan-tree-dump-times "gimple_simplified to.* >= -2147483638" 
> 1 "forwprop1" } } */
> +/* { dg-final { scan-tree-dump-times "gimple_simplified to.* >= -2147483628" 
> 1 "forwprop1" } } */
> +/* { dg-final { scan-tree-dump-times "gimple_simplified to.* <= 2147483617" 
> 1 "forwprop1" } } */
> +/* { dg-final { scan-tree-dump-times "gimple_simplified to.* <= 2147483607" 
> 1 "forwprop1" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr116024-2.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/pr116024-2.c
> new file mode 100644
> index 00000000000..07b637fab40
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr116024-2.c
> @@ -0,0 +1,38 @@
> +/* PR tree-optimization/116024 */
> +/* { dg-do compile } */
> +/* { dg-options "-O1 -fdump-tree-forwprop1-details" } */
> +
> +#include <stdint.h>
> +
> +uint32_t f(void);
> +
> +int32_t i3(void)
> +{
> +  uint32_t l = 2;
> +  l = 10 + (uint32_t)f();
> +  return l <= 9; // f() >= -10u
> +}
> +
> +int32_t i3a(void)
> +{
> +  uint32_t l = 20 + (uint32_t)f();
> +  return l < 20; // f() > -21u
> +}
> +
> +int32_t i3b(void)
> +{
> +  uint32_t l = 30 + (uint32_t)f();
> +  return l >= 30; // f() <= -31u
> +}
> +
> +int32_t i3c(void)
> +{
> +  uint32_t l = 40 + (uint32_t)f();
> +  return l > 39; // f() < -39u
> +}
> +
> +/* { dg-final { scan-tree-dump-times "Removing dead stmt:.*? \\+" 4 
> "forwprop1" } } */
> +/* { dg-final { scan-tree-dump-times "gimple_simplified to.* > 4294967285" 1 
> "forwprop1" } } */
> +/* { dg-final { scan-tree-dump-times "gimple_simplified to.* > 4294967275" 1 
> "forwprop1" } } */
> +/* { dg-final { scan-tree-dump-times "gimple_simplified to.* <= 4294967265" 
> 1 "forwprop1" } } */
> +/* { dg-final { scan-tree-dump-times "gimple_simplified to.* <= 4294967255" 
> 1 "forwprop1" } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/gtu_to_ltu_cmp_1.c 
> b/gcc/testsuite/gcc.target/aarch64/gtu_to_ltu_cmp_1.c
> index 81c536c90af..bfcec6719de 100644
> --- a/gcc/testsuite/gcc.target/aarch64/gtu_to_ltu_cmp_1.c
> +++ b/gcc/testsuite/gcc.target/aarch64/gtu_to_ltu_cmp_1.c
> @@ -10,4 +10,4 @@ f1 (int x, int t)
>    return t;
>  }
>  
> -/* { dg-final { scan-assembler-times "cmn\\tw\[0-9\]+, #2" 1 } } */
> +/* { dg-final { scan-assembler-times "cmn\\tw\[0-9\]+, #3" 1 } } */
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)

Reply via email to