On Mon, 23 Sep 2024, Artemiy Volkov wrote: > Whenever C1 and C2 are integer constants, X is of a wrapping type, and > cmp is a relational operator, the expression X +- C1 cmp C2 can be > simplified in the following cases: > > (a) If cmp is <= and C2 -+ C1 == +INF(1), we can transform the initial > comparison in the following way: > X +- C1 <= C2 > -INF <= X +- C1 <= C2 (add left hand side which holds for any X, C1) > -INF -+ C1 <= X <= C2 -+ C1 (add -+C1 to all 3 expressions) > -INF -+ C1 <= X <= +INF (due to (1)) > -INF -+ C1 <= X (eliminate the right hand side since it holds for any X) > > (b) By analogy, if cmp if >= and C2 -+ C1 == -INF(1), use the following > sequence of transformations: > > X +- C1 >= C2 > +INF >= X +- C1 >= C2 (add left hand side which holds for any X, C1) > +INF -+ C1 >= X >= C2 -+ C1 (add -+C1 to all 3 expressions) > +INF -+ C1 >= X >= -INF (due to (1)) > +INF -+ C1 >= X (eliminate the right hand side since it holds for any X) > > (c) The > and < cases are negations of (a) and (b), respectively. > > This transformation allows to occasionally save add / sub instructions, > for instance the expression > > 3 + (uint32_t)f() < 2 > > compiles to > > cmn w0, #4 > cset w0, ls > > instead of > > add w0, w0, 3 > cmp w0, 2 > cset w0, ls > > on aarch64. > > Testcases that go together with this patch have been split into two > separate files, one containing testcases for unsigned variables and the > other for wrapping signed ones (and thus compiled with -fwrapv). > Additionally, one aarch64 test has been adjusted since the patch has > caused the generated code to change from > > cmn w0, #2 > csinc w0, w1, wzr, cc (x < -2) > > to > > cmn w0, #3 > csinc w0, w1, wzr, cs (x <= -3) > > This patch has been bootstrapped and regtested on aarch64, x86_64, and > i386, and additionally regtested on riscv32. > > gcc/ChangeLog: > > PR tree-optimization/116024 > * match.pd: New transformation around integer comparison. > > gcc/testsuite/ChangeLog: > > * gcc.dg/tree-ssa/pr116024-2.c: New test. > * gcc.dg/tree-ssa/pr116024-2-fwrapv.c: Ditto. > * gcc.target/aarch64/gtu_to_ltu_cmp_1.c: Adjust. > > Signed-off-by: Artemiy Volkov <arte...@synopsys.com> > --- > gcc/match.pd | 44 ++++++++++++++++++- > .../gcc.dg/tree-ssa/pr116024-2-fwrapv.c | 38 ++++++++++++++++ > gcc/testsuite/gcc.dg/tree-ssa/pr116024-2.c | 38 ++++++++++++++++ > .../gcc.target/aarch64/gtu_to_ltu_cmp_1.c | 2 +- > 4 files changed, 120 insertions(+), 2 deletions(-) > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr116024-2-fwrapv.c > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr116024-2.c > > diff --git a/gcc/match.pd b/gcc/match.pd > index bf3b4a2e3fe..3275a69252f 100644 > --- a/gcc/match.pd > +++ b/gcc/match.pd > @@ -8896,6 +8896,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > (cmp @0 { TREE_OVERFLOW (res) > ? drop_tree_overflow (res) : res; })))))))) > (for cmp (lt le gt ge) > + rcmp (gt ge lt le) > (for op (plus minus) > rop (minus plus) > (simplify > @@ -8923,7 +8924,48 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > "X cmp C2 -+ C1"), > WARN_STRICT_OVERFLOW_COMPARISON); > } > - (cmp @0 { res; }))))))))) > + (cmp @0 { res; }))))) > +/* For wrapping types, simplify the following cases of X +- C1 CMP C2: > + > + (a) If CMP is <= and C2 -+ C1 == +INF (1), simplify to X >= -INF -+ C1 > + by observing the following: > + > + X +- C1 <= C2 > + ==> -INF <= X +- C1 <= C2 (add left hand side which holds for any X, C1) > + ==> -INF -+ C1 <= X <= C2 -+ C1 (add -+C1 to all 3 expressions) > + ==> -INF -+ C1 <= X <= +INF (due to (1)) > + ==> -INF -+ C1 <= X (eliminate the right hand side since it holds for any > X) > + > + (b) Similarly, if CMP is >= and C2 -+ C1 == -INF (1): > + > + X +- C1 >= C2 > + ==> +INF >= X +- C1 >= C2 (add left hand side which holds for any X, C1) > + ==> +INF -+ C1 >= X >= C2 -+ C1 (add -+C1 to all 3 expressions) > + ==> +INF -+ C1 >= X >= -INF (due to (1)) > + ==> +INF -+ C1 >= X (eliminate the right hand side since it holds for any > X) > + > + (c) The > and < cases are negations of (a) and (b), respectively. */ > + (if (TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0)))
This only works for signed types, right? So add a !TYPE_UNSIGNED check. > + (with > + { > + wide_int max = wi::max_value (TREE_TYPE (@0)); > + wide_int min = wi::min_value (TREE_TYPE (@0)); > + > + wide_int c2 = rop == PLUS_EXPR > + ? wi::add (wi::to_wide (@2), wi::to_wide (@1)) > + : wi::sub (wi::to_wide (@2), wi::to_wide (@1)); > + } > + (if (((cmp == LE_EXPR || cmp == GT_EXPR) && wi::eq_p (c2, max)) > + || ((cmp == LT_EXPR || cmp == GE_EXPR) && wi::eq_p (c2, min))) > + (with > + { > + wide_int c1 = rop == PLUS_EXPR > + ? wi::add (wi::bit_not (c2), wi::to_wide (@1)) > + : wi::sub (wi::bit_not (c2), wi::to_wide (@1)); > + tree c1_cst = build_uniform_cst (TREE_TYPE (@0), > + wide_int_to_tree (TREE_TYPE (@0), c1)); You can drop build_uniform_cst, we're not dealing with vectors here. OK with those changes. Thanks, Richard. > + } > + (rcmp @0 { c1_cst; }))))))))) > > /* Invert sign of X in comparisons of the form C1 - X CMP C2. */ > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr116024-2-fwrapv.c > b/gcc/testsuite/gcc.dg/tree-ssa/pr116024-2-fwrapv.c > new file mode 100644 > index 00000000000..b9e88ba79fc > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr116024-2-fwrapv.c > @@ -0,0 +1,38 @@ > +/* PR tree-optimization/116024 */ > +/* { dg-do compile } */ > +/* { dg-options "-O1 -fdump-tree-forwprop1-details -fwrapv" } */ > + > +#include <stdint.h> > +#include <limits.h> > + > +uint32_t f(void); > + > +int32_t i3(void) > +{ > + int32_t l = -10 + (int32_t)f(); > + return l <= INT32_MAX - 10; // f() >= INT32_MIN + 10 > +} > + > +int32_t i3a(void) > +{ > + int32_t l = -20 + (int32_t)f(); > + return l < INT32_MAX - 19; // f() > INT32_MAX + 20 > +} > + > +int32_t i3b(void) > +{ > + int32_t l = 30 + (int32_t)f(); > + return l >= INT32_MIN + 30; // f() <= INT32_MAX - 30 > +} > + > +int32_t i3c(void) > +{ > + int32_t l = 40 + (int32_t)f(); > + return l > INT32_MIN + 39; // f() < INT32_MIN - 40 > +} > + > +/* { dg-final { scan-tree-dump-times "Removing dead stmt:.*? \\+" 4 > "forwprop1" } } */ > +/* { dg-final { scan-tree-dump-times "gimple_simplified to.* >= -2147483638" > 1 "forwprop1" } } */ > +/* { dg-final { scan-tree-dump-times "gimple_simplified to.* >= -2147483628" > 1 "forwprop1" } } */ > +/* { dg-final { scan-tree-dump-times "gimple_simplified to.* <= 2147483617" > 1 "forwprop1" } } */ > +/* { dg-final { scan-tree-dump-times "gimple_simplified to.* <= 2147483607" > 1 "forwprop1" } } */ > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr116024-2.c > b/gcc/testsuite/gcc.dg/tree-ssa/pr116024-2.c > new file mode 100644 > index 00000000000..07b637fab40 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr116024-2.c > @@ -0,0 +1,38 @@ > +/* PR tree-optimization/116024 */ > +/* { dg-do compile } */ > +/* { dg-options "-O1 -fdump-tree-forwprop1-details" } */ > + > +#include <stdint.h> > + > +uint32_t f(void); > + > +int32_t i3(void) > +{ > + uint32_t l = 2; > + l = 10 + (uint32_t)f(); > + return l <= 9; // f() >= -10u > +} > + > +int32_t i3a(void) > +{ > + uint32_t l = 20 + (uint32_t)f(); > + return l < 20; // f() > -21u > +} > + > +int32_t i3b(void) > +{ > + uint32_t l = 30 + (uint32_t)f(); > + return l >= 30; // f() <= -31u > +} > + > +int32_t i3c(void) > +{ > + uint32_t l = 40 + (uint32_t)f(); > + return l > 39; // f() < -39u > +} > + > +/* { dg-final { scan-tree-dump-times "Removing dead stmt:.*? \\+" 4 > "forwprop1" } } */ > +/* { dg-final { scan-tree-dump-times "gimple_simplified to.* > 4294967285" 1 > "forwprop1" } } */ > +/* { dg-final { scan-tree-dump-times "gimple_simplified to.* > 4294967275" 1 > "forwprop1" } } */ > +/* { dg-final { scan-tree-dump-times "gimple_simplified to.* <= 4294967265" > 1 "forwprop1" } } */ > +/* { dg-final { scan-tree-dump-times "gimple_simplified to.* <= 4294967255" > 1 "forwprop1" } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/gtu_to_ltu_cmp_1.c > b/gcc/testsuite/gcc.target/aarch64/gtu_to_ltu_cmp_1.c > index 81c536c90af..bfcec6719de 100644 > --- a/gcc/testsuite/gcc.target/aarch64/gtu_to_ltu_cmp_1.c > +++ b/gcc/testsuite/gcc.target/aarch64/gtu_to_ltu_cmp_1.c > @@ -10,4 +10,4 @@ f1 (int x, int t) > return t; > } > > -/* { dg-final { scan-assembler-times "cmn\\tw\[0-9\]+, #2" 1 } } */ > +/* { dg-final { scan-assembler-times "cmn\\tw\[0-9\]+, #3" 1 } } */ > -- Richard Biener <rguent...@suse.de> SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg, Germany; GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)