On 9/27/2024 1:29 PM, Richard Biener wrote: > On Mon, 23 Sep 2024, Artemiy Volkov wrote: > >> Whenever C1 and C2 are integer constants, X is of a wrapping type, and >> cmp is a relational operator, the expression X +- C1 cmp C2 can be >> simplified in the following cases: >> >> (a) If cmp is <= and C2 -+ C1 == +INF(1), we can transform the initial >> comparison in the following way: >> X +- C1 <= C2 >> -INF <= X +- C1 <= C2 (add left hand side which holds for any X, C1) >> -INF -+ C1 <= X <= C2 -+ C1 (add -+C1 to all 3 expressions) >> -INF -+ C1 <= X <= +INF (due to (1)) >> -INF -+ C1 <= X (eliminate the right hand side since it holds for any X) >> >> (b) By analogy, if cmp if >= and C2 -+ C1 == -INF(1), use the following >> sequence of transformations: >> >> X +- C1 >= C2 >> +INF >= X +- C1 >= C2 (add left hand side which holds for any X, C1) >> +INF -+ C1 >= X >= C2 -+ C1 (add -+C1 to all 3 expressions) >> +INF -+ C1 >= X >= -INF (due to (1)) >> +INF -+ C1 >= X (eliminate the right hand side since it holds for any X) >> >> (c) The > and < cases are negations of (a) and (b), respectively. >> >> This transformation allows to occasionally save add / sub instructions, >> for instance the expression >> >> 3 + (uint32_t)f() < 2 >> >> compiles to >> >> cmn w0, #4 >> cset w0, ls >> >> instead of >> >> add w0, w0, 3 >> cmp w0, 2 >> cset w0, ls >> >> on aarch64. >> >> Testcases that go together with this patch have been split into two >> separate files, one containing testcases for unsigned variables and the >> other for wrapping signed ones (and thus compiled with -fwrapv). >> Additionally, one aarch64 test has been adjusted since the patch has >> caused the generated code to change from >> >> cmn w0, #2 >> csinc w0, w1, wzr, cc (x < -2) >> >> to >> >> cmn w0, #3 >> csinc w0, w1, wzr, cs (x <= -3) >> >> This patch has been bootstrapped and regtested on aarch64, x86_64, and >> i386, and additionally regtested on riscv32. >> >> gcc/ChangeLog: >> >> PR tree-optimization/116024 >> * match.pd: New transformation around integer comparison. >> >> gcc/testsuite/ChangeLog: >> >> * gcc.dg/tree-ssa/pr116024-2.c: New test. >> * gcc.dg/tree-ssa/pr116024-2-fwrapv.c: Ditto. >> * gcc.target/aarch64/gtu_to_ltu_cmp_1.c: Adjust. >> >> Signed-off-by: Artemiy Volkov <arte...@synopsys.com> >> --- >> gcc/match.pd | 44 ++++++++++++++++++- >> .../gcc.dg/tree-ssa/pr116024-2-fwrapv.c | 38 ++++++++++++++++ >> gcc/testsuite/gcc.dg/tree-ssa/pr116024-2.c | 38 ++++++++++++++++ >> .../gcc.target/aarch64/gtu_to_ltu_cmp_1.c | 2 +- >> 4 files changed, 120 insertions(+), 2 deletions(-) >> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr116024-2-fwrapv.c >> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr116024-2.c >> >> diff --git a/gcc/match.pd b/gcc/match.pd >> index bf3b4a2e3fe..3275a69252f 100644 >> --- a/gcc/match.pd >> +++ b/gcc/match.pd >> @@ -8896,6 +8896,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) >> (cmp @0 { TREE_OVERFLOW (res) >> ? drop_tree_overflow (res) : res; })))))))) >> (for cmp (lt le gt ge) >> + rcmp (gt ge lt le) >> (for op (plus minus) >> rop (minus plus) >> (simplify >> @@ -8923,7 +8924,48 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) >> "X cmp C2 -+ C1"), >> WARN_STRICT_OVERFLOW_COMPARISON); >> } >> - (cmp @0 { res; }))))))))) >> + (cmp @0 { res; }))))) >> +/* For wrapping types, simplify the following cases of X +- C1 CMP C2: >> + >> + (a) If CMP is <= and C2 -+ C1 == +INF (1), simplify to X >= -INF -+ C1 >> + by observing the following: >> + >> + X +- C1 <= C2 >> + ==> -INF <= X +- C1 <= C2 (add left hand side which holds for any X, C1) >> + ==> -INF -+ C1 <= X <= C2 -+ C1 (add -+C1 to all 3 expressions) >> + ==> -INF -+ C1 <= X <= +INF (due to (1)) >> + ==> -INF -+ C1 <= X (eliminate the right hand side since it holds for >> any X) >> + >> + (b) Similarly, if CMP is >= and C2 -+ C1 == -INF (1): >> + >> + X +- C1 >= C2 >> + ==> +INF >= X +- C1 >= C2 (add left hand side which holds for any X, C1) >> + ==> +INF -+ C1 >= X >= C2 -+ C1 (add -+C1 to all 3 expressions) >> + ==> +INF -+ C1 >= X >= -INF (due to (1)) >> + ==> +INF -+ C1 >= X (eliminate the right hand side since it holds for >> any X) >> + >> + (c) The > and < cases are negations of (a) and (b), respectively. */ >> + (if (TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0))) > > This only works for signed types, right? So add a !TYPE_UNSIGNED check.
Nope, unlike the one in patch #3, this transformation is intended to work for both unsigned and signed wrapping types, so in this case there's no need for the check. > >> + (with >> + { >> + wide_int max = wi::max_value (TREE_TYPE (@0)); >> + wide_int min = wi::min_value (TREE_TYPE (@0)); >> + >> + wide_int c2 = rop == PLUS_EXPR >> + ? wi::add (wi::to_wide (@2), wi::to_wide (@1)) >> + : wi::sub (wi::to_wide (@2), wi::to_wide (@1)); >> + } >> + (if (((cmp == LE_EXPR || cmp == GT_EXPR) && wi::eq_p (c2, max)) >> + || ((cmp == LT_EXPR || cmp == GE_EXPR) && wi::eq_p (c2, min))) >> + (with >> + { >> + wide_int c1 = rop == PLUS_EXPR >> + ? wi::add (wi::bit_not (c2), wi::to_wide (@1)) >> + : wi::sub (wi::bit_not (c2), wi::to_wide (@1)); >> + tree c1_cst = build_uniform_cst (TREE_TYPE (@0), >> + wide_int_to_tree (TREE_TYPE (@0), c1)); > > You can drop build_uniform_cst, we're not dealing with vectors here. OK, will remove before resending. Thanks, Artemiy > > OK with those changes. > > Thanks, > Richard. > >> + } >> + (rcmp @0 { c1_cst; }))))))))) >> >> /* Invert sign of X in comparisons of the form C1 - X CMP C2. */ >> >> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr116024-2-fwrapv.c >> b/gcc/testsuite/gcc.dg/tree-ssa/pr116024-2-fwrapv.c >> new file mode 100644 >> index 00000000000..b9e88ba79fc >> --- /dev/null >> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr116024-2-fwrapv.c >> @@ -0,0 +1,38 @@ >> +/* PR tree-optimization/116024 */ >> +/* { dg-do compile } */ >> +/* { dg-options "-O1 -fdump-tree-forwprop1-details -fwrapv" } */ >> + >> +#include <stdint.h> >> +#include <limits.h> >> + >> +uint32_t f(void); >> + >> +int32_t i3(void) >> +{ >> + int32_t l = -10 + (int32_t)f(); >> + return l <= INT32_MAX - 10; // f() >= INT32_MIN + 10 >> +} >> + >> +int32_t i3a(void) >> +{ >> + int32_t l = -20 + (int32_t)f(); >> + return l < INT32_MAX - 19; // f() > INT32_MAX + 20 >> +} >> + >> +int32_t i3b(void) >> +{ >> + int32_t l = 30 + (int32_t)f(); >> + return l >= INT32_MIN + 30; // f() <= INT32_MAX - 30 >> +} >> + >> +int32_t i3c(void) >> +{ >> + int32_t l = 40 + (int32_t)f(); >> + return l > INT32_MIN + 39; // f() < INT32_MIN - 40 >> +} >> + >> +/* { dg-final { scan-tree-dump-times "Removing dead stmt:.*? \\+" 4 >> "forwprop1" } } */ >> +/* { dg-final { scan-tree-dump-times "gimple_simplified to.* >= >> -2147483638" 1 "forwprop1" } } */ >> +/* { dg-final { scan-tree-dump-times "gimple_simplified to.* >= >> -2147483628" 1 "forwprop1" } } */ >> +/* { dg-final { scan-tree-dump-times "gimple_simplified to.* <= 2147483617" >> 1 "forwprop1" } } */ >> +/* { dg-final { scan-tree-dump-times "gimple_simplified to.* <= 2147483607" >> 1 "forwprop1" } } */ >> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr116024-2.c >> b/gcc/testsuite/gcc.dg/tree-ssa/pr116024-2.c >> new file mode 100644 >> index 00000000000..07b637fab40 >> --- /dev/null >> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr116024-2.c >> @@ -0,0 +1,38 @@ >> +/* PR tree-optimization/116024 */ >> +/* { dg-do compile } */ >> +/* { dg-options "-O1 -fdump-tree-forwprop1-details" } */ >> + >> +#include <stdint.h> >> + >> +uint32_t f(void); >> + >> +int32_t i3(void) >> +{ >> + uint32_t l = 2; >> + l = 10 + (uint32_t)f(); >> + return l <= 9; // f() >= -10u >> +} >> + >> +int32_t i3a(void) >> +{ >> + uint32_t l = 20 + (uint32_t)f(); >> + return l < 20; // f() > -21u >> +} >> + >> +int32_t i3b(void) >> +{ >> + uint32_t l = 30 + (uint32_t)f(); >> + return l >= 30; // f() <= -31u >> +} >> + >> +int32_t i3c(void) >> +{ >> + uint32_t l = 40 + (uint32_t)f(); >> + return l > 39; // f() < -39u >> +} >> + >> +/* { dg-final { scan-tree-dump-times "Removing dead stmt:.*? \\+" 4 >> "forwprop1" } } */ >> +/* { dg-final { scan-tree-dump-times "gimple_simplified to.* > 4294967285" >> 1 "forwprop1" } } */ >> +/* { dg-final { scan-tree-dump-times "gimple_simplified to.* > 4294967275" >> 1 "forwprop1" } } */ >> +/* { dg-final { scan-tree-dump-times "gimple_simplified to.* <= 4294967265" >> 1 "forwprop1" } } */ >> +/* { dg-final { scan-tree-dump-times "gimple_simplified to.* <= 4294967255" >> 1 "forwprop1" } } */ >> diff --git a/gcc/testsuite/gcc.target/aarch64/gtu_to_ltu_cmp_1.c >> b/gcc/testsuite/gcc.target/aarch64/gtu_to_ltu_cmp_1.c >> index 81c536c90af..bfcec6719de 100644 >> --- a/gcc/testsuite/gcc.target/aarch64/gtu_to_ltu_cmp_1.c >> +++ b/gcc/testsuite/gcc.target/aarch64/gtu_to_ltu_cmp_1.c >> @@ -10,4 +10,4 @@ f1 (int x, int t) >> return t; >> } >> >> -/* { dg-final { scan-assembler-times "cmn\\tw\[0-9\]+, #2" 1 } } */ >> +/* { dg-final { scan-assembler-times "cmn\\tw\[0-9\]+, #3" 1 } } */ >> >