On 9/27/2024 1:29 PM, Richard Biener wrote:
> On Mon, 23 Sep 2024, Artemiy Volkov wrote:
>
>> Whenever C1 and C2 are integer constants, X is of a wrapping type, and
>> cmp is a relational operator, the expression X +- C1 cmp C2 can be
>> simplified in the following cases:
>>
>> (a) If cmp is <= and C2 -+ C1 == +INF(1), we can transform the initial
>> comparison in the following way:
>> X +- C1 <= C2
>> -INF <= X +- C1 <= C2 (add left hand side which holds for any X, C1)
>> -INF -+ C1 <= X <= C2 -+ C1 (add -+C1 to all 3 expressions)
>> -INF -+ C1 <= X <= +INF (due to (1))
>> -INF -+ C1 <= X (eliminate the right hand side since it holds for any X)
>>
>> (b) By analogy, if cmp if >= and C2 -+ C1 == -INF(1), use the following
>> sequence of transformations:
>>
>> X +- C1 >= C2
>> +INF >= X +- C1 >= C2 (add left hand side which holds for any X, C1)
>> +INF -+ C1 >= X >= C2 -+ C1 (add -+C1 to all 3 expressions)
>> +INF -+ C1 >= X >= -INF (due to (1))
>> +INF -+ C1 >= X (eliminate the right hand side since it holds for any X)
>>
>> (c) The > and < cases are negations of (a) and (b), respectively.
>>
>> This transformation allows to occasionally save add / sub instructions,
>> for instance the expression
>>
>> 3 + (uint32_t)f() < 2
>>
>> compiles to
>>
>> cmn w0, #4
>> cset w0, ls
>>
>> instead of
>>
>> add w0, w0, 3
>> cmp w0, 2
>> cset w0, ls
>>
>> on aarch64.
>>
>> Testcases that go together with this patch have been split into two
>> separate files, one containing testcases for unsigned variables and the
>> other for wrapping signed ones (and thus compiled with -fwrapv).
>> Additionally, one aarch64 test has been adjusted since the patch has
>> caused the generated code to change from
>>
>> cmn w0, #2
>> csinc w0, w1, wzr, cc (x < -2)
>>
>> to
>>
>> cmn w0, #3
>> csinc w0, w1, wzr, cs (x <= -3)
>>
>> This patch has been bootstrapped and regtested on aarch64, x86_64, and
>> i386, and additionally regtested on riscv32.
>>
>> gcc/ChangeLog:
>>
>> PR tree-optimization/116024
>> * match.pd: New transformation around integer comparison.
>>
>> gcc/testsuite/ChangeLog:
>>
>> * gcc.dg/tree-ssa/pr116024-2.c: New test.
>> * gcc.dg/tree-ssa/pr116024-2-fwrapv.c: Ditto.
>> * gcc.target/aarch64/gtu_to_ltu_cmp_1.c: Adjust.
>>
>> Signed-off-by: Artemiy Volkov <[email protected]>
>> ---
>> gcc/match.pd | 44 ++++++++++++++++++-
>> .../gcc.dg/tree-ssa/pr116024-2-fwrapv.c | 38 ++++++++++++++++
>> gcc/testsuite/gcc.dg/tree-ssa/pr116024-2.c | 38 ++++++++++++++++
>> .../gcc.target/aarch64/gtu_to_ltu_cmp_1.c | 2 +-
>> 4 files changed, 120 insertions(+), 2 deletions(-)
>> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr116024-2-fwrapv.c
>> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr116024-2.c
>>
>> diff --git a/gcc/match.pd b/gcc/match.pd
>> index bf3b4a2e3fe..3275a69252f 100644
>> --- a/gcc/match.pd
>> +++ b/gcc/match.pd
>> @@ -8896,6 +8896,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>> (cmp @0 { TREE_OVERFLOW (res)
>> ? drop_tree_overflow (res) : res; }))))))))
>> (for cmp (lt le gt ge)
>> + rcmp (gt ge lt le)
>> (for op (plus minus)
>> rop (minus plus)
>> (simplify
>> @@ -8923,7 +8924,48 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>> "X cmp C2 -+ C1"),
>> WARN_STRICT_OVERFLOW_COMPARISON);
>> }
>> - (cmp @0 { res; })))))))))
>> + (cmp @0 { res; })))))
>> +/* For wrapping types, simplify the following cases of X +- C1 CMP C2:
>> +
>> + (a) If CMP is <= and C2 -+ C1 == +INF (1), simplify to X >= -INF -+ C1
>> + by observing the following:
>> +
>> + X +- C1 <= C2
>> + ==> -INF <= X +- C1 <= C2 (add left hand side which holds for any X, C1)
>> + ==> -INF -+ C1 <= X <= C2 -+ C1 (add -+C1 to all 3 expressions)
>> + ==> -INF -+ C1 <= X <= +INF (due to (1))
>> + ==> -INF -+ C1 <= X (eliminate the right hand side since it holds for
>> any X)
>> +
>> + (b) Similarly, if CMP is >= and C2 -+ C1 == -INF (1):
>> +
>> + X +- C1 >= C2
>> + ==> +INF >= X +- C1 >= C2 (add left hand side which holds for any X, C1)
>> + ==> +INF -+ C1 >= X >= C2 -+ C1 (add -+C1 to all 3 expressions)
>> + ==> +INF -+ C1 >= X >= -INF (due to (1))
>> + ==> +INF -+ C1 >= X (eliminate the right hand side since it holds for
>> any X)
>> +
>> + (c) The > and < cases are negations of (a) and (b), respectively. */
>> + (if (TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0)))
>
> This only works for signed types, right? So add a !TYPE_UNSIGNED check.
Nope, unlike the one in patch #3, this transformation is intended to
work for both unsigned and signed wrapping types, so in this case
there's no need for the check.
>
>> + (with
>> + {
>> + wide_int max = wi::max_value (TREE_TYPE (@0));
>> + wide_int min = wi::min_value (TREE_TYPE (@0));
>> +
>> + wide_int c2 = rop == PLUS_EXPR
>> + ? wi::add (wi::to_wide (@2), wi::to_wide (@1))
>> + : wi::sub (wi::to_wide (@2), wi::to_wide (@1));
>> + }
>> + (if (((cmp == LE_EXPR || cmp == GT_EXPR) && wi::eq_p (c2, max))
>> + || ((cmp == LT_EXPR || cmp == GE_EXPR) && wi::eq_p (c2, min)))
>> + (with
>> + {
>> + wide_int c1 = rop == PLUS_EXPR
>> + ? wi::add (wi::bit_not (c2), wi::to_wide (@1))
>> + : wi::sub (wi::bit_not (c2), wi::to_wide (@1));
>> + tree c1_cst = build_uniform_cst (TREE_TYPE (@0),
>> + wide_int_to_tree (TREE_TYPE (@0), c1));
>
> You can drop build_uniform_cst, we're not dealing with vectors here.
OK, will remove before resending.
Thanks,
Artemiy
>
> OK with those changes.
>
> Thanks,
> Richard.
>
>> + }
>> + (rcmp @0 { c1_cst; })))))))))
>>
>> /* Invert sign of X in comparisons of the form C1 - X CMP C2. */
>>
>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr116024-2-fwrapv.c
>> b/gcc/testsuite/gcc.dg/tree-ssa/pr116024-2-fwrapv.c
>> new file mode 100644
>> index 00000000000..b9e88ba79fc
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr116024-2-fwrapv.c
>> @@ -0,0 +1,38 @@
>> +/* PR tree-optimization/116024 */
>> +/* { dg-do compile } */
>> +/* { dg-options "-O1 -fdump-tree-forwprop1-details -fwrapv" } */
>> +
>> +#include <stdint.h>
>> +#include <limits.h>
>> +
>> +uint32_t f(void);
>> +
>> +int32_t i3(void)
>> +{
>> + int32_t l = -10 + (int32_t)f();
>> + return l <= INT32_MAX - 10; // f() >= INT32_MIN + 10
>> +}
>> +
>> +int32_t i3a(void)
>> +{
>> + int32_t l = -20 + (int32_t)f();
>> + return l < INT32_MAX - 19; // f() > INT32_MAX + 20
>> +}
>> +
>> +int32_t i3b(void)
>> +{
>> + int32_t l = 30 + (int32_t)f();
>> + return l >= INT32_MIN + 30; // f() <= INT32_MAX - 30
>> +}
>> +
>> +int32_t i3c(void)
>> +{
>> + int32_t l = 40 + (int32_t)f();
>> + return l > INT32_MIN + 39; // f() < INT32_MIN - 40
>> +}
>> +
>> +/* { dg-final { scan-tree-dump-times "Removing dead stmt:.*? \\+" 4
>> "forwprop1" } } */
>> +/* { dg-final { scan-tree-dump-times "gimple_simplified to.* >=
>> -2147483638" 1 "forwprop1" } } */
>> +/* { dg-final { scan-tree-dump-times "gimple_simplified to.* >=
>> -2147483628" 1 "forwprop1" } } */
>> +/* { dg-final { scan-tree-dump-times "gimple_simplified to.* <= 2147483617"
>> 1 "forwprop1" } } */
>> +/* { dg-final { scan-tree-dump-times "gimple_simplified to.* <= 2147483607"
>> 1 "forwprop1" } } */
>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr116024-2.c
>> b/gcc/testsuite/gcc.dg/tree-ssa/pr116024-2.c
>> new file mode 100644
>> index 00000000000..07b637fab40
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr116024-2.c
>> @@ -0,0 +1,38 @@
>> +/* PR tree-optimization/116024 */
>> +/* { dg-do compile } */
>> +/* { dg-options "-O1 -fdump-tree-forwprop1-details" } */
>> +
>> +#include <stdint.h>
>> +
>> +uint32_t f(void);
>> +
>> +int32_t i3(void)
>> +{
>> + uint32_t l = 2;
>> + l = 10 + (uint32_t)f();
>> + return l <= 9; // f() >= -10u
>> +}
>> +
>> +int32_t i3a(void)
>> +{
>> + uint32_t l = 20 + (uint32_t)f();
>> + return l < 20; // f() > -21u
>> +}
>> +
>> +int32_t i3b(void)
>> +{
>> + uint32_t l = 30 + (uint32_t)f();
>> + return l >= 30; // f() <= -31u
>> +}
>> +
>> +int32_t i3c(void)
>> +{
>> + uint32_t l = 40 + (uint32_t)f();
>> + return l > 39; // f() < -39u
>> +}
>> +
>> +/* { dg-final { scan-tree-dump-times "Removing dead stmt:.*? \\+" 4
>> "forwprop1" } } */
>> +/* { dg-final { scan-tree-dump-times "gimple_simplified to.* > 4294967285"
>> 1 "forwprop1" } } */
>> +/* { dg-final { scan-tree-dump-times "gimple_simplified to.* > 4294967275"
>> 1 "forwprop1" } } */
>> +/* { dg-final { scan-tree-dump-times "gimple_simplified to.* <= 4294967265"
>> 1 "forwprop1" } } */
>> +/* { dg-final { scan-tree-dump-times "gimple_simplified to.* <= 4294967255"
>> 1 "forwprop1" } } */
>> diff --git a/gcc/testsuite/gcc.target/aarch64/gtu_to_ltu_cmp_1.c
>> b/gcc/testsuite/gcc.target/aarch64/gtu_to_ltu_cmp_1.c
>> index 81c536c90af..bfcec6719de 100644
>> --- a/gcc/testsuite/gcc.target/aarch64/gtu_to_ltu_cmp_1.c
>> +++ b/gcc/testsuite/gcc.target/aarch64/gtu_to_ltu_cmp_1.c
>> @@ -10,4 +10,4 @@ f1 (int x, int t)
>> return t;
>> }
>>
>> -/* { dg-final { scan-assembler-times "cmn\\tw\[0-9\]+, #2" 1 } } */
>> +/* { dg-final { scan-assembler-times "cmn\\tw\[0-9\]+, #3" 1 } } */
>>
>