On 9/27/2024 1:29 PM, Richard Biener wrote:
> On Mon, 23 Sep 2024, Artemiy Volkov wrote:
> 
>> Whenever C1 and C2 are integer constants, X is of a wrapping type, and
>> cmp is a relational operator, the expression X +- C1 cmp C2 can be
>> simplified in the following cases:
>>
>> (a) If cmp is <= and C2 -+ C1 == +INF(1), we can transform the initial
>> comparison in the following way:
>>     X +- C1 <= C2
>>     -INF <= X +- C1 <= C2 (add left hand side which holds for any X, C1)
>>     -INF -+ C1 <= X <= C2 -+ C1 (add -+C1 to all 3 expressions)
>>     -INF -+ C1 <= X <= +INF (due to (1))
>>     -INF -+ C1 <= X (eliminate the right hand side since it holds for any X)
>>
>> (b) By analogy, if cmp if >= and C2 -+ C1 == -INF(1), use the following
>> sequence of transformations:
>>
>>     X +- C1 >= C2
>>     +INF >= X +- C1 >= C2 (add left hand side which holds for any X, C1)
>>     +INF -+ C1 >= X >= C2 -+ C1 (add -+C1 to all 3 expressions)
>>     +INF -+ C1 >= X >= -INF (due to (1))
>>     +INF -+ C1 >= X (eliminate the right hand side since it holds for any X)
>>
>> (c) The > and < cases are negations of (a) and (b), respectively.
>>
>> This transformation allows to occasionally save add / sub instructions,
>> for instance the expression
>>
>> 3 + (uint32_t)f() < 2
>>
>> compiles to
>>
>> cmn     w0, #4
>> cset    w0, ls
>>
>> instead of
>>
>> add     w0, w0, 3
>> cmp     w0, 2
>> cset    w0, ls
>>
>> on aarch64.
>>
>> Testcases that go together with this patch have been split into two
>> separate files, one containing testcases for unsigned variables and the
>> other for wrapping signed ones (and thus compiled with -fwrapv).
>> Additionally, one aarch64 test has been adjusted since the patch has
>> caused the generated code to change from
>>
>> cmn     w0, #2
>> csinc   w0, w1, wzr, cc   (x < -2)
>>
>> to
>>
>> cmn     w0, #3
>> csinc   w0, w1, wzr, cs   (x <= -3)
>>
>> This patch has been bootstrapped and regtested on aarch64, x86_64, and
>> i386, and additionally regtested on riscv32.
>>
>> gcc/ChangeLog:
>>
>>          PR tree-optimization/116024
>>          * match.pd: New transformation around integer comparison.
>>
>> gcc/testsuite/ChangeLog:
>>
>>          * gcc.dg/tree-ssa/pr116024-2.c: New test.
>>          * gcc.dg/tree-ssa/pr116024-2-fwrapv.c: Ditto.
>>          * gcc.target/aarch64/gtu_to_ltu_cmp_1.c: Adjust.
>>
>> Signed-off-by: Artemiy Volkov <arte...@synopsys.com>
>> ---
>>   gcc/match.pd                                  | 44 ++++++++++++++++++-
>>   .../gcc.dg/tree-ssa/pr116024-2-fwrapv.c       | 38 ++++++++++++++++
>>   gcc/testsuite/gcc.dg/tree-ssa/pr116024-2.c    | 38 ++++++++++++++++
>>   .../gcc.target/aarch64/gtu_to_ltu_cmp_1.c     |  2 +-
>>   4 files changed, 120 insertions(+), 2 deletions(-)
>>   create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr116024-2-fwrapv.c
>>   create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr116024-2.c
>>
>> diff --git a/gcc/match.pd b/gcc/match.pd
>> index bf3b4a2e3fe..3275a69252f 100644
>> --- a/gcc/match.pd
>> +++ b/gcc/match.pd
>> @@ -8896,6 +8896,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>>          (cmp @0 { TREE_OVERFLOW (res)
>>               ? drop_tree_overflow (res) : res; }))))))))
>>   (for cmp (lt le gt ge)
>> +     rcmp (gt ge lt le)
>>    (for op (plus minus)
>>         rop (minus plus)
>>     (simplify
>> @@ -8923,7 +8924,48 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>>                                "X cmp C2 -+ C1"),
>>                               WARN_STRICT_OVERFLOW_COMPARISON);
>>      }
>> -    (cmp @0 { res; })))))))))
>> +    (cmp @0 { res; })))))
>> +/* For wrapping types, simplify the following cases of X +- C1 CMP C2:
>> +
>> +   (a) If CMP is <= and C2 -+ C1 == +INF (1), simplify to X >= -INF -+ C1
>> +       by observing the following:
>> +
>> +    X +- C1 <= C2
>> +  ==>  -INF <= X +- C1 <= C2 (add left hand side which holds for any X, C1)
>> +  ==>  -INF -+ C1 <= X <= C2 -+ C1 (add -+C1 to all 3 expressions)
>> +  ==>  -INF -+ C1 <= X <= +INF (due to (1))
>> +  ==>  -INF -+ C1 <= X (eliminate the right hand side since it holds for 
>> any X)
>> +
>> +    (b) Similarly, if CMP is >= and C2 -+ C1 == -INF (1):
>> +
>> +    X +- C1 >= C2
>> +  ==>  +INF >= X +- C1 >= C2 (add left hand side which holds for any X, C1)
>> +  ==>  +INF -+ C1 >= X >= C2 -+ C1 (add -+C1 to all 3 expressions)
>> +  ==>  +INF -+ C1 >= X >= -INF (due to (1))
>> +  ==>  +INF -+ C1 >= X (eliminate the right hand side since it holds for 
>> any X)
>> +
>> +    (c) The > and < cases are negations of (a) and (b), respectively.  */
>> +   (if (TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0)))
> 
> This only works for signed types, right?  So add a !TYPE_UNSIGNED check.

Nope, unlike the one in patch #3, this transformation is intended to 
work for both unsigned and signed wrapping types, so in this case 
there's no need for the check.

> 
>> +     (with
>> +       {
>> +    wide_int max = wi::max_value (TREE_TYPE (@0));
>> +    wide_int min = wi::min_value (TREE_TYPE (@0));
>> +
>> +    wide_int c2 = rop == PLUS_EXPR
>> +                          ? wi::add (wi::to_wide (@2), wi::to_wide (@1))
>> +                          : wi::sub (wi::to_wide (@2), wi::to_wide (@1));
>> +    }
>> +    (if (((cmp == LE_EXPR || cmp == GT_EXPR) && wi::eq_p (c2, max))
>> +        || ((cmp == LT_EXPR || cmp == GE_EXPR) && wi::eq_p (c2, min)))
>> +      (with
>> +       {
>> +         wide_int c1 = rop == PLUS_EXPR
>> +                       ? wi::add (wi::bit_not (c2), wi::to_wide (@1))
>> +                       : wi::sub (wi::bit_not (c2), wi::to_wide (@1));
>> +         tree c1_cst = build_uniform_cst (TREE_TYPE (@0),
>> +                            wide_int_to_tree (TREE_TYPE (@0), c1));
> 
> You can drop build_uniform_cst, we're not dealing with vectors here.

OK, will remove before resending.

Thanks,
Artemiy

> 
> OK with those changes.
> 
> Thanks,
> Richard.
> 
>> +       }
>> +       (rcmp @0 { c1_cst; })))))))))
>>   
>>   /* Invert sign of X in comparisons of the form C1 - X CMP C2.  */
>>   
>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr116024-2-fwrapv.c 
>> b/gcc/testsuite/gcc.dg/tree-ssa/pr116024-2-fwrapv.c
>> new file mode 100644
>> index 00000000000..b9e88ba79fc
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr116024-2-fwrapv.c
>> @@ -0,0 +1,38 @@
>> +/* PR tree-optimization/116024 */
>> +/* { dg-do compile } */
>> +/* { dg-options "-O1 -fdump-tree-forwprop1-details -fwrapv" } */
>> +
>> +#include <stdint.h>
>> +#include <limits.h>
>> +
>> +uint32_t f(void);
>> +
>> +int32_t i3(void)
>> +{
>> +  int32_t l = -10 + (int32_t)f();
>> +  return l <= INT32_MAX - 10; // f() >= INT32_MIN + 10
>> +}
>> +
>> +int32_t i3a(void)
>> +{
>> +  int32_t l = -20 + (int32_t)f();
>> +  return l < INT32_MAX - 19; // f() > INT32_MAX + 20
>> +}
>> +
>> +int32_t i3b(void)
>> +{
>> +  int32_t l = 30 + (int32_t)f();
>> +  return l >= INT32_MIN + 30; // f() <= INT32_MAX - 30
>> +}
>> +
>> +int32_t i3c(void)
>> +{
>> +  int32_t l = 40 + (int32_t)f();
>> +  return l > INT32_MIN + 39; // f() < INT32_MIN - 40
>> +}
>> +
>> +/* { dg-final { scan-tree-dump-times "Removing dead stmt:.*? \\+" 4 
>> "forwprop1" } } */
>> +/* { dg-final { scan-tree-dump-times "gimple_simplified to.* >= 
>> -2147483638" 1 "forwprop1" } } */
>> +/* { dg-final { scan-tree-dump-times "gimple_simplified to.* >= 
>> -2147483628" 1 "forwprop1" } } */
>> +/* { dg-final { scan-tree-dump-times "gimple_simplified to.* <= 2147483617" 
>> 1 "forwprop1" } } */
>> +/* { dg-final { scan-tree-dump-times "gimple_simplified to.* <= 2147483607" 
>> 1 "forwprop1" } } */
>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr116024-2.c 
>> b/gcc/testsuite/gcc.dg/tree-ssa/pr116024-2.c
>> new file mode 100644
>> index 00000000000..07b637fab40
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr116024-2.c
>> @@ -0,0 +1,38 @@
>> +/* PR tree-optimization/116024 */
>> +/* { dg-do compile } */
>> +/* { dg-options "-O1 -fdump-tree-forwprop1-details" } */
>> +
>> +#include <stdint.h>
>> +
>> +uint32_t f(void);
>> +
>> +int32_t i3(void)
>> +{
>> +  uint32_t l = 2;
>> +  l = 10 + (uint32_t)f();
>> +  return l <= 9; // f() >= -10u
>> +}
>> +
>> +int32_t i3a(void)
>> +{
>> +  uint32_t l = 20 + (uint32_t)f();
>> +  return l < 20; // f() > -21u
>> +}
>> +
>> +int32_t i3b(void)
>> +{
>> +  uint32_t l = 30 + (uint32_t)f();
>> +  return l >= 30; // f() <= -31u
>> +}
>> +
>> +int32_t i3c(void)
>> +{
>> +  uint32_t l = 40 + (uint32_t)f();
>> +  return l > 39; // f() < -39u
>> +}
>> +
>> +/* { dg-final { scan-tree-dump-times "Removing dead stmt:.*? \\+" 4 
>> "forwprop1" } } */
>> +/* { dg-final { scan-tree-dump-times "gimple_simplified to.* > 4294967285" 
>> 1 "forwprop1" } } */
>> +/* { dg-final { scan-tree-dump-times "gimple_simplified to.* > 4294967275" 
>> 1 "forwprop1" } } */
>> +/* { dg-final { scan-tree-dump-times "gimple_simplified to.* <= 4294967265" 
>> 1 "forwprop1" } } */
>> +/* { dg-final { scan-tree-dump-times "gimple_simplified to.* <= 4294967255" 
>> 1 "forwprop1" } } */
>> diff --git a/gcc/testsuite/gcc.target/aarch64/gtu_to_ltu_cmp_1.c 
>> b/gcc/testsuite/gcc.target/aarch64/gtu_to_ltu_cmp_1.c
>> index 81c536c90af..bfcec6719de 100644
>> --- a/gcc/testsuite/gcc.target/aarch64/gtu_to_ltu_cmp_1.c
>> +++ b/gcc/testsuite/gcc.target/aarch64/gtu_to_ltu_cmp_1.c
>> @@ -10,4 +10,4 @@ f1 (int x, int t)
>>     return t;
>>   }
>>   
>> -/* { dg-final { scan-assembler-times "cmn\\tw\[0-9\]+, #2" 1 } } */
>> +/* { dg-final { scan-assembler-times "cmn\\tw\[0-9\]+, #3" 1 } } */
>>
> 

Reply via email to