This patch adds support to the pattern for `(A == B) &/| (A CMP C)`
where the second A could be casted to a different type.
Some were handled correctly if using seperate `if` statements
but not if combined with BIT_AND/BIT_IOR.
In the case of pr111456-1.c, the testcase would pass if
`--param=logical-op-non-short-circuit=0` was used but now
can be optimized always.

OK? Bootstrapped and tested on x86_64-linux-gnu.

        PR tree-optimization/106164
        PR tree-optimization/111456

gcc/ChangeLog:

        * match.pd (`(A ==/!= B) & (A CMP C)`):
        Support an optional cast on the second A.
        (`(A ==/!= B) | (A CMP C)`): Likewise.

gcc/testsuite/ChangeLog:

        * gcc.dg/tree-ssa/cmpbit-6.c: New test.
        * gcc.dg/tree-ssa/cmpbit-7.c: New test.
        * gcc.dg/tree-ssa/pr111456-1.c: New test.
---
 gcc/match.pd                               | 76 +++++++++++++---------
 gcc/testsuite/gcc.dg/tree-ssa/cmpbit-6.c   | 22 +++++++
 gcc/testsuite/gcc.dg/tree-ssa/cmpbit-7.c   | 28 ++++++++
 gcc/testsuite/gcc.dg/tree-ssa/pr111456-1.c | 43 ++++++++++++
 4 files changed, 139 insertions(+), 30 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/cmpbit-6.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/cmpbit-7.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr111456-1.c

diff --git a/gcc/match.pd b/gcc/match.pd
index a37af05f873..0bf91bde486 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -2973,7 +2973,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
        && TYPE_OVERFLOW_WRAPS (TREE_TYPE (@1)))
   (gt @0 (minus @1 { build_int_cst (TREE_TYPE (@1), 1); }))))
 
-/* Convert (X == CST1) && (X OP2 CST2) to a known value
+/* Convert (X == CST1) && ((other)X OP2 CST2) to a known value
    based on CST1 OP2 CST2.  Similarly for (X != CST1).  */
 /* Convert (X == Y) && (X OP2 Y) to a known value if X is an integral type.
    Similarly for (X != Y).  */
@@ -2981,26 +2981,30 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 (for code1 (eq ne)
  (for code2 (eq ne lt gt le ge)
   (simplify
-   (bit_and:c (code1@3 @0 @1) (code2@4 @0 @2))
+   (bit_and:c (code1:c@3 @0 @1) (code2:c@4 (convert?@c0 @0) @2))
    (if ((TREE_CODE (@1) == INTEGER_CST
         && TREE_CODE (@2) == INTEGER_CST)
        || ((INTEGRAL_TYPE_P (TREE_TYPE (@1))
             || POINTER_TYPE_P (TREE_TYPE (@1)))
-           && operand_equal_p (@1, @2)))
+           && bitwise_equal_p (@1, @2)))
     (with
      {
       bool one_before = false;
       bool one_after = false;
       int cmp = 0;
+      bool allbits = true;
       if (TREE_CODE (@1) == INTEGER_CST
          && TREE_CODE (@2) == INTEGER_CST)
        {
-         cmp = tree_int_cst_compare (@1, @2);
+         allbits = TYPE_PRECISION (TREE_TYPE (@1)) <= TYPE_PRECISION 
(TREE_TYPE (@2));
+         auto t1 = wi::to_wide (fold_convert (TREE_TYPE (@2), @1));
+         auto t2 = wi::to_wide (@2);
+         cmp = wi::cmp (t1, t2, TYPE_SIGN (TREE_TYPE (@2)));
          if (cmp < 0
-             && wi::to_wide (@1) == wi::to_wide (@2) - 1)
+             && t1 == t2 - 1)
            one_before = true;
          if (cmp > 0
-             && wi::to_wide (@1) == wi::to_wide (@2) + 1)
+             && t1 == t2 + 1)
            one_after = true;
        }
       bool val;
@@ -3018,25 +3022,29 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
      (switch
       (if (code1 == EQ_EXPR && val) @3)
       (if (code1 == EQ_EXPR && !val) { constant_boolean_node (false, type); })
-      (if (code1 == NE_EXPR && !val) @4)
+      (if (code1 == NE_EXPR && !val && allbits) @4)
       (if (code1 == NE_EXPR
            && code2 == GE_EXPR
-          && cmp == 0)
-       (gt @0 @1))
+          && cmp == 0
+          && allbits)
+       (gt @c0 (convert @1)))
       (if (code1 == NE_EXPR
            && code2 == LE_EXPR
-          && cmp == 0)
-       (lt @0 @1))
+          && cmp == 0
+          && allbits)
+       (lt @c0 (convert @1)))
       /* (a != (b+1)) & (a > b) -> a > (b+1) */
       (if (code1 == NE_EXPR
            && code2 == GT_EXPR
-          && one_after)
-       (gt @0 @1))
+          && one_after
+          && allbits)
+       (gt @c0 (convert @1)))
       /* (a != (b-1)) & (a < b) -> a < (b-1) */
       (if (code1 == NE_EXPR
            && code2 == LT_EXPR
-          && one_before)
-       (lt @0 @1))
+          && one_before
+          && allbits)
+       (lt @c0 (convert @1)))
      )
     )
    )
@@ -3100,26 +3108,30 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 (for code1 (eq ne)
  (for code2 (eq ne lt gt le ge)
   (simplify
-   (bit_ior:c (code1@3 @0 @1) (code2@4 @0 @2))
+   (bit_ior:c (code1:c@3 @0 @1) (code2:c@4 (convert?@c0 @0) @2))
    (if ((TREE_CODE (@1) == INTEGER_CST
         && TREE_CODE (@2) == INTEGER_CST)
        || ((INTEGRAL_TYPE_P (TREE_TYPE (@1))
            || POINTER_TYPE_P (TREE_TYPE (@1)))
-           && operand_equal_p (@1, @2)))
+           && bitwise_equal_p (@1, @2)))
     (with
      {
       bool one_before = false;
       bool one_after = false;
       int cmp = 0;
+      bool allbits = true;
       if (TREE_CODE (@1) == INTEGER_CST
          && TREE_CODE (@2) == INTEGER_CST)
        {
-         cmp = tree_int_cst_compare (@1, @2);
+         allbits = TYPE_PRECISION (TREE_TYPE (@1)) <= TYPE_PRECISION 
(TREE_TYPE (@2));
+         auto t1 = wi::to_wide (fold_convert (TREE_TYPE (@2), @1));
+         auto t2 = wi::to_wide (@2);
+         cmp = wi::cmp (t1, t2, TYPE_SIGN (TREE_TYPE (@2)));
          if (cmp < 0
-             && wi::to_wide (@1) == wi::to_wide (@2) - 1)
+             && t1 == t2 - 1)
            one_before = true;
          if (cmp > 0
-             && wi::to_wide (@1) == wi::to_wide (@2) + 1)
+             && t1 == t2 + 1)
            one_after = true;
        }
       bool val;
@@ -3136,26 +3148,30 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
      }
      (switch
       (if (code1 == EQ_EXPR && val) @4)
-      (if (code1 == NE_EXPR && val) { constant_boolean_node (true, type); })
-      (if (code1 == NE_EXPR && !val) @3)
+      (if (code1 == NE_EXPR && val && allbits) { constant_boolean_node (true, 
type); })
+      (if (code1 == NE_EXPR && !val && allbits) @3)
       (if (code1 == EQ_EXPR
            && code2 == GT_EXPR
-          && cmp == 0)
-       (ge @0 @1))
+          && cmp == 0
+          && allbits)
+       (ge @c0 @2))
       (if (code1 == EQ_EXPR
            && code2 == LT_EXPR
-          && cmp == 0)
-       (le @0 @1))
+          && cmp == 0
+          && allbits)
+       (le @c0 @2))
       /* (a == (b-1)) | (a >= b) -> a >= (b-1) */
       (if (code1 == EQ_EXPR
            && code2 == GE_EXPR
-          && one_before)
-       (ge @0 @1))
+          && one_before
+          && allbits)
+       (ge @c0 (convert @1)))
       /* (a == (b+1)) | (a <= b) -> a <= (b-1) */
       (if (code1 == EQ_EXPR
            && code2 == LE_EXPR
-          && one_after)
-       (le @0 @1))
+          && one_after
+          && allbits)
+       (le @c0 (convert @1)))
      )
     )
    )
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cmpbit-6.c 
b/gcc/testsuite/gcc.dg/tree-ssa/cmpbit-6.c
new file mode 100644
index 00000000000..4ec73f030bb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/cmpbit-6.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* PR tree-optimization/106164 */
+/* PR tree-optimization/111456 */
+
+_Bool iu(int a)
+{
+  _Bool t = a == 0;
+  unsigned t1 = a;
+  _Bool t2 = t1 >= 3;
+  return t & t2;
+}
+
+_Bool is(int a)
+{
+  _Bool t = a == 0;
+  short t1 = a;
+  _Bool t2 = t1 >= 3;
+  return t & t2;
+}
+
+/* { dg-final { scan-tree-dump-times "return 0" 2 "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cmpbit-7.c 
b/gcc/testsuite/gcc.dg/tree-ssa/cmpbit-7.c
new file mode 100644
index 00000000000..ee04ebbaaed
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/cmpbit-7.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-forwprop1" } */
+/* PR tree-optimization/106164 */
+/* PR tree-optimization/111456 */
+
+_Bool f(int a)
+{
+        _Bool t = a == 3;
+        unsigned t1 = a;
+        _Bool t2 = t1 >= 3;
+        return t | t2;
+}
+
+/* Should be able to optimize down to just `a > 2` during forwprop1 */
+/* { dg-final { scan-tree-dump-not "a_\[0-9\]+.D. == 3" "forwprop1" } } */
+
+_Bool f1(int b)
+{
+        _Bool t = b == 3;
+        short t1 = b;
+        _Bool t2 = t1 >= 3;
+        return t | t2;
+}
+
+/* Should be able to optimize down to just `a > 2` during forwprop1 as 
`((short)a) >= 3` is
+   true already when `a == 3`. */
+/* { dg-final { scan-tree-dump-not "b_\[0-9\]+.D. == 3" "forwprop1" } } */
+/* { dg-final { scan-tree-dump-times "_\[0-9\]+ > 2" 2 "forwprop1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr111456-1.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr111456-1.c
new file mode 100644
index 00000000000..8a2f730b387
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr111456-1.c
@@ -0,0 +1,43 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* PR tree-optimization/111456 */
+
+void foo(void);
+static int i;
+static int *j = &i;
+static char l;
+static void(a)(char) {}
+static short(b)(short c, short d) { return c - d; }
+static short(e)(short f, int g) {
+    return f < 0 || g < 0 || g >= 32 ? f : f >> g;
+}
+static short(h)(short f, int g) { return g >= 2 ?: f >> g; }
+static char k(char m, short n) {
+    short o;
+    int *p = &i;
+    if (!(((m) >= 1) && ((m) <= 1))) {
+        __builtin_unreachable();
+    }
+    o = e(i, i);
+    if (h(1, o))
+        ;
+    else {
+        m = 0;
+        for (; m >= -20; m = b(m, 9))
+            if (a(i), n) {
+                if (*p)
+                    ;
+                else
+                    foo();
+                ;
+            } else
+                return l;
+    }
+    return i;
+}
+int main() { k(0 <= 0 > *j, i); }
+
+
+/* { dg-final { scan-tree-dump-not "foo " "optimized" } } */
+/* { dg-final { scan-tree-dump "return 0;" "optimized" } } */
+
-- 
2.31.1

Reply via email to