Hi!

This patch moves this optimization from fold-const.c to match.pd, where it
is much shorter to express and also lets us optimize code that does not
appear together in a single expression in the source.  This is the first
step towards fixing the PR.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for stage1?

2020-04-24  Jakub Jelinek  <ja...@redhat.com>

        PR tree-optimization/94718
        * fold-const.c (fold_binary_loc): Move (X & C) eqne (Y & C)
        -> (X ^ Y) & C eqne 0 optimization to ...
        * match.pd ((X & C) op (Y & C) into (X ^ Y) & C op 0): ... here.

        * gcc.dg/tree-ssa/pr94718-1.c: New test.
        * gcc.dg/tree-ssa/pr94718-2.c: New test.

--- gcc/fold-const.c.jj 2020-04-23 19:50:11.693805797 +0200
+++ gcc/fold-const.c    2020-04-24 18:34:04.245559495 +0200
@@ -11631,50 +11631,6 @@ fold_binary_loc (location_t loc, enum tr
          return omit_one_operand_loc (loc, type, res, arg0);
        }
 
-      /* Fold (X & C) op (Y & C) as (X ^ Y) & C op 0", and symmetries.  */
-      if (TREE_CODE (arg0) == BIT_AND_EXPR
-         && TREE_CODE (arg1) == BIT_AND_EXPR)
-       {
-         tree arg00 = TREE_OPERAND (arg0, 0);
-         tree arg01 = TREE_OPERAND (arg0, 1);
-         tree arg10 = TREE_OPERAND (arg1, 0);
-         tree arg11 = TREE_OPERAND (arg1, 1);
-         tree itype = TREE_TYPE (arg0);
-
-         if (operand_equal_p (arg01, arg11, 0))
-           {
-             tem = fold_convert_loc (loc, itype, arg10);
-             tem = fold_build2_loc (loc, BIT_XOR_EXPR, itype, arg00, tem);
-             tem = fold_build2_loc (loc, BIT_AND_EXPR, itype, tem, arg01);
-             return fold_build2_loc (loc, code, type, tem,
-                                     build_zero_cst (itype));
-           }
-         if (operand_equal_p (arg01, arg10, 0))
-           {
-             tem = fold_convert_loc (loc, itype, arg11);
-             tem = fold_build2_loc (loc, BIT_XOR_EXPR, itype, arg00, tem);
-             tem = fold_build2_loc (loc, BIT_AND_EXPR, itype, tem, arg01);
-             return fold_build2_loc (loc, code, type, tem,
-                                     build_zero_cst (itype));
-           }
-         if (operand_equal_p (arg00, arg11, 0))
-           {
-             tem = fold_convert_loc (loc, itype, arg10);
-             tem = fold_build2_loc (loc, BIT_XOR_EXPR, itype, arg01, tem);
-             tem = fold_build2_loc (loc, BIT_AND_EXPR, itype, tem, arg00);
-             return fold_build2_loc (loc, code, type, tem,
-                                     build_zero_cst (itype));
-           }
-         if (operand_equal_p (arg00, arg10, 0))
-           {
-             tem = fold_convert_loc (loc, itype, arg11);
-             tem = fold_build2_loc (loc, BIT_XOR_EXPR, itype, arg01, tem);
-             tem = fold_build2_loc (loc, BIT_AND_EXPR, itype, tem, arg00);
-             return fold_build2_loc (loc, code, type, tem,
-                                     build_zero_cst (itype));
-           }
-       }
-
       if (TREE_CODE (arg0) == BIT_XOR_EXPR
          && TREE_CODE (arg1) == BIT_XOR_EXPR)
        {
--- gcc/match.pd.jj     2020-04-23 19:50:11.710805542 +0200
+++ gcc/match.pd        2020-04-24 18:34:04.247559465 +0200
@@ -4335,7 +4335,12 @@ (define_operator_list COND_TERNARY
  (simplify
   (cmp (convert? addr@0) integer_zerop)
   (if (tree_single_nonzero_warnv_p (@0, NULL))
-   { constant_boolean_node (cmp == NE_EXPR, type); })))
+   { constant_boolean_node (cmp == NE_EXPR, type); }))
+
+ /* (X & C) op (Y & C) into (X ^ Y) & C op 0.  */
+ (simplify
+  (cmp (bit_and:cs @0 @2) (bit_and:cs @1 @2))
+  (cmp (bit_and (bit_xor @0 @1) @2) { build_zero_cst (TREE_TYPE (@2)); })))
 
 /* If we have (A & C) == C where C is a power of 2, convert this into
    (A & C) != 0.  Similarly for NE_EXPR.  */
--- gcc/testsuite/gcc.dg/tree-ssa/pr94718-1.c.jj        2020-04-24 17:31:10.392472875 +0200
+++ gcc/testsuite/gcc.dg/tree-ssa/pr94718-1.c   2020-04-24 18:43:35.829119496 +0200
@@ -0,0 +1,49 @@
+/* PR tree-optimization/94718 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-ipa-icf -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-times "= \[xy]_\[0-9]+\\\(D\\\) \\^ \[xy]_\[0-9]+\\\(D\\\);" 6 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\[0-9]+ < 0;" 6 "optimized" } } */
+
+#define I (-__INT_MAX__ - 1)
+
+int
+f1 (int x, int y)
+{
+  return (x & I) != (y & I);
+}
+
+int
+f2 (int x, int y)
+{
+  return (~x & I) != (~y & I);
+}
+
+int
+f3 (int x, int y)
+{
+  return ((x & I) ^ I) != ((y & I) ^ I);
+}
+
+int
+f4 (int x, int y)
+{
+  int s = (x & I);
+  int t = (y & I);
+  return s != t;
+}
+
+int
+f5 (int x, int y)
+{
+  int s = (~x & I);
+  int t = (~y & I);
+  return s != t;
+}
+
+int
+f6 (int x, int y)
+{
+  int s = ((x & I) ^ I);
+  int t = ((y & I) ^ I);
+  return s != t;
+}
--- gcc/testsuite/gcc.dg/tree-ssa/pr94718-2.c.jj        2020-04-24 17:31:13.673424244 +0200
+++ gcc/testsuite/gcc.dg/tree-ssa/pr94718-2.c   2020-04-24 18:43:44.669988955 +0200
@@ -0,0 +1,49 @@
+/* PR tree-optimization/94718 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-ipa-icf -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-times "= \[xy]_\[0-9]+\\\(D\\\) \\^ \[xy]_\[0-9]+\\\(D\\\);" 6 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\[0-9]+ >= 0;" 6 "optimized" } } */
+
+#define I (-__INT_MAX__ - 1)
+
+int
+f1 (int x, int y)
+{
+  return (x & I) == (y & I);
+}
+
+int
+f2 (int x, int y)
+{
+  return (~x & I) == (~y & I);
+}
+
+int
+f3 (int x, int y)
+{
+  return ((x & I) ^ I) == ((y & I) ^ I);
+}
+
+int
+f4 (int x, int y)
+{
+  int s = (x & I);
+  int t = (y & I);
+  return s == t;
+}
+
+int
+f5 (int x, int y)
+{
+  int s = (~x & I);
+  int t = (~y & I);
+  return s == t;
+}
+
+int
+f6 (int x, int y)
+{
+  int s = ((x & I) ^ I);
+  int t = ((y & I) ^ I);
+  return s == t;
+}

        Jakub

Reply via email to