Hi!

The following patch generalizes the x ? 1 : 0 -> (int) x optimization
to also handle left shifts by a constant.

During x86_64-linux and i686-linux bootstraps + regtests it triggered
in 1514 unique non-LTO -m64 cases (sort -u on log mentioning
filename, function name and shift count) and 1866 -m32 cases.

Unfortunately, the patch regresses:
+FAIL: gcc.dg/tree-ssa/ssa-ccp-11.c scan-tree-dump-times optimized "if " 0
+FAIL: gcc.dg/vect/bb-slp-pattern-2.c -flto -ffat-lto-objects  scan-tree-dump-times slp1 "optimized: basic block" 1
+FAIL: gcc.dg/vect/bb-slp-pattern-2.c scan-tree-dump-times slp1 "optimized: basic block" 1
and in both cases it actually results in worse code.

In ssa-ccp-11.c since phiopt2 it results in smaller IL due to the
optimization, e.g.
-  if (_1 != 0)
-    goto <bb 5>; [21.72%]
-  else
-    goto <bb 6>; [78.28%]
-
-  <bb 5> [local count: 233216728]:
-
-  <bb 6> [local count: 1073741824]:
-  # _4 = PHI <2(5), 0(4)>
-  return _4;
+  _7 = (int) _1;
+  _8 = _7 << 1;
+  return _8;
but dom2 actually manages to optimize it only without this optimization:
-  # a_7 = PHI <0(3), 1(2)>
-  # b_8 = PHI <1(3), 0(2)>
-  _9 = a_7 & b_8;
-  return 0;
+  # a_2 = PHI <1(2), 0(3)>
+  # b_3 = PHI <0(2), 1(3)>
+  _1 = a_2 & b_3;
+  _7 = (int) _1;
+  _8 = _7 << 1;
+  return _8;
We'd need some optimization that would go through all PHI edges and
check whether some use of the PHI results in fact computes the same
constant across all the PHI edges - 1 & 0 and 0 & 1 are both always 0.
Similarly in the other function
+  # a_1 = PHI <3(2), 2(3)>
+  # b_2 = PHI <2(2), 3(3)>
+  c_5 = a_1 + b_2;
is always c_5 = 5;
Similarly, in the slp vectorization test there is:
     a[0] = b[0] ? 1 : 7;
     a[1] = b[1] ? 2 : 0;
     a[2] = b[2] ? 3 : 0;
     a[3] = b[3] ? 4 : 0;
     a[4] = b[4] ? 5 : 0;
     a[5] = b[5] ? 6 : 0;
     a[6] = b[6] ? 7 : 0;
     a[7] = b[7] ? 8 : 0;
and obviously if the ? 2 : 0 and ? 4 : 0 and ? 8 : 0 are optimized
into shifts, it doesn't match anymore.

So, I wonder whether we should perform this optimization only in the last
phiopt pass (i.e. change the bool early argument to an int late argument,
where it would be 0 (early), 1 (late) or 2 (very late)), and perform this
only if very late.

Thoughts on this?

2020-11-03  Jakub Jelinek  <ja...@redhat.com>

        PR tree-optimization/97690
        * tree-ssa-phiopt.c (conditional_replacement): Also optimize
        cond ? pow2p_cst : 0 as ((type) cond) << cst.

        * gcc.dg/tree-ssa/phi-opt-22.c: New test.

--- gcc/tree-ssa-phiopt.c.jj    2020-10-22 09:36:25.602484491 +0200
+++ gcc/tree-ssa-phiopt.c       2020-11-03 17:59:18.133662581 +0100
@@ -752,7 +752,9 @@ conditional_replacement (basic_block con
   gimple_stmt_iterator gsi;
   edge true_edge, false_edge;
   tree new_var, new_var2;
-  bool neg;
+  bool neg = false;
+  int shift = 0;
+  tree nonzero_arg;
 
   /* FIXME: Gimplification of complex type is too hard for now.  */
   /* We aren't prepared to handle vectors either (and it is a question
@@ -763,14 +765,22 @@ conditional_replacement (basic_block con
           || POINTER_TYPE_P (TREE_TYPE (arg1))))
     return false;
 
-  /* The PHI arguments have the constants 0 and 1, or 0 and -1, then
-     convert it to the conditional.  */
-  if ((integer_zerop (arg0) && integer_onep (arg1))
-      || (integer_zerop (arg1) && integer_onep (arg0)))
-    neg = false;
-  else if ((integer_zerop (arg0) && integer_all_onesp (arg1))
-          || (integer_zerop (arg1) && integer_all_onesp (arg0)))
+  /* The PHI arguments have the constants 0 and 1, or 0 and -1 or
+     0 and (1 << cst), then convert it to the conditional.  */
+  if (integer_zerop (arg0))
+    nonzero_arg = arg1;
+  else if (integer_zerop (arg1))
+    nonzero_arg = arg0;
+  else
+    return false;
+  if (integer_all_onesp (nonzero_arg))
     neg = true;
+  else if (integer_pow2p (nonzero_arg))
+    {
+      shift = tree_log2 (nonzero_arg);
+      if (shift && POINTER_TYPE_P (TREE_TYPE (nonzero_arg)))
+       return false;
+    }
   else
     return false;
 
@@ -782,12 +792,12 @@ conditional_replacement (basic_block con
      falls through into BB.
 
      There is a single PHI node at the join point (BB) and its arguments
-     are constants (0, 1) or (0, -1).
+     are constants (0, 1) or (0, -1) or (0, (1 << shift)).
 
      So, given the condition COND, and the two PHI arguments, we can
      rewrite this PHI into non-branching code:
 
-       dest = (COND) or dest = COND'
+       dest = (COND) or dest = COND' or dest = (COND) << shift
 
      We use the condition as-is if the argument associated with the
      true edge has the value one or the argument associated with the
@@ -822,6 +832,14 @@ conditional_replacement (basic_block con
       cond = fold_build1_loc (gimple_location (stmt),
                               NEGATE_EXPR, TREE_TYPE (cond), cond);
     }
+  else if (shift)
+    {
+      cond = fold_convert_loc (gimple_location (stmt),
+                              TREE_TYPE (result), cond);
+      cond = fold_build2_loc (gimple_location (stmt),
+                             LSHIFT_EXPR, TREE_TYPE (cond), cond,
+                             build_int_cst (integer_type_node, shift));
+    }
 
   /* Insert our new statements at the end of conditional block before the
      COND_STMT.  */
--- gcc/testsuite/gcc.dg/tree-ssa/phi-opt-22.c.jj       2020-11-03 18:22:19.756124543 +0100
+++ gcc/testsuite/gcc.dg/tree-ssa/phi-opt-22.c  2020-11-03 18:25:12.795176885 +0100
@@ -0,0 +1,11 @@
+/* PR tree-optimization/97690 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-phiopt2" } */
+
+int foo (_Bool d) { return d ? 2 : 0; }
+int bar (_Bool d) { return d ? 1 : 0; }
+int baz (_Bool d) { return d ? -__INT_MAX__ - 1 : 0; }
+int qux (_Bool d) { return d ? 1024 : 0; }
+
+/* { dg-final { scan-tree-dump-not "if" "phiopt2" } } */
+/* { dg-final { scan-tree-dump-times " << " 3 "phiopt2" } } */

        Jakub

Reply via email to