[PATCH 3/3]Improve induction variable elimination

Bin Cheng Thu, 17 Jul 2014 02:10:31 -0700

Hi,
Function iv_elimination_compare_lt is used to eliminate induction variable
when the loop's latch could run for zero time (i.e., may_be_zero in loop
niter information evaluates to true).  As stated in the first message, it
only handles very specific case that rarely happens for either GCC bootstrap
or spec2k/spec2k6 compilation.  The function has two restrictions which
could be improved:
  a) When checking that candidate iv doesn't overflow, it only handles
candidates that are computed in a type that guarantees no overflows.  More
complex analysis can be used to prove the non-overflow ness,  as in this
patch.
  b) The function only handles the original form of may_be_zero like "a + 1
> b", but that expression could have been folded into other forms.  This
patch handles three folded forms and does iv elimination as well.  I think
this isn't a very corner case, because for many loops iterating from "0"
(i.e., we have "a == 0"), the expression will be folded.


I also refactored period check from may_eliminate_iv into a single function
so that it can be reused.

Thanks,
bin


2014-07-17  Bin Cheng  <bin.ch...@arm.com>

        * tree-ssa-loop-ivopts.c (iv_nowrap_period)
        (nowrap_cand_for_loop_niter_p): New functions.
        (period_greater_niter_exit): New function refactored from
        may_eliminate_iv.
        (iv_elimination_compare_lt): New parameter.  Check wrapping
        behavior for candidate of wrapping type.  Handle folded forms
        of may_be_zero expression.
        (may_eliminate_iv): Call period_greater_niter_exit.  Pass new
        argument for iv_elimination_compare_lt.

gcc/testsuite/ChangeLog
2014-07-17  Bin Cheng  <bin.ch...@arm.com>

        * gcc.dg/tree-ssa/ivopts-lt-3.c: New test.
        * gcc.dg/tree-ssa/ivopts-lt-4.c: New test.

Index: gcc/tree-ssa-loop-ivopts.c
===================================================================
--- gcc/tree-ssa-loop-ivopts.c  (revision 212387)
+++ gcc/tree-ssa-loop-ivopts.c  (working copy)
@@ -4432,6 +4432,44 @@ iv_period (struct iv *iv)
   return period;
 }
 
+/* Returns no wrapping period of induction variable IV.  For now
+   only unsigned type IV is handled, we could extend it in case
+   of non-overflow for signed ones.  Return zero if it can't be
+   decided.  */
+
+static tree
+iv_nowrap_period (struct iv *iv)
+{
+  bool overflow;
+  tree type;
+  tree base = iv->base, step = iv->step;
+  widest_int base_val, step_val, max_val, span, period;
+
+  gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
+
+  type = TREE_TYPE (base);
+  if (!TYPE_UNSIGNED (type) || TREE_CODE (base) != INTEGER_CST)
+    return integer_zero_node;
+
+  base_val = wi::to_widest (base);
+  step_val = wi::to_widest (step);
+  if (!POINTER_TYPE_P (type) && TYPE_MAX_VALUE (type)
+      && TREE_CODE (TYPE_MAX_VALUE (type)) == INTEGER_CST)
+    max_val = wi::to_widest (TYPE_MAX_VALUE (type));
+  else
+    {
+      wide_int max_wi = wi::max_value (TYPE_PRECISION (type), UNSIGNED);
+      max_val = wi::to_widest (wide_int_to_tree (type, max_wi));
+    }
+
+  span = max_val - base_val + step_val - 1;
+  period = wi::div_trunc (span, step_val, UNSIGNED, &overflow);
+  if (overflow)
+    return integer_zero_node;
+
+  return wide_int_to_tree (type, period);
+}
+
 /* Returns the comparison operator used when eliminating the iv USE.  */
 
 static enum tree_code
@@ -4560,7 +4598,84 @@ difference_cannot_overflow_p (tree base, tree offs
     }
 }
 
-/* Tries to replace loop exit by one formulated in terms of a LT_EXPR
+/* Check whether PERIOD of CAND is greater than the number of iterations
+   described by DESC for which the exit condition is true.  The exit
+   condition is comparison against USE.  */
+
+static bool
+period_greater_niter_exit (struct ivopts_data *data,
+                          struct iv_use *use, struct iv_cand *cand,
+                          tree period, struct tree_niter_desc *desc)
+{
+  struct loop *loop = data->current_loop;
+
+  /* If the number of iterations is constant, compare against it directly.  */
+  if (TREE_CODE (desc->niter) == INTEGER_CST)
+    {
+      /* See cand_value_at.  */
+      if (stmt_after_increment (loop, cand, use->stmt))
+        {
+          if (!tree_int_cst_lt (desc->niter, period))
+            return false;
+        }
+      else
+        {
+          if (tree_int_cst_lt (period, desc->niter))
+            return false;
+        }
+    }
+
+  /* If not, and if this is the only possible exit of the loop, see whether
+     we can get a conservative estimate on the number of iterations of the
+     entire loop and compare against that instead.  */
+  else
+    {
+      widest_int period_value, max_niter;
+
+      max_niter = desc->max;
+      if (stmt_after_increment (loop, cand, use->stmt))
+        max_niter += 1;
+      period_value = wi::to_widest (period);
+      if (wi::gtu_p (max_niter, period_value))
+        {
+          /* See if we can take advantage of inferred loop bound information.  
*/
+          if (data->loop_single_exit_p)
+            {
+              if (!max_loop_iterations (loop, &max_niter))
+                return false;
+              /* The loop bound is already adjusted by adding 1.  */
+              if (wi::gtu_p (max_niter, period_value))
+                return false;
+            }
+          else
+            return false;
+        }
+    }
+
+  return true;
+}
+
+/* Determine whether no wrapping period of CAND is greater than
+   the number of iterations described by DESC for which exit
+   conditionis true.  The exit condition is comparison against USE.  */
+
+static bool
+nowrap_cand_for_loop_niter_p (struct ivopts_data *data,
+                             struct iv_use *use,
+                             struct iv_cand *cand,
+                             struct tree_niter_desc *desc)
+{
+  tree period;
+  
+  period = iv_nowrap_period (cand->iv);
+  if (period != integer_zero_node
+      && period_greater_niter_exit (data, use, cand, period, desc))
+    return true;
+
+  return false;
+}
+
+/* Tries to replace loop exit in USE by one formulated in terms of a LT_EXPR
    comparison with CAND.  NITER describes the number of iterations of
    the loops.  If successful, the comparison in COMP_P is altered accordingly.
 
@@ -4597,11 +4712,18 @@ difference_cannot_overflow_p (tree base, tree offs
    1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is 
no
       overflow in computing it or the values of p.
    2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
-      overflow.  To prove this, we use the fact that p_0 = base + a.  */
+      overflow.  To prove this, we use the fact that p_0 = base + a.
 
+
+   Moreover, with more complex overflow analysis for unsigned type, it is
+   possible to eliminate I with P if P is an induction candidate of unsigned
+   integer type other than pointer if we can prove that P doesn't  overflow
+   during all iterations of current loop.  Also special forms of MAY_BE_ZERO
+   in NITER is checked because "a + 1 > b" could be folded.  */
+
 static bool
-iv_elimination_compare_lt (struct ivopts_data *data,
-                           struct iv_cand *cand, enum tree_code *comp_p,
+iv_elimination_compare_lt (struct ivopts_data *data, struct iv_use *use,
+                          struct iv_cand *cand, enum tree_code *comp_p,
                           struct tree_niter_desc *niter)
 {
   tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
@@ -4610,11 +4732,13 @@ static bool
   HOST_WIDE_INT step;
 
   /* We need to know that the candidate induction variable does not overflow.
-     While more complex analysis may be used to prove this, for now just
-     check that the variable appears in the original program and that it
-     is computed in a type that guarantees no overflows.  */
+     Apart from checking that the variable appears in the original program
+     and that is is computed in a type that guarantees no overflows, we also
+     check no wrapping periord of the variable covers the niter if it has
+     unsigned type.  More complex analysis may be used to prove this.  */
   cand_type = TREE_TYPE (cand->iv->base);
-  if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
+  if ((cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
+      && !nowrap_cand_for_loop_niter_p (data, use, cand, niter))
     return false;
 
   /* Make sure that the loop iterates till the loop bound is hit, as otherwise
@@ -4657,6 +4781,67 @@ static bool
       else
        return false;
     }
+  else if (TREE_CODE (mbz) == EQ_EXPR)
+    {
+      /* Check whether may_be_zero is in below three forms:
+          1) b' == TYPE_MAX_VALUE (TREE_TYPE (b'))
+             where b' is of type nit_type.  The expression could be
+             folded from "b' + 1 < 1".  In this case, A is "0" and B
+             is "b' + 1".
+          2) b == 0
+             where b is of type nit_type.  The expression could be
+             folded from "b < 1".  In this case, A is "0" and B is
+             "b".
+          3) b' == -1
+             where b' is of signed type which has nit_type as its
+             unsigned counterpart.  The expression could be folded
+             from "(nit_type)b' + 1 < 1".  In this case, A is "0"
+             and "(nit_type)b' + 1".  */
+      tree type;
+      tree op0 = TREE_OPERAND (mbz, 0);
+      tree op1 = TREE_OPERAND (mbz, 1);
+
+      if (TREE_CODE (op1) != INTEGER_CST)
+       {
+         tree tmp = op0;
+         op0 = op1;
+         op1 = tmp;
+       }
+      type = TREE_TYPE (op0);
+      if (TREE_CODE (op1) != INTEGER_CST)
+       return false;
+
+      /* Convert case 3 to case 1.  */
+      if (!TYPE_UNSIGNED (type)
+         && operand_equal_p (op1, integer_minus_one_node, 0)
+         && types_compatible_p (unsigned_type_for (type), nit_type))
+       {
+         type = nit_type;
+         op0 = fold_convert_loc (UNKNOWN_LOCATION, nit_type, op0);
+         op1 = fold_convert_loc (UNKNOWN_LOCATION, nit_type, op1);
+       }
+
+      if (!TYPE_UNSIGNED (type))
+       return false;
+
+      /* Case 1.  */
+      if (TYPE_MAX_VALUE (type)
+         && TREE_CODE (op1) == INTEGER_CST
+         && operand_equal_p (op1, TYPE_MAX_VALUE (type), 0))
+       {
+         a = integer_zero_node;
+         b = fold_build2 (PLUS_EXPR, type, op0, integer_one_node);
+       }
+      /* Case 2.  */
+      else if (TREE_CODE (op1) == INTEGER_CST
+              && operand_equal_p (op1, integer_zero_node, 0))
+       {
+         a = integer_zero_node;
+         b = op0;
+       }
+      else
+       return false;
+    }
   else
     return false;
 
@@ -4673,10 +4858,14 @@ static bool
     return false;
 
   /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
-     overflow.  */
-  offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
-                       cand->iv->step,
-                       fold_convert (TREE_TYPE (cand->iv->step), a));
+     overflow.  It is uncessary to build offset when A equals to ZERO.  */
+  if (a != integer_zero_node)
+    offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
+                         cand->iv->step,
+                         fold_convert (TREE_TYPE (cand->iv->step), a));
+  else
+    offset = a;
+
   if (!difference_cannot_overflow_p (cand->iv->base, offset))
     return false;
 
@@ -4733,50 +4922,9 @@ may_eliminate_iv (struct ivopts_data *data,
      This is the case iff the period of the induction variable is greater
      than the number of iterations for which the exit condition is true.  */
   period = iv_period (cand->iv);
+  if (!period_greater_niter_exit (data, use, cand, period, desc))
+    return false;
 
-  /* If the number of iterations is constant, compare against it directly.  */
-  if (TREE_CODE (desc->niter) == INTEGER_CST)
-    {
-      /* See cand_value_at.  */
-      if (stmt_after_increment (loop, cand, use->stmt))
-        {
-          if (!tree_int_cst_lt (desc->niter, period))
-            return false;
-        }
-      else
-        {
-          if (tree_int_cst_lt (period, desc->niter))
-            return false;
-        }
-    }
-
-  /* If not, and if this is the only possible exit of the loop, see whether
-     we can get a conservative estimate on the number of iterations of the
-     entire loop and compare against that instead.  */
-  else
-    {
-      widest_int period_value, max_niter;
-
-      max_niter = desc->max;
-      if (stmt_after_increment (loop, cand, use->stmt))
-        max_niter += 1;
-      period_value = wi::to_widest (period);
-      if (wi::gtu_p (max_niter, period_value))
-        {
-          /* See if we can take advantage of inferred loop bound information.  
*/
-          if (data->loop_single_exit_p)
-            {
-              if (!max_loop_iterations (loop, &max_niter))
-                return false;
-              /* The loop bound is already adjusted by adding 1.  */
-              if (wi::gtu_p (max_niter, period_value))
-                return false;
-            }
-          else
-            return false;
-        }
-    }
-
   cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
 
   *bound = fold_convert (TREE_TYPE (cand->iv->base),
@@ -4796,7 +4944,7 @@ may_eliminate_iv (struct ivopts_data *data,
           base the exit condition on it.  However, that is often too
           expensive.  */
   if (!integer_zerop (desc->may_be_zero))
-    return iv_elimination_compare_lt (data, cand, comp, desc);
+    return iv_elimination_compare_lt (data, use, cand, comp, desc);
 
   return true;
 }
Index: gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt-3.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt-3.c (revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt-3.c (revision 0)
@@ -0,0 +1,38 @@
+/* { dg-do compile { target { lp64 } } } */
+/* { dg-options "-O2 -fdump-tree-ivopts-details" } */
+
+typedef long unsigned int size_t;
+extern float a[100], b[100];
+
+int foo (int M, unsigned int l)
+{
+  unsigned ivtmp = 0, niters, _37, _38, bnd;
+  size_t _67, _1;
+  float *vectp_a, *vectp_b, *vectp_a2;
+  float vect__6, vect__7, vect__8;
+
+  _38 = (unsigned int)l;
+  bnd = _38 + 1;
+
+  _1 = (size_t) M;
+  _67 = _1 * 4;
+  vectp_a = a; vectp_b = b; vectp_a2 = a + _67;
+
+  do
+    {
+      vect__6 = *vectp_a;
+      vect__7 = *vectp_b;
+      vect__8 = vect__6 + vect__7;
+      *vectp_a = vect__8;
+      vectp_a = vectp_a + 4;
+      vectp_b = vectp_b + 4;
+      vectp_a2 = vectp_a2 + 4;
+      ivtmp += 1;
+    }
+  while (ivtmp < bnd);
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "Replacing exit test: if \\(bnd.*\\)" 1 
"ivopts" } } */
+/* { dg-final { cleanup-tree-dump "ivopts" } } */
Index: gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt-4.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt-4.c (revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt-4.c (revision 0)
@@ -0,0 +1,38 @@
+/* { dg-do compile { target { lp64 } } } */
+/* { dg-options "-O2 -fdump-tree-ivopts-details" } */
+
+typedef long unsigned int size_t;
+extern float a[100], b[100];
+
+int foo (int M, int l)
+{
+  unsigned ivtmp = 0, niters, _37, _38, bnd;
+  size_t _67, _1;
+  float *vectp_a, *vectp_b, *vectp_a2;
+  float vect__6, vect__7, vect__8;
+
+  _38 = (unsigned int)l;
+  bnd = _38 + 1;
+
+  _1 = (size_t) M;
+  _67 = _1 * 4;
+  vectp_a = a; vectp_b = b; vectp_a2 = a + _67;
+
+  do
+    {
+      vect__6 = *vectp_a;
+      vect__7 = *vectp_b;
+      vect__8 = vect__6 + vect__7;
+      *vectp_a = vect__8;
+      vectp_a = vectp_a + 4;
+      vectp_b = vectp_b + 4;
+      vectp_a2 = vectp_a2 + 4;
+      ivtmp += 1;
+    }
+  while (ivtmp < bnd);
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "Replacing exit test: if \\(bnd.*\\)" 1 
"ivopts" } } */
+/* { dg-final { cleanup-tree-dump "ivopts" } } */

[PATCH 3/3]Improve induction variable elimination

Reply via email to