Re: ivopts improvement

Tom de Vries Thu, 03 Mar 2011 06:29:02 -0800

Hi Paolo,

On 03/03/2011 09:44 AM, Paolo Bonzini wrote:
> On 03/02/2011 11:01 PM, Tom de Vries wrote:
>> +  if (TREE_CODE (nit) == COND_EXPR)
>> +    {
>> +      if (!loop_only_exit_p (loop, exit))
>> +        return false;
>> +
>> +      return iv_elimination_compare_lt (use, cand, bound, nit, comp);
>> +    }
>> +
> 
> You probably need a comment on top of iv_elimination_compare_lt, 
> otherwise I'm left wondering why this isn't
> 
>    if (TREE_CODE (nit) == COND_EXPR
>        && loop_only_exit_p (loop, exit)
>        && iv_elimination_compare_lt (use, cand, bound, nit, comp))
>      return true;
>


You're right, there's a comment missing. I added it now.

> Also, the check on nit is an optimization.  

It's not, hopefully now explained by the comment.

> Perhaps you should add a 
> gcc_checking_assert to iv_elimination_compare_lt and/or remove this from 
> get_lt_bound:
> 
>> +  if (TREE_CODE (nit) != COND_EXPR)
>> +    return NULL_TREE;
>> +

It's a duplicate test, so I turned it into a gcc_checking_assert.

> Or perhaps loop_only_exit_p could be optimized by computing it ahead of 
> time, possibly at the same time as loop_body_includes_call.  This way it 
> becomes very cheap and the code above can just call 
> iv_elimination_compare_lt without any pre-screening.

Gave it a try in a new patch.

Reg-tested on x86_64. Better?

Thanks,
- Tom

diff -u gcc/tree-ssa-loop-ivopts.c gcc/tree-ssa-loop-ivopts.c
--- gcc/tree-ssa-loop-ivopts.c	(working copy)
+++ gcc/tree-ssa-loop-ivopts.c	(working copy)
@@ -292,6 +292,10 @@
 
   /* Whether the loop body includes any function calls.  */
   bool body_includes_call;
+
+  /* Whether the loop body includes any function calls that possibly have side
+     effects.  */
+  bool body_includes_side_effect_call;
 };
 
 /* An assignment of iv candidates to uses.  */
@@ -456,6 +460,20 @@
   return exit;
 }
 
+/* Returns true if single_exit (DATA->current_loop) is the only possible exit.
+   Uses the same logic as loop_only_exit_p.  */
+
+static bool
+loop_single_exit_p (struct ivopts_data *data)
+{
+  edge exit = single_exit (data->current_loop);
+
+  if (!exit)
+    return false;
+
+  return !data->body_includes_side_effect_call;
+}
+
 /* Dumps information about the induction variable IV to FILE.  */
 
 extern void dump_iv (FILE *, struct iv *);
@@ -4403,7 +4421,7 @@
       if (double_int_ucmp (max_niter, period_value) > 0)
         {
           /* See if we can take advantage of infered loop bound information.  */
-          if (loop_only_exit_p (loop, exit))
+          if (loop_single_exit_p (data))
             {
               if (!estimated_loop_iterations (loop, true, &max_niter))
                 return false;
@@ -6343,23 +6361,34 @@
   htab_delete (data->inv_expr_tab);
 }
 
-/* Returns true if the loop body BODY includes any function calls.  */
+/* Finds any function calls in loop body BODY and stores a classification of
+   those calls in DATA.  */
 
-static bool
-loop_body_includes_call (basic_block *body, unsigned num_nodes)
+static void
+find_calls_in_loop_body (struct ivopts_data *data, basic_block *body,
+                         unsigned num_nodes)
 {
   gimple_stmt_iterator gsi;
   unsigned i;
+  bool call = false;
+  bool se_call = false;
+  bool done = false;
 
-  for (i = 0; i < num_nodes; i++)
-    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
+  for (i = 0; i < num_nodes && !done; i++)
+    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi) && !done;
+         gsi_next (&gsi))
       {
-	gimple stmt = gsi_stmt (gsi);
-	if (is_gimple_call (stmt)
-	    && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
-	  return true;
+        gimple stmt = gsi_stmt (gsi);
+        if (!is_gimple_call (stmt))
+          continue;
+
+        call = call || is_inexpensive_builtin (gimple_call_fndecl (stmt));
+        se_call = se_call || gimple_has_side_effects (stmt);
+        done = call && se_call;
       }
-  return false;
+
+  data->body_includes_call = call;
+  data->body_includes_side_effect_call = se_call;
 }
 
 /* Optimizes the LOOP.  Returns true if anything changed.  */
@@ -6393,7 +6422,7 @@
     }
 
   body = get_loop_body (loop);
-  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
+  find_calls_in_loop_body (data, body, loop->num_nodes);
   renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
   free (body);

diff -u gcc/tree-ssa-loop-ivopts.c gcc/tree-ssa-loop-ivopts.c
--- gcc/tree-ssa-loop-ivopts.c	(working copy)
+++ gcc/tree-ssa-loop-ivopts.c	(working copy)
@@ -832,17 +832,25 @@
 
   if (!slot)
     {
-      /* Try to determine number of iterations.  We must know it
-	 unconditionally (i.e., without possibility of # of iterations
-	 being zero).  Also, we cannot safely work with ssa names that
-	 appear in phi nodes on abnormal edges, so that we do not create
-	 overlapping life ranges for them (PR 27283).  */
+      /* Try to determine number of iterations.  We cannot safely work with ssa
+         names that appear in phi nodes on abnormal edges, so that we do not
+         create overlapping life ranges for them (PR 27283).  */
       desc = XNEW (struct tree_niter_desc);
       if (number_of_iterations_exit (data->current_loop,
 				     exit, desc, true)
-	  && integer_zerop (desc->may_be_zero)
      	  && !contains_abnormal_ssa_name_p (desc->niter))
-	niter = desc->niter;
+	{
+	  if (!integer_zerop (desc->may_be_zero))
+            /* Construct COND_EXPR that describes the number of iterations.
+               Either the COND_EXPR is not too expensive, and we can use it as
+               loop bound, or we can deduce a LT_EXPR bound from it.  */
+	    niter
+	      = build3 (COND_EXPR, TREE_TYPE (desc->niter), desc->may_be_zero,
+			build_int_cst_type (TREE_TYPE (desc->niter), 0),
+			desc->niter);
+	  else
+	    niter = desc->niter;
+	}
       else
 	niter = NULL_TREE;
 
@@ -4360,6 +4368,126 @@
   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
 }
 
+/* Tries to detect
+     NIT == (use_iv_max < USE->iv->base)
+            ? 0
+            : (use_iv_max - USE->iv->base)
+   where
+     use_iv_real_base == (USE->iv->base - USE->iv->step)
+     && CAND->iv->base == base_ptr + use_iv_real_base
+   and returns the exclusive upper bound for CAND->var_after:
+     base_ptr + use_iv_max.  */
+
+static tree
+get_lt_bound (struct iv_use *use, struct iv_cand *cand, tree nit)
+{
+  tree comp, base_ptr, n, n0, n1;
+  tree use_iv_real_base, cand_iv_base, use_iv_max;
+  gimple def_stmt;
+  int npos, mpos;
+  enum tree_code compare;
+  tree cand_type = TREE_TYPE (cand->var_before);
+
+  gcc_assert (TREE_CODE (nit) == COND_EXPR);
+
+  /* use_iv_real_base == use->iv->base - use->iv->step.  */
+  use_iv_real_base = fold_build_plus (MINUS_EXPR, use->iv->base, use->iv->step);
+
+  /* cand_iv_base.  */
+  cand_iv_base = cand->iv->base;
+  STRIP_NOPS (cand_iv_base);
+
+  /* cand->iv->base == base_ptr + use_iv_real_base.  */
+  if (TREE_CODE (cand_iv_base) != SSA_NAME)
+    return NULL_TREE;
+  def_stmt = SSA_NAME_DEF_STMT (cand_iv_base);
+  if (!is_gimple_assign (def_stmt)
+      || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR)
+    return NULL_TREE;
+  if (gimple_assign_rhs2 (def_stmt) != use_iv_real_base)
+    return NULL_TREE;
+  base_ptr = gimple_assign_rhs1 (def_stmt);
+
+  /* 0.  */
+  if (tree_int_cst_equal (TREE_OPERAND (nit, 1), integer_zero_node))
+    npos = 2;
+  else if (tree_int_cst_equal (TREE_OPERAND (nit, 2), integer_zero_node))
+    npos = 1;
+  else
+    return NULL_TREE;
+
+  /* n == use_iv_max - use->iv->base.  */
+  n = TREE_OPERAND (nit, npos);
+  if (TREE_CODE (n) != PLUS_EXPR)
+    return NULL_TREE;
+  n0 = TREE_OPERAND (n, 0);
+  n1 = TREE_OPERAND (n, 1);
+  if (tree_int_cst_equal (fold_build_plus (PLUS_EXPR, use->iv->base, n0),
+                          integer_zero_node))
+    use_iv_max = n1;
+  else if (tree_int_cst_equal (fold_build_plus (PLUS_EXPR, use->iv->base, n1),
+                               integer_zero_node))
+    use_iv_max = n0;
+  else
+    return NULL_TREE;
+
+  /* comp == use_iv_max < use->iv->base.  */
+  comp = TREE_OPERAND (nit, 0);
+  compare = TREE_CODE (comp);
+  if ((npos == 2 && compare == LT_EXPR)
+      || (npos == 1 && compare == GE_EXPR))
+    mpos = 0;
+  else if ((npos == 2 && compare == GT_EXPR)
+           || (npos == 1 && compare == LE_EXPR))
+    mpos = 1;
+  else
+    return NULL_TREE;
+  if (TREE_OPERAND (comp, mpos) != use_iv_max
+      || !tree_int_cst_equal (fold_build_plus (MINUS_EXPR, use->iv->base,
+                                               TREE_OPERAND (comp, 1 - mpos)),
+                              integer_zero_node))
+    return NULL_TREE;
+
+  /* Calculate bound.  */
+  return fold_build_plus (PLUS_EXPR, convert (cand_type, base_ptr), use_iv_max);
+}
+
+/* Tries to replace loop exit test USE, which allows NIT iterations, by one
+   formulated in terms of a LT_EXPR comparison with CAND.  Stores the resulting
+   comparison in COMP_P and bound in BOUND_P.  */
+
+static bool
+iv_elimination_compare_lt (struct ivopts_data *data, struct iv_use *use,
+                           struct iv_cand *cand, tree *bound_p, tree nit,
+                           enum tree_code *comp_p)
+{
+  tree bound;
+
+  if (!(cand->pos == IP_ORIGINAL
+        && POINTER_TYPE_P (TREE_TYPE (cand->var_before))
+        && POINTER_TYPE_OVERFLOW_UNDEFINED))
+    return false;
+
+  if (*comp_p != NE_EXPR)
+    return false;
+
+  if (!loop_single_exit_p (data))
+    return false;
+
+  bound = get_lt_bound (use, cand, nit);
+
+  if (bound == NULL_TREE)
+    return false;
+
+  if (expression_expensive_p (bound))
+    return false;
+
+  *comp_p = LT_EXPR;
+  *bound_p = bound;
+
+  return true;
+}
+
 /* Check whether it is possible to express the condition in USE by comparison
    of candidate CAND.  If so, store the value compared with to BOUND, and the
    comparison operator to COMP.  */
@@ -4438,6 +4566,21 @@
   *bound = aff_combination_to_tree (&bnd);
   *comp = iv_elimination_compare (data, use);
 
+  /* Try to implement nit using a '<' instead.  */
+  if (TREE_CODE (nit) == COND_EXPR)
+    {
+      if (iv_elimination_compare_lt (data, use, cand, bound, nit, comp))
+        return true;
+
+      /* We could try to see if the non-lt bound is not too expensive, but the
+         cost infrastructure needs tuning for that first.  Even though
+         expression_expensive_p always returns true for COND_EXPRs, it happens
+         that the bound is folded into a MAX_EXPR, which is approved by
+         expression_expensive_p, but is attributed too low a cost by
+         force_var_cost in case the MAX_EXPR would expand into control flow.  */
+      return false;
+    }
+
   /* It is unlikely that computing the number of iterations using division
      would be more profitable than keeping the original induction variable.  */
   if (expression_expensive_p (*bound))

Re: ivopts improvement

Reply via email to