Hi,
Function iv_elimination_compare_lt is used to eliminate induction variable
when the loop's latch could run for zero time (i.e., may_be_zero in loop
niter information evaluates to true). As stated in the first message, it
only handles very specific case that rarely happens for either GCC bootstrap
or spec2k/spec2k6 compilation. The function has two restrictions which
could be improved:
a) When checking that candidate iv doesn't overflow, it only handles
candidates that are computed in a type that guarantees no overflows. More
complex analysis can be used to prove the non-overflow ness, as in this
patch.
b) The function only handles the original form of may_be_zero like "a + 1
> b", but that expression could have been folded into other forms. This
patch handles three folded forms and does iv elimination as well. I think
this isn't a very corner case, because for many loops iterating from "0"
(i.e., we have "a == 0"), the expression will be folded.
I also refactored period check from may_eliminate_iv into a single function
so that it can be reused.
Thanks,
bin
2014-07-17 Bin Cheng <bin.ch...@arm.com>
* tree-ssa-loop-ivopts.c (iv_nowrap_period)
(nowrap_cand_for_loop_niter_p): New functions.
(period_greater_niter_exit): New function refactored from
may_eliminate_iv.
(iv_elimination_compare_lt): New parameter. Check wrapping
behavior for candidate of wrapping type. Handle folded forms
of may_be_zero expression.
(may_eliminate_iv): Call period_greater_niter_exit. Pass new
argument for iv_elimination_compare_lt.
gcc/testsuite/ChangeLog
2014-07-17 Bin Cheng <bin.ch...@arm.com>
* gcc.dg/tree-ssa/ivopts-lt-3.c: New test.
* gcc.dg/tree-ssa/ivopts-lt-4.c: New test.
Index: gcc/tree-ssa-loop-ivopts.c
===================================================================
--- gcc/tree-ssa-loop-ivopts.c (revision 212387)
+++ gcc/tree-ssa-loop-ivopts.c (working copy)
@@ -4432,6 +4432,44 @@ iv_period (struct iv *iv)
return period;
}
+/* Returns no wrapping period of induction variable IV. For now
+ only unsigned type IV is handled, we could extend it in case
+ of non-overflow for signed ones. Return zero if it can't be
+ decided. */
+
+static tree
+iv_nowrap_period (struct iv *iv)
+{
+ bool overflow;
+ tree type;
+ tree base = iv->base, step = iv->step;
+ widest_int base_val, step_val, max_val, span, period;
+
+ gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
+
+ type = TREE_TYPE (base);
+ if (!TYPE_UNSIGNED (type) || TREE_CODE (base) != INTEGER_CST)
+ return integer_zero_node;
+
+ base_val = wi::to_widest (base);
+ step_val = wi::to_widest (step);
+ if (!POINTER_TYPE_P (type) && TYPE_MAX_VALUE (type)
+ && TREE_CODE (TYPE_MAX_VALUE (type)) == INTEGER_CST)
+ max_val = wi::to_widest (TYPE_MAX_VALUE (type));
+ else
+ {
+ wide_int max_wi = wi::max_value (TYPE_PRECISION (type), UNSIGNED);
+ max_val = wi::to_widest (wide_int_to_tree (type, max_wi));
+ }
+
+ span = max_val - base_val + step_val - 1;
+ period = wi::div_trunc (span, step_val, UNSIGNED, &overflow);
+ if (overflow)
+ return integer_zero_node;
+
+ return wide_int_to_tree (type, period);
+}
+
/* Returns the comparison operator used when eliminating the iv USE. */
static enum tree_code
@@ -4560,7 +4598,84 @@ difference_cannot_overflow_p (tree base, tree offs
}
}
-/* Tries to replace loop exit by one formulated in terms of a LT_EXPR
+/* Check whether PERIOD of CAND is greater than the number of iterations
+ described by DESC for which the exit condition is true. The exit
+ condition is comparison against USE. */
+
+static bool
+period_greater_niter_exit (struct ivopts_data *data,
+ struct iv_use *use, struct iv_cand *cand,
+ tree period, struct tree_niter_desc *desc)
+{
+ struct loop *loop = data->current_loop;
+
+ /* If the number of iterations is constant, compare against it directly. */
+ if (TREE_CODE (desc->niter) == INTEGER_CST)
+ {
+ /* See cand_value_at. */
+ if (stmt_after_increment (loop, cand, use->stmt))
+ {
+ if (!tree_int_cst_lt (desc->niter, period))
+ return false;
+ }
+ else
+ {
+ if (tree_int_cst_lt (period, desc->niter))
+ return false;
+ }
+ }
+
+ /* If not, and if this is the only possible exit of the loop, see whether
+ we can get a conservative estimate on the number of iterations of the
+ entire loop and compare against that instead. */
+ else
+ {
+ widest_int period_value, max_niter;
+
+ max_niter = desc->max;
+ if (stmt_after_increment (loop, cand, use->stmt))
+ max_niter += 1;
+ period_value = wi::to_widest (period);
+ if (wi::gtu_p (max_niter, period_value))
+ {
+ /* See if we can take advantage of inferred loop bound information.
*/
+ if (data->loop_single_exit_p)
+ {
+ if (!max_loop_iterations (loop, &max_niter))
+ return false;
+ /* The loop bound is already adjusted by adding 1. */
+ if (wi::gtu_p (max_niter, period_value))
+ return false;
+ }
+ else
+ return false;
+ }
+ }
+
+ return true;
+}
+
+/* Determine whether no wrapping period of CAND is greater than
+ the number of iterations described by DESC for which exit
+ conditionis true. The exit condition is comparison against USE. */
+
+static bool
+nowrap_cand_for_loop_niter_p (struct ivopts_data *data,
+ struct iv_use *use,
+ struct iv_cand *cand,
+ struct tree_niter_desc *desc)
+{
+ tree period;
+
+ period = iv_nowrap_period (cand->iv);
+ if (period != integer_zero_node
+ && period_greater_niter_exit (data, use, cand, period, desc))
+ return true;
+
+ return false;
+}
+
+/* Tries to replace loop exit in USE by one formulated in terms of a LT_EXPR
comparison with CAND. NITER describes the number of iterations of
the loops. If successful, the comparison in COMP_P is altered accordingly.
@@ -4597,11 +4712,18 @@ difference_cannot_overflow_p (tree base, tree offs
1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is
no
overflow in computing it or the values of p.
2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
- overflow. To prove this, we use the fact that p_0 = base + a. */
+ overflow. To prove this, we use the fact that p_0 = base + a.
+
+ Moreover, with more complex overflow analysis for unsigned type, it is
+ possible to eliminate I with P if P is an induction candidate of unsigned
+ integer type other than pointer if we can prove that P doesn't overflow
+ during all iterations of current loop. Also special forms of MAY_BE_ZERO
+ in NITER is checked because "a + 1 > b" could be folded. */
+
static bool
-iv_elimination_compare_lt (struct ivopts_data *data,
- struct iv_cand *cand, enum tree_code *comp_p,
+iv_elimination_compare_lt (struct ivopts_data *data, struct iv_use *use,
+ struct iv_cand *cand, enum tree_code *comp_p,
struct tree_niter_desc *niter)
{
tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
@@ -4610,11 +4732,13 @@ static bool
HOST_WIDE_INT step;
/* We need to know that the candidate induction variable does not overflow.
- While more complex analysis may be used to prove this, for now just
- check that the variable appears in the original program and that it
- is computed in a type that guarantees no overflows. */
+ Apart from checking that the variable appears in the original program
+ and that is is computed in a type that guarantees no overflows, we also
+ check no wrapping periord of the variable covers the niter if it has
+ unsigned type. More complex analysis may be used to prove this. */
cand_type = TREE_TYPE (cand->iv->base);
- if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
+ if ((cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
+ && !nowrap_cand_for_loop_niter_p (data, use, cand, niter))
return false;
/* Make sure that the loop iterates till the loop bound is hit, as otherwise
@@ -4657,6 +4781,67 @@ static bool
else
return false;
}
+ else if (TREE_CODE (mbz) == EQ_EXPR)
+ {
+ /* Check whether may_be_zero is in below three forms:
+ 1) b' == TYPE_MAX_VALUE (TREE_TYPE (b'))
+ where b' is of type nit_type. The expression could be
+ folded from "b' + 1 < 1". In this case, A is "0" and B
+ is "b' + 1".
+ 2) b == 0
+ where b is of type nit_type. The expression could be
+ folded from "b < 1". In this case, A is "0" and B is
+ "b".
+ 3) b' == -1
+ where b' is of signed type which has nit_type as its
+ unsigned counterpart. The expression could be folded
+ from "(nit_type)b' + 1 < 1". In this case, A is "0"
+ and "(nit_type)b' + 1". */
+ tree type;
+ tree op0 = TREE_OPERAND (mbz, 0);
+ tree op1 = TREE_OPERAND (mbz, 1);
+
+ if (TREE_CODE (op1) != INTEGER_CST)
+ {
+ tree tmp = op0;
+ op0 = op1;
+ op1 = tmp;
+ }
+ type = TREE_TYPE (op0);
+ if (TREE_CODE (op1) != INTEGER_CST)
+ return false;
+
+ /* Convert case 3 to case 1. */
+ if (!TYPE_UNSIGNED (type)
+ && operand_equal_p (op1, integer_minus_one_node, 0)
+ && types_compatible_p (unsigned_type_for (type), nit_type))
+ {
+ type = nit_type;
+ op0 = fold_convert_loc (UNKNOWN_LOCATION, nit_type, op0);
+ op1 = fold_convert_loc (UNKNOWN_LOCATION, nit_type, op1);
+ }
+
+ if (!TYPE_UNSIGNED (type))
+ return false;
+
+ /* Case 1. */
+ if (TYPE_MAX_VALUE (type)
+ && TREE_CODE (op1) == INTEGER_CST
+ && operand_equal_p (op1, TYPE_MAX_VALUE (type), 0))
+ {
+ a = integer_zero_node;
+ b = fold_build2 (PLUS_EXPR, type, op0, integer_one_node);
+ }
+ /* Case 2. */
+ else if (TREE_CODE (op1) == INTEGER_CST
+ && operand_equal_p (op1, integer_zero_node, 0))
+ {
+ a = integer_zero_node;
+ b = op0;
+ }
+ else
+ return false;
+ }
else
return false;
@@ -4673,10 +4858,14 @@ static bool
return false;
/* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
- overflow. */
- offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
- cand->iv->step,
- fold_convert (TREE_TYPE (cand->iv->step), a));
+ overflow. It is uncessary to build offset when A equals to ZERO. */
+ if (a != integer_zero_node)
+ offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
+ cand->iv->step,
+ fold_convert (TREE_TYPE (cand->iv->step), a));
+ else
+ offset = a;
+
if (!difference_cannot_overflow_p (cand->iv->base, offset))
return false;
@@ -4733,50 +4922,9 @@ may_eliminate_iv (struct ivopts_data *data,
This is the case iff the period of the induction variable is greater
than the number of iterations for which the exit condition is true. */
period = iv_period (cand->iv);
+ if (!period_greater_niter_exit (data, use, cand, period, desc))
+ return false;
- /* If the number of iterations is constant, compare against it directly. */
- if (TREE_CODE (desc->niter) == INTEGER_CST)
- {
- /* See cand_value_at. */
- if (stmt_after_increment (loop, cand, use->stmt))
- {
- if (!tree_int_cst_lt (desc->niter, period))
- return false;
- }
- else
- {
- if (tree_int_cst_lt (period, desc->niter))
- return false;
- }
- }
-
- /* If not, and if this is the only possible exit of the loop, see whether
- we can get a conservative estimate on the number of iterations of the
- entire loop and compare against that instead. */
- else
- {
- widest_int period_value, max_niter;
-
- max_niter = desc->max;
- if (stmt_after_increment (loop, cand, use->stmt))
- max_niter += 1;
- period_value = wi::to_widest (period);
- if (wi::gtu_p (max_niter, period_value))
- {
- /* See if we can take advantage of inferred loop bound information.
*/
- if (data->loop_single_exit_p)
- {
- if (!max_loop_iterations (loop, &max_niter))
- return false;
- /* The loop bound is already adjusted by adding 1. */
- if (wi::gtu_p (max_niter, period_value))
- return false;
- }
- else
- return false;
- }
- }
-
cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
*bound = fold_convert (TREE_TYPE (cand->iv->base),
@@ -4796,7 +4944,7 @@ may_eliminate_iv (struct ivopts_data *data,
base the exit condition on it. However, that is often too
expensive. */
if (!integer_zerop (desc->may_be_zero))
- return iv_elimination_compare_lt (data, cand, comp, desc);
+ return iv_elimination_compare_lt (data, use, cand, comp, desc);
return true;
}
Index: gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt-3.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt-3.c (revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt-3.c (revision 0)
@@ -0,0 +1,38 @@
+/* { dg-do compile { target { lp64 } } } */
+/* { dg-options "-O2 -fdump-tree-ivopts-details" } */
+
+typedef long unsigned int size_t;
+extern float a[100], b[100];
+
+int foo (int M, unsigned int l)
+{
+ unsigned ivtmp = 0, niters, _37, _38, bnd;
+ size_t _67, _1;
+ float *vectp_a, *vectp_b, *vectp_a2;
+ float vect__6, vect__7, vect__8;
+
+ _38 = (unsigned int)l;
+ bnd = _38 + 1;
+
+ _1 = (size_t) M;
+ _67 = _1 * 4;
+ vectp_a = a; vectp_b = b; vectp_a2 = a + _67;
+
+ do
+ {
+ vect__6 = *vectp_a;
+ vect__7 = *vectp_b;
+ vect__8 = vect__6 + vect__7;
+ *vectp_a = vect__8;
+ vectp_a = vectp_a + 4;
+ vectp_b = vectp_b + 4;
+ vectp_a2 = vectp_a2 + 4;
+ ivtmp += 1;
+ }
+ while (ivtmp < bnd);
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "Replacing exit test: if \\(bnd.*\\)" 1
"ivopts" } } */
+/* { dg-final { cleanup-tree-dump "ivopts" } } */
Index: gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt-4.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt-4.c (revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt-4.c (revision 0)
@@ -0,0 +1,38 @@
+/* { dg-do compile { target { lp64 } } } */
+/* { dg-options "-O2 -fdump-tree-ivopts-details" } */
+
+typedef long unsigned int size_t;
+extern float a[100], b[100];
+
+int foo (int M, int l)
+{
+ unsigned ivtmp = 0, niters, _37, _38, bnd;
+ size_t _67, _1;
+ float *vectp_a, *vectp_b, *vectp_a2;
+ float vect__6, vect__7, vect__8;
+
+ _38 = (unsigned int)l;
+ bnd = _38 + 1;
+
+ _1 = (size_t) M;
+ _67 = _1 * 4;
+ vectp_a = a; vectp_b = b; vectp_a2 = a + _67;
+
+ do
+ {
+ vect__6 = *vectp_a;
+ vect__7 = *vectp_b;
+ vect__8 = vect__6 + vect__7;
+ *vectp_a = vect__8;
+ vectp_a = vectp_a + 4;
+ vectp_b = vectp_b + 4;
+ vectp_a2 = vectp_a2 + 4;
+ ivtmp += 1;
+ }
+ while (ivtmp < bnd);
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "Replacing exit test: if \\(bnd.*\\)" 1
"ivopts" } } */
+/* { dg-final { cleanup-tree-dump "ivopts" } } */