Hi,
Given the test case reported in PR79159:
void foo(float tmpCorr[9][9]);
float bar;
void finalDigits(int& n)
{
float tmpCorr[9][9] = {{0}};
foo(tmpCorr);
for (int i = 0; i < n; i++) {
for (int j = i+1; j < n; j++) {
bar = tmpCorr[i][j];
}
}
}
The cunrolli pass unrolls the inner loop 9 times, a count which is inferred
from the bound of the local array definition: "tmpCorr[9][9]". In fact, the loop
only needs to be unrolled 8 times because the smallest starting value of "j" is 1.
However, the loop niter analyzer fails to compute the accurate niter bound because
cunrolli runs before the vrp pass, and it knows nothing about the outer loop's
induction variable when handling the inner loop. This patch computes the initial
value of the induction variable and uses it to improve the bound analysis.
Bootstrapped and tested on x86_64 and AArch64. Is it OK?
Thanks,
bin
2017-01-23 Bin Cheng <bin.ch...@arm.com>
PR tree-optimization/79159
* tree-ssa-loop-niter.c (get_cst_init_from_scev): New function.
(record_nonwrapping_iv): Improve boundary using above function if no
value range information.
gcc/testsuite/ChangeLog
2017-01-23 Bin Cheng <bin.ch...@arm.com>
PR tree-optimization/79159
* g++.dg/tree-ssa/pr79159.C: New test.
diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr79159.C
b/gcc/testsuite/g++.dg/tree-ssa/pr79159.C
new file mode 100644
index 0000000..e15e117
--- /dev/null
+++ b/gcc/testsuite/g++.dg/tree-ssa/pr79159.C
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -Wall" } */
+
+void foo(float tmpCorr[9][9]);
+float bar;
+
+void finalDigits(int& n)
+{
+ float tmpCorr[9][9] = {{0}};
+
+ foo(tmpCorr);
+ for (int i = 0; i < n; i++) {
+ for (int j = i+1; j < n; j++) {
+ bar = tmpCorr[i][j];
+ }
+ }
+}
+
diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
index 8328625..efcf3ed 100644
--- a/gcc/tree-ssa-loop-niter.c
+++ b/gcc/tree-ssa-loop-niter.c
@@ -3059,6 +3059,43 @@ record_control_iv (struct loop *loop, struct
tree_niter_desc *niter)
return;
}
+/* This function returns TRUE if all of the conditions below are satisfied:
+ 1) VAR is an SSA variable.
+ 2) VAR is an IV:{base, step} in its defining loop.
+ 3) IV doesn't overflow.
+ 4) Both base and step are integer constants.
+ 5) Base is the MIN/MAX value of the IV, depending on IS_MIN.
+ In that case the value of base is stored to INIT. */
+
+static bool
+get_cst_init_from_scev (tree var, wide_int *init, bool is_min)
+{
+ if (TREE_CODE (var) != SSA_NAME)
+ return false;
+
+ gimple *def_stmt = SSA_NAME_DEF_STMT (var);
+ struct loop *loop = loop_containing_stmt (def_stmt);
+
+ if (loop == NULL)
+ return false;
+
+ affine_iv iv;
+ if (!simple_iv (loop, loop, var, &iv, false))
+ return false;
+
+ if (!iv.no_overflow)
+ return false;
+
+ if (TREE_CODE (iv.base) != INTEGER_CST || TREE_CODE (iv.step) != INTEGER_CST)
+ return false;
+
+ if (is_min == tree_int_cst_sign_bit (iv.step))
+ return false;
+
+ *init = iv.base;
+ return true;
+}
+
/* Record the estimate on number of iterations of LOOP based on the fact that
the induction variable BASE + STEP * i evaluated in STMT does not wrap and
its values belong to the range <LOW, HIGH>. REALISTIC is true if the
@@ -3100,7 +3137,8 @@ record_nonwrapping_iv (struct loop *loop, tree base, tree
step, gimple *stmt,
if (TREE_CODE (orig_base) == SSA_NAME
&& TREE_CODE (high) == INTEGER_CST
&& INTEGRAL_TYPE_P (TREE_TYPE (orig_base))
- && get_range_info (orig_base, &min, &max) == VR_RANGE
+ && (get_range_info (orig_base, &min, &max) == VR_RANGE
+ || get_cst_init_from_scev (orig_base, &max, false))
&& wi::gts_p (high, max))
base = wide_int_to_tree (unsigned_type, max);
else if (TREE_CODE (base) != INTEGER_CST
@@ -3117,7 +3155,8 @@ record_nonwrapping_iv (struct loop *loop, tree base, tree
step, gimple *stmt,
if (TREE_CODE (orig_base) == SSA_NAME
&& TREE_CODE (low) == INTEGER_CST
&& INTEGRAL_TYPE_P (TREE_TYPE (orig_base))
- && get_range_info (orig_base, &min, &max) == VR_RANGE
+ && (get_range_info (orig_base, &min, &max) == VR_RANGE
+ || get_cst_init_from_scev (orig_base, &min, true))
&& wi::gts_p (min, low))
base = wide_int_to_tree (unsigned_type, min);
else if (TREE_CODE (base) != INTEGER_CST