This is the execution failure of gfortran.dg/vector_subscript_1.f90 on
SPARC/Solaris at -O3 -funroll-loops. The loop2_unroll dump of the reduced
testcase reads:
Loop 3 is simple:
simple exit 10 -> 12
number of iterations: (const_int -1 [0xffffffffffffffff])
upper bound: 1073741823
realistic bound: -1
The number of iterations is of course bogus.
The problem is in simplify_using_initial_values: it uses the condition in:
(insn 105 104 106 6 (set (reg:CC 100 %icc)
(compare:CC (reg:SI 153 [ D.1086 ])
(reg:SI 229 [ D.1086 ]))) vector_subscript_1.f90:21 1
{*cmpsi_insn}
(expr_list:REG_DEAD (reg:SI 229 [ D.1086 ])
(nil)))
to record an equivalence between (reg:SI 153) and (reg:SI 229) and then:
(insn 179 101 103 6 (set (reg:SI 229 [ D.1086 ])
(reg:SI 245 [ D.1086 ])) vector_subscript_1.f90:21 -1
(nil))
plus the equivalence to simplify the niter expression. But there is:
(insn 104 103 105 6 (set (reg:SI 229 [ D.1086 ])
(if_then_else:SI (ge:CC (reg:CC 100 %icc)
(const_int 0 [0]))
(reg:SI 132 [ D.1086 ])
(reg:SI 245 [ D.1086 ]))) vector_subscript_1.f90:21 105
{*movsi_cc_v9}
(expr_list:REG_EQUAL (if_then_else:SI (ge:CC (reg:CC 100 %icc)
(const_int 0 [0]))
(reg:SI 132 [ D.1086 ])
(const_int 0 [0]))
(expr_list:REG_DEAD (reg:SI 132 [ D.1086 ])
(expr_list:REG_DEAD (reg:CC 100 %icc)
(nil)))))
between the two above instructions, which invalidates the equivalence.
So the equivalence should have been pruned when insn 104 was processed.
The patch also does a bit of cosmetic cleanup in loop-unroll.c, for
example in the dump file. We have in the loop2_unroll dump of the
original testcase:
;; *** Considering loop 3 ***
;; Considering unrolling loop with constant number of iterations
;; max_unroll 5 (6 copies, initial 5).
;; Decided to unroll the constant times rolling loop, 4 times.
Question for the reader: how many times is the loop unrolled? 4, 5 or 6? :-)
In fact, the "max_unroll" is always equal to the "times" + 1 and "initial"
is an internal initial value for "max_unroll". Morever, this "max_unroll"
is _not_ equal to the max_unroll in unroll_loop_constant_iterations:
unsigned max_unroll = loop->lpt_decision.times;
which is equal to the "times" instead. And we also have comments like:
/* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES + 1
times.
so LOOP->LPT_DECISION.TIMES (the "times" above) isn't really what one might
think it is. In the end, this is mightily confusing.
Tested on x86_64-suse-linux and SPARC/Solaris, applied on the mainline.
2012-10-13 Eric Botcazou <ebotca...@adacore.com>
PR rtl-optimization/54871
* loop-iv.c (simplify_using_initial_values): When scanning previous
basic blocks, prune the recorded conditions if the current insn was
not used to make a replacement.
* loop-unroll.c (decide_unroll_constant_iterations): Clean up message.
(unroll_loop_constant_iterations): Clarify head comment.
(decide_unroll_runtime_iterations): Clean up message.
(unroll_loop_runtime_iterations): Clarify head comment.
(decide_peel_simple): Clean up message.
(peel_loop_simple): Clarify head comment.
(decide_unroll_stupid): Clean up message.
(unroll_loop_stupid): Clarify head comment.
--
Eric Botcazou
Index: loop-iv.c
===================================================================
--- loop-iv.c (revision 192353)
+++ loop-iv.c (working copy)
@@ -2004,11 +2004,30 @@ simplify_using_initial_values (struct lo
}
}
else
- /* If we did not use this insn to make a replacement, any overlap
- between stores in this insn and our expression will cause the
- expression to become invalid. */
- if (for_each_rtx (expr, altered_reg_used, this_altered))
- goto out;
+ {
+ rtx *pnote, *pnote_next;
+
+ /* If we did not use this insn to make a replacement, any overlap
+ between stores in this insn and our expression will cause the
+ expression to become invalid. */
+ if (for_each_rtx (expr, altered_reg_used, this_altered))
+ goto out;
+
+ /* Likewise for the conditions. */
+ for (pnote = &cond_list; *pnote; pnote = pnote_next)
+ {
+ rtx note = *pnote;
+ rtx old_cond = XEXP (note, 0);
+
+ pnote_next = &XEXP (note, 1);
+ if (for_each_rtx (&old_cond, altered_reg_used, this_altered))
+ {
+ *pnote = *pnote_next;
+ pnote_next = pnote;
+ free_EXPR_LIST_node (note);
+ }
+ }
+ }
if (CONSTANT_P (*expr))
goto out;
Index: loop-unroll.c
===================================================================
--- loop-unroll.c (revision 192353)
+++ loop-unroll.c (working copy)
@@ -602,26 +602,21 @@ decide_unroll_constant_iterations (struc
}
}
- if (dump_file)
- fprintf (dump_file, ";; max_unroll %d (%d copies, initial %d).\n",
- best_unroll + 1, best_copies, nunroll);
-
loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
loop->lpt_decision.times = best_unroll;
if (dump_file)
- fprintf (dump_file,
- ";; Decided to unroll the constant times rolling loop, %d times.\n",
- loop->lpt_decision.times);
+ fprintf (dump_file, ";; Decided to unroll the loop %d times (%d copies).\n",
+ loop->lpt_decision.times, best_copies);
}
-/* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES + 1
- times. The transformation does this:
+/* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES times.
+ The transformation does this:
for (i = 0; i < 102; i++)
body;
- ==>
+ ==> (LOOP->LPT_DECISION.TIMES == 3)
i = 0;
body; i++;
@@ -671,7 +666,7 @@ unroll_loop_constant_iterations (struct
of exit condition have continuous body after unrolling. */
if (dump_file)
- fprintf (dump_file, ";; Condition on beginning of loop.\n");
+ fprintf (dump_file, ";; Condition at beginning of loop.\n");
/* Peel exit_mod iterations. */
RESET_BIT (wont_exit, 0);
@@ -713,7 +708,7 @@ unroll_loop_constant_iterations (struct
the loop tests the condition at the end of loop body. */
if (dump_file)
- fprintf (dump_file, ";; Condition on end of loop.\n");
+ fprintf (dump_file, ";; Condition at end of loop.\n");
/* We know that niter >= max_unroll + 2; so we do not need to care of
case when we would exit before reaching the loop. So just peel
@@ -896,9 +891,7 @@ decide_unroll_runtime_iterations (struct
loop->lpt_decision.times = i - 1;
if (dump_file)
- fprintf (dump_file,
- ";; Decided to unroll the runtime computable "
- "times rolling loop, %d times.\n",
+ fprintf (dump_file, ";; Decided to unroll the loop %d times.\n",
loop->lpt_decision.times);
}
@@ -949,14 +942,14 @@ split_edge_and_insert (edge e, rtx insns
return bb;
}
-/* Unroll LOOP for that we are able to count number of iterations in runtime
- LOOP->LPT_DECISION.TIMES + 1 times. The transformation does this (with some
+/* Unroll LOOP for which we are able to count number of iterations in runtime
+ LOOP->LPT_DECISION.TIMES times. The transformation does this (with some
extra care for case n < 0):
for (i = 0; i < n; i++)
body;
- ==>
+ ==> (LOOP->LPT_DECISION.TIMES == 3)
i = 0;
mod = n % 4;
@@ -1314,20 +1307,23 @@ decide_peel_simple (struct loop *loop, i
loop->lpt_decision.times = npeel;
if (dump_file)
- fprintf (dump_file, ";; Decided to simply peel the loop, %d times.\n",
+ fprintf (dump_file, ";; Decided to simply peel the loop %d times.\n",
loop->lpt_decision.times);
}
-/* Peel a LOOP LOOP->LPT_DECISION.TIMES times. The transformation:
+/* Peel a LOOP LOOP->LPT_DECISION.TIMES times. The transformation does this:
+
while (cond)
body;
- ==>
+ ==> (LOOP->LPT_DECISION.TIMES == 3)
if (!cond) goto end;
body;
if (!cond) goto end;
body;
+ if (!cond) goto end;
+ body;
while (cond)
body;
end: ;
@@ -1464,16 +1460,16 @@ decide_unroll_stupid (struct loop *loop,
loop->lpt_decision.times = i - 1;
if (dump_file)
- fprintf (dump_file,
- ";; Decided to unroll the loop stupidly, %d times.\n",
+ fprintf (dump_file, ";; Decided to unroll the loop stupidly %d times.\n",
loop->lpt_decision.times);
}
-/* Unroll a LOOP LOOP->LPT_DECISION.TIMES times. The transformation:
+/* Unroll a LOOP LOOP->LPT_DECISION.TIMES times. The transformation does this:
+
while (cond)
body;
- ==>
+ ==> (LOOP->LPT_DECISION.TIMES == 3)
while (cond)
{