New in this revision: Updated the patch to work correctly on top of the
changes made by the `vect: support vectorization of early break forced
live IVs as scalar' commit.
Regression tested on both AArch64 and x86_64; no new regressions.
-------
In `vect_do_peeling' and `vect_transform_loop', there are several bits
of logic reliant on niters that need to be handled differently in the
case of uncounted loops.
Firstly, when we peel the loop, adding a prolog, we subtract the
prolog peeling factor from the original number of iterations for the
main loop.
Then, upon vectorization of the main loop, we need to update the
iteration upper-bound to reflect the fact that each iteration now acts
on VF elements, such that fewer iterations will be needed.
Both of these updates become unnecessary when we don't have an IV
counting exit. Therefore, it is sufficient to guard these
manipulations behind a check for whether the loop we're dealing with
is uncounted.
gcc/ChangeLog:
* tree-vect-loop-manip.cc (vect_do_peeling): Disable niters
update for uncounted loops.
* tree-vect-loop.cc (vect_transform_loop): Likewise.
---
gcc/tree-vect-loop-manip.cc | 88 ++++++++++++++++++++++---------------
gcc/tree-vect-loop.cc | 20 +++++----
2 files changed, 64 insertions(+), 44 deletions(-)
diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
index 0b5f747185a..905d7fdc488 100644
--- a/gcc/tree-vect-loop-manip.cc
+++ b/gcc/tree-vect-loop-manip.cc
@@ -3172,6 +3172,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters,
tree nitersm1,
int estimated_vf;
int prolog_peeling = 0;
bool vect_epilogues = loop_vinfo->epilogue_vinfo != NULL;
+ bool uncounted_p = LOOP_VINFO_NITERS_UNCOUNTED_P (loop_vinfo);
if (!vect_use_loop_mask_for_alignment_p (loop_vinfo))
prolog_peeling = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
@@ -3565,29 +3566,34 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters,
tree nitersm1,
bb_before_epilog = loop_preheader_edge (epilog)->src;
}
- /* If loop is peeled for non-zero constant times, now niters refers to
- orig_niters - prolog_peeling, it won't overflow even the orig_niters
- overflows. */
- niters_no_overflow |= (prolog_peeling > 0);
- vect_gen_vector_loop_niters (loop_vinfo, niters,
- niters_vector, step_vector,
- niters_no_overflow);
- if (!integer_onep (*step_vector))
+ if (!uncounted_p)
{
- /* On exit from the loop we will have an easy way of calcalating
- NITERS_VECTOR / STEP * STEP. Install a dummy definition
- until then. */
- niters_vector_mult_vf = make_ssa_name (TREE_TYPE (*niters_vector));
- SSA_NAME_DEF_STMT (niters_vector_mult_vf) = gimple_build_nop ();
- *niters_vector_mult_vf_var = niters_vector_mult_vf;
+ /* If loop is peeled for non-zero constant times, now niters refers to
+ orig_niters - prolog_peeling, it won't overflow even the
+ orig_niters overflows. */
+ niters_no_overflow |= (prolog_peeling > 0);
+ vect_gen_vector_loop_niters (loop_vinfo, niters,
+ niters_vector, step_vector,
+ niters_no_overflow);
+ if (!integer_onep (*step_vector))
+ {
+ /* On exit from the loop we will have an easy way of calculating
+ NITERS_VECTOR / STEP * STEP. Install a dummy definition
+ until then. */
+ niters_vector_mult_vf
+ = make_ssa_name (TREE_TYPE (*niters_vector));
+ SSA_NAME_DEF_STMT (niters_vector_mult_vf) = gimple_build_nop ();
+ *niters_vector_mult_vf_var = niters_vector_mult_vf;
+ }
+ else
+ vect_gen_vector_loop_niters_mult_vf (loop_vinfo, *niters_vector,
+ &niters_vector_mult_vf);
+ /* Update IVs of original loop as if they were advanced by
+ niters_vector_mult_vf steps. */
+ gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));
+ update_e = skip_vector ? e : loop_preheader_edge (epilog);
}
- else
- vect_gen_vector_loop_niters_mult_vf (loop_vinfo, *niters_vector,
- &niters_vector_mult_vf);
- /* Update IVs of original loop as if they were advanced by
- niters_vector_mult_vf steps. */
- gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));
- update_e = skip_vector ? e : loop_preheader_edge (epilog);
+
if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
update_e = single_succ_edge (LOOP_VINFO_MAIN_EXIT (loop_vinfo)->dest);
@@ -3653,26 +3659,36 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters,
tree nitersm1,
tree vector_iters_vf = niters_vector_mult_vf;
if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
{
- tree scal_iv_ty = signed_type_for (TREE_TYPE (vector_iters_vf));
+ tree vector_iters_vf_type = uncounted_p ? sizetype
+ : TREE_TYPE (vector_iters_vf);
+ tree scal_iv_ty = signed_type_for (vector_iters_vf_type);
tree tmp_niters_vf = make_ssa_name (scal_iv_ty);
- basic_block exit_bb = NULL;
- edge update_e = NULL;
- /* Identify the early exit merge block. I wish we had stored this.
*/
- for (auto e : get_loop_exit_edges (loop))
- if (e != LOOP_VINFO_IV_EXIT (loop_vinfo))
- {
- exit_bb = e->dest;
- update_e = single_succ_edge (exit_bb);
- break;
- }
- vect_update_ivs_after_vectorizer (loop_vinfo, tmp_niters_vf,
- update_e, true);
+ /* Not all "early break" loops have an early exit merge block:
+ uncounted loops are classified as early break, but may be
+ single-exit. No work needs to be done in such cases. */
+ if (!(LOOP_VINFO_NITERS_UNCOUNTED_P (loop_vinfo)
+ && get_loop_exit_edges (loop).length () == 1))
+ {
+ basic_block exit_bb = NULL;
+ edge update_e = NULL;
- if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo))
- vector_iters_vf = tmp_niters_vf;
+ /* Identify the early exit merge block. */
+ for (auto e : get_loop_exit_edges (loop))
+ if (e != LOOP_VINFO_MAIN_EXIT (loop_vinfo))
+ {
+ exit_bb = e->dest;
+ update_e = single_succ_edge (exit_bb);
+ break;
+ }
+ vect_update_ivs_after_vectorizer (loop_vinfo, tmp_niters_vf,
+ update_e, true);
+ }
LOOP_VINFO_EARLY_BRK_NITERS_VAR (loop_vinfo) = tmp_niters_vf;
+
+ if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo))
+ vector_iters_vf = tmp_niters_vf;
}
bool recalculate_peel_niters_init
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index dc4f0416ea6..159355116c4 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -11207,7 +11207,7 @@ vect_transform_loop (loop_vec_info loop_vinfo, gimple
*loop_vectorized_call)
LOOP_VINFO_SCALAR_MAIN_EXIT (loop_vinfo)->dest->count = preheader->count;
}
- if (niters_vector == NULL_TREE)
+ if (niters_vector == NULL_TREE && !uncounted_p)
{
if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
&& !LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)
@@ -11333,13 +11333,17 @@ vect_transform_loop (loop_vec_info loop_vinfo, gimple
*loop_vectorized_call)
}
}
- /* The vectorization factor is always > 1, so if we use an IV increment of 1.
- a zero NITERS becomes a nonzero NITERS_VECTOR. */
- if (integer_onep (step_vector))
- niters_no_overflow = true;
- vect_set_loop_condition (loop, LOOP_VINFO_MAIN_EXIT (loop_vinfo), loop_vinfo,
- niters_vector, step_vector, niters_vector_mult_vf,
- !niters_no_overflow);
+ if (!uncounted_p)
+ {
+ /* The vectorization factor is always > 1, so if we use an IV
+ increment of 1, a zero NITERS becomes a nonzero NITERS_VECTOR. */
+ if (integer_onep (step_vector))
+ niters_no_overflow = true;
+
+ vect_set_loop_condition (loop, LOOP_VINFO_MAIN_EXIT (loop_vinfo),
+ loop_vinfo, niters_vector, step_vector,
+ niters_vector_mult_vf, !niters_no_overflow);
+ }
unsigned int assumed_vf = vect_vf_for_cost (loop_vinfo);
--
2.43.0