New in this revision: The original rebasing of the patch posted in v3
was missing a small "fixup!" commit I had on my local machine to ensure
all uncounted early break exits pointed to the early exit merge block,
as was the case prior to the rebase. This patch combines v3 with
this fixup.
All clear for regressions on x86_64 and AArch64.
------
In `vect_do_peeling' and `vect_transform_loop', there are several bits
of logic reliant on niters that need to be handled differently in the
case of uncounted loops.
Firstly, when we peel the loop, adding a prolog, we subtract the
prolog peeling factor from the original number of iterations for the
main loop.
Then, upon vectorization of the main loop, we need to update the
iteration upper-bound to reflect the fact that each iteration now acts
on VF elements, such that fewer iterations will be needed.
Both of these updates become unnecessary when we don't have an IV
counting exit. Therefore, it is sufficient to guard these
manipulations behind a check for whether the loop we're dealing with
is uncounted.
gcc/ChangeLog:
* tree-vect-loop-manip.cc (vect_do_peeling): Disable niters
update for uncounted loops.
* tree-vect-loop.cc (vect_transform_loop): Likewise.
---
gcc/tree-vect-loop-manip.cc | 80 +++++++++++++++++++++----------------
gcc/tree-vect-loop.cc | 20 ++++++----
2 files changed, 58 insertions(+), 42 deletions(-)
diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
index 616ee3e317f..6a8cf6c14f5 100644
--- a/gcc/tree-vect-loop-manip.cc
+++ b/gcc/tree-vect-loop-manip.cc
@@ -3177,6 +3177,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters,
tree nitersm1,
int estimated_vf;
int prolog_peeling = 0;
bool vect_epilogues = loop_vinfo->epilogue_vinfo != NULL;
+ bool uncounted_p = LOOP_VINFO_NITERS_UNCOUNTED_P (loop_vinfo);
if (!vect_use_loop_mask_for_alignment_p (loop_vinfo))
prolog_peeling = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
@@ -3570,28 +3571,33 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters,
tree nitersm1,
bb_before_epilog = loop_preheader_edge (epilog)->src;
}
- /* If loop is peeled for non-zero constant times, now niters refers to
- orig_niters - prolog_peeling, it won't overflow even the orig_niters
- overflows. */
- niters_no_overflow |= (prolog_peeling > 0);
- vect_gen_vector_loop_niters (loop_vinfo, niters,
- niters_vector, step_vector,
- niters_no_overflow);
- if (!integer_onep (*step_vector))
+ if (!uncounted_p)
{
- /* On exit from the loop we will have an easy way of calcalating
- NITERS_VECTOR / STEP * STEP. Install a dummy definition
- until then. */
- niters_vector_mult_vf = make_ssa_name (TREE_TYPE (*niters_vector));
- SSA_NAME_DEF_STMT (niters_vector_mult_vf) = gimple_build_nop ();
- *niters_vector_mult_vf_var = niters_vector_mult_vf;
+ /* If loop is peeled for non-zero constant times, now niters refers to
+ orig_niters - prolog_peeling, it won't overflow even the
+ orig_niters overflows. */
+ niters_no_overflow |= (prolog_peeling > 0);
+ vect_gen_vector_loop_niters (loop_vinfo, niters,
+ niters_vector, step_vector,
+ niters_no_overflow);
+ if (!integer_onep (*step_vector))
+ {
+ /* On exit from the loop we will have an easy way of calculating
+ NITERS_VECTOR / STEP * STEP. Install a dummy definition
+ until then. */
+ niters_vector_mult_vf
+ = make_ssa_name (TREE_TYPE (*niters_vector));
+ SSA_NAME_DEF_STMT (niters_vector_mult_vf) = gimple_build_nop ();
+ *niters_vector_mult_vf_var = niters_vector_mult_vf;
+ }
+ else
+ vect_gen_vector_loop_niters_mult_vf (loop_vinfo, *niters_vector,
+ &niters_vector_mult_vf);
+ /* Update IVs of original loop as if they were advanced by
+ niters_vector_mult_vf steps. */
+ gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));
+ update_e = skip_vector ? e : loop_preheader_edge (epilog);
}
- else
- vect_gen_vector_loop_niters_mult_vf (loop_vinfo, *niters_vector,
- &niters_vector_mult_vf);
- /* Update IVs of original loop as if they were advanced by
- niters_vector_mult_vf steps. */
- gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));
if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
update_e = single_succ_edge (LOOP_VINFO_MAIN_EXIT (loop_vinfo)->dest);
@@ -3662,27 +3668,33 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters,
tree nitersm1,
tree vector_iters_vf = niters_vector_mult_vf;
if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
{
- tree scal_iv_ty = signed_type_for (TREE_TYPE (vector_iters_vf));
+ tree vector_iters_vf_type = uncounted_p ? sizetype
+ : TREE_TYPE (vector_iters_vf);
+ tree scal_iv_ty = signed_type_for (vector_iters_vf_type);
tree tmp_niters_vf = make_ssa_name (scal_iv_ty);
- basic_block exit_bb = NULL;
- edge update_e = NULL;
- /* Identify the early exit merge block. I wish we had stored this.
*/
- for (auto e : get_loop_exit_edges (loop))
- if (e != LOOP_VINFO_IV_EXIT (loop_vinfo))
- {
- exit_bb = e->dest;
- update_e = single_succ_edge (exit_bb);
- break;
- }
- vect_update_ivs_after_vectorizer (loop_vinfo, tmp_niters_vf,
- update_e, true);
+ if (!(LOOP_VINFO_NITERS_UNCOUNTED_P (loop_vinfo)
+ && get_loop_exit_edges (loop).length () == 1))
+ {
+ basic_block exit_bb = NULL;
+ edge update_e = NULL;
+ /* Identify the early exit merge block. I wish we had stored
+ this. */
+ for (auto e : get_loop_exit_edges (loop))
+ if (e != LOOP_VINFO_MAIN_EXIT (loop_vinfo))
+ {
+ exit_bb = e->dest;
+ update_e = single_succ_edge (exit_bb);
+ break;
+ }
+ vect_update_ivs_after_vectorizer (loop_vinfo, tmp_niters_vf,
+ update_e, true);
+ }
if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo))
vector_iters_vf = tmp_niters_vf;
LOOP_VINFO_EARLY_BRK_NITERS_VAR (loop_vinfo) = tmp_niters_vf;
- }
+ }
bool recalculate_peel_niters_init
= LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo);
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 0eae480a576..e61ac675e42 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -11186,7 +11186,7 @@ vect_transform_loop (loop_vec_info loop_vinfo, gimple
*loop_vectorized_call)
LOOP_VINFO_SCALAR_MAIN_EXIT (loop_vinfo)->dest->count = preheader->count;
}
- if (niters_vector == NULL_TREE)
+ if (niters_vector == NULL_TREE && !uncounted_p)
{
if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
&& !LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)
@@ -11312,13 +11312,17 @@ vect_transform_loop (loop_vec_info loop_vinfo, gimple
*loop_vectorized_call)
}
}
- /* The vectorization factor is always > 1, so if we use an IV increment of 1.
- a zero NITERS becomes a nonzero NITERS_VECTOR. */
- if (integer_onep (step_vector))
- niters_no_overflow = true;
- vect_set_loop_condition (loop, LOOP_VINFO_MAIN_EXIT (loop_vinfo), loop_vinfo,
- niters_vector, step_vector, niters_vector_mult_vf,
- !niters_no_overflow);
+ if (!uncounted_p)
+ {
+ /* The vectorization factor is always > 1, so if we use an IV
+ increment of 1, a zero NITERS becomes a nonzero NITERS_VECTOR. */
+ if (integer_onep (step_vector))
+ niters_no_overflow = true;
+
+ vect_set_loop_condition (loop, LOOP_VINFO_MAIN_EXIT (loop_vinfo),
+ loop_vinfo, niters_vector, step_vector,
+ niters_vector_mult_vf, !niters_no_overflow);
+ }
unsigned int assumed_vf = vect_vf_for_cost (loop_vinfo);
--
2.43.0