The following removes an optimization that wrongly triggers right now
because it accesses LOOP_VINFO_COST_MODEL_THRESHOLD which might not be
computed yet and uses guessed likely max stmt executions.
It also refactors the code to make the default conservative.
Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.
PR tree-optimization/120939
* tree-vect-loop.cc (vect_need_peeling_or_partial_vectors_p):
Remove eliding an epilogue based on not computed
LOOP_VINFO_COST_MODEL_THRESHOLD and estimated max stmt executions.
* gcc.dg/torture/pr113026-1.c: Skip when -ftracer.
---
gcc/testsuite/gcc.dg/torture/pr113026-1.c | 4 +-
gcc/tree-vect-loop.cc | 45 ++++++++---------------
2 files changed, 18 insertions(+), 31 deletions(-)
diff --git a/gcc/testsuite/gcc.dg/torture/pr113026-1.c
b/gcc/testsuite/gcc.dg/torture/pr113026-1.c
index 56dfef3b36c..37b5281d547 100644
--- a/gcc/testsuite/gcc.dg/torture/pr113026-1.c
+++ b/gcc/testsuite/gcc.dg/torture/pr113026-1.c
@@ -1,4 +1,6 @@
-/* { dg-do compile } */
+/* { dg-do compile } */
+/* When tracing the vector epilog we diagnose an unreachable access. */
+/* { dg-skip-if "" { *-*-* } { "-ftracer" } { "" } } */
/* { dg-additional-options "-Wall" } */
char dst[16];
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 5fab4b8dac5..cd62c96892b 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -945,13 +945,6 @@ static bool
vect_need_peeling_or_partial_vectors_p (loop_vec_info loop_vinfo)
{
unsigned HOST_WIDE_INT const_vf;
- HOST_WIDE_INT max_niter
- = likely_max_stmt_executions_int (LOOP_VINFO_LOOP (loop_vinfo));
-
- unsigned th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo);
- if (!th && LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo))
- th = LOOP_VINFO_COST_MODEL_THRESHOLD (LOOP_VINFO_ORIG_LOOP_INFO
- (loop_vinfo));
loop_vec_info main_loop_vinfo
= (LOOP_VINFO_EPILOGUE_P (loop_vinfo)
@@ -965,31 +958,23 @@ vect_need_peeling_or_partial_vectors_p (loop_vec_info
loop_vinfo)
= LOOP_VINFO_PEELING_FOR_ALIGNMENT (main_loop_vinfo);
if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
peel_niter += 1;
- if (!multiple_p (LOOP_VINFO_INT_NITERS (loop_vinfo) - peel_niter,
- LOOP_VINFO_VECT_FACTOR (loop_vinfo)))
- return true;
+ return !multiple_p (LOOP_VINFO_INT_NITERS (loop_vinfo) - peel_niter,
+ LOOP_VINFO_VECT_FACTOR (loop_vinfo));
}
- else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (main_loop_vinfo)
- /* ??? When peeling for gaps but not alignment, we could
- try to check whether the (variable) niters is known to be
- VF * N + 1. That's something of a niche case though. */
- || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
- || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&const_vf)
- || ((tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
- < (unsigned) exact_log2 (const_vf))
- /* In case of versioning, check if the maximum number of
- iterations is greater than th. If they are identical,
- the epilogue is unnecessary. */
- && (!LOOP_REQUIRES_VERSIONING (loop_vinfo)
- || ((unsigned HOST_WIDE_INT) max_niter
- /* We'd like to use LOOP_VINFO_VERSIONING_THRESHOLD
- but that's only computed later based on our result.
- The following is the most conservative approximation. */
- > (std::max ((unsigned HOST_WIDE_INT) th,
- const_vf) / const_vf) * const_vf))))
- return true;
- return false;
+ if (!LOOP_VINFO_PEELING_FOR_ALIGNMENT (main_loop_vinfo)
+ && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
+ && LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&const_vf))
+ {
+ /* When the number of iterations is a multiple of the vectorization
+ factor and we are not doing prologue or forced epilogue peeling
+ the epilogue isn't necessary. */
+ if (tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
+ >= (unsigned) exact_log2 (const_vf))
+ return false;
+ }
+
+ return true;
}
/* Each statement in LOOP_VINFO can be masked where necessary. Check
--
2.51.0