Hi,
When loop versioning is required in vectorization, we can merge the niter
check for vect peeling with the check for loop versioning, thus saving one
check/branch for the vectorized loop.
Is it OK?
Thanks,
bin
2017-04-11 Bin Cheng <bin.ch...@arm.com>
* tree-vect-loop-manip.c (vect_do_peeling): Don't skip vector loop
if versioning is required.
* tree-vect-loop.c (vect_analyze_loop_2): Merge niter check for loop
peeling with the check for versioning.
From bd54e2524a4047328ba4847ad013db2bbe5850fe Mon Sep 17 00:00:00 2001
From: Bin Cheng <binch...@e108451-lin.cambridge.arm.com>
Date: Thu, 16 Mar 2017 16:40:50 +0000
Subject: [PATCH 32/33] save-vect_peeling-niters-check-20170225.txt
---
gcc/tree-vect-loop-manip.c | 8 +++++---
gcc/tree-vect-loop.c | 30 ++++++++++++++++++++++++++++++
2 files changed, 35 insertions(+), 3 deletions(-)
diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c
index 0fc8cd3..0ff474d 100644
--- a/gcc/tree-vect-loop-manip.c
+++ b/gcc/tree-vect-loop-manip.c
@@ -1686,9 +1686,11 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
/* Prolog loop may be skipped. */
bool skip_prolog = (prolog_peeling != 0);
- /* Skip to epilog if scalar loop may be preferred. It's only used when
- we peel for epilog loop. */
- bool skip_vector = (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo));
+ /* Skip to epilog if scalar loop may be preferred. It's only needed
+ when we peel for epilog loop and when it hasn't been checked with
+ loop versioning. */
+ bool skip_vector = (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
+ && !LOOP_REQUIRES_VERSIONING (loop_vinfo));
/* Epilog loop must be executed if the number of iterations for epilog
loop is known at compile time, otherwise we need to add a check at
the end of vector loop and skip to the end of epilog loop. */
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index af874e7..98caa5e 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -2214,6 +2214,36 @@ start_over:
}
}
+ /* During peeling, we need to check if number of loop iterations is
+ enough for both peeled prolog loop and vector loop. This check
+ can be merged along with threshold check of loop versioning, so
+ increase threshold for this case if necessary. */
+ if (LOOP_REQUIRES_VERSIONING (loop_vinfo)
+ && (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
+ || LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)))
+ {
+ unsigned niters_th;
+
+ /* Niters for peeled prolog loop. */
+ if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0)
+ {
+ struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
+ tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
+
+ niters_th = TYPE_VECTOR_SUBPARTS (vectype) - 1;
+ }
+ else
+ niters_th = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
+
+ /* Niters for at least one iteration of vectorized loop. */
+ niters_th += LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+ /* One additional iteration because of peeling for gap. */
+ if (!LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
+ niters_th++;
+ if (LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) < niters_th)
+ LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) = niters_th;
+ }
+
gcc_assert (vectorization_factor
== (unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo));
--
1.9.1