Hi,
This patch checks whether a loop has enough iterations (niters) for peeling for
data access gaps in vect_analyze_loop_2; currently this check is only done in
the vect_transform_loop stage. The problem is that the vectorizer may vectorize
loops without enough iterations and generate a guard on the vectorized loop
that is always false. Though the loop is successfully vectorized, it will never
be executed and will most likely be removed during cfg-cleanup. Examples can be
found in the revised tests of this patch.
Thanks,
bin
2016-09-01 Bin Cheng <bin.ch...@arm.com>
* tree-vect-loop.c (vect_analyze_loop_2): Check and skip loop if it
does not have enough iterations for LOOP_VINFO_PEELING_FOR_GAPS.
gcc/testsuite/ChangeLog
2016-09-01 Bin Cheng <bin.ch...@arm.com>
* gcc.dg/vect/vect-98.c: Refine test case.
* gcc.dg/vect/vect-strided-a-u8-i8-gap2.c: Ditto.
* gcc.dg/vect/vect-strided-u8-i8-gap2.c: Ditto.
* gcc.dg/vect/vect-strided-u8-i8-gap4.c: Ditto.
diff --git a/gcc/testsuite/gcc.dg/vect/vect-98.c
b/gcc/testsuite/gcc.dg/vect/vect-98.c
index 99256a7..2055cce 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-98.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-98.c
@@ -3,6 +3,7 @@
#include <stdarg.h>
#include "tree-vect.h"
+#define M 8
#define N 4
#define DOT4( a, b ) ( a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3] )
@@ -11,15 +12,15 @@ int main1 (int ia[][N])
{
int i, j;
int ib[N] = {0,3,6,9};
- int ic[N][N];
+ int ic[M][M];
- for (i = 0; i < N; i++)
+ for (i = 0; i < M; i++)
{
ic[0][i] = DOT4 (ia[i], ib);
}
/* check results: */
- for (i = 0; i < N; i++)
+ for (i = 0; i < M; i++)
{
if (ic[0][i] != DOT4 (ia[i], ib))
abort();
@@ -30,7 +31,8 @@ int main1 (int ia[][N])
int main (void)
{
- int ia[N][N] = {{1,2,3,4},{2,3,5,7},{2,4,6,8},{22,43,55,77}};
+ int ia[M][N] = {{1,2,3,4},{2,3,5,7},{2,4,6,8},{22,43,55,77},
+ {13,17,19,23},{29,31,37,41},{3,7,2,1},{4,9,8,3}};
check_vect ();
diff --git a/gcc/testsuite/gcc.dg/vect/vect-strided-a-u8-i8-gap2.c
b/gcc/testsuite/gcc.dg/vect/vect-strided-a-u8-i8-gap2.c
index 42ed2b7..24c7cc3 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-strided-a-u8-i8-gap2.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-strided-a-u8-i8-gap2.c
@@ -3,7 +3,7 @@
#include <stdarg.h>
#include "tree-vect.h"
-#define N 16
+#define N 24
typedef struct {
unsigned char a;
diff --git a/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap2.c
b/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap2.c
index dddce85..23cea24 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap2.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap2.c
@@ -3,7 +3,7 @@
#include <stdarg.h>
#include "tree-vect.h"
-#define N 16
+#define N 24
typedef struct {
unsigned char a;
diff --git a/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4.c
b/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4.c
index 6face14..1b36df5 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4.c
@@ -3,7 +3,7 @@
#include <stdarg.h>
#include "tree-vect.h"
-#define N 16
+#define N 24
typedef struct {
unsigned char a;
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 45e18af..03ece95 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -2061,6 +2061,25 @@ start_over:
return false;
}
+ /* If epilog loop is required because of data accesses with gaps,
+ one additional iteration needs to be peeled. Check if there are
+ enough iterations for vectorization. */
+ if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
+ && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
+ {
+ int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+ tree scalar_niters = LOOP_VINFO_NITERSM1 (loop_vinfo);
+
+ if (wi::to_widest (scalar_niters) < vf)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "loop has no enough iterations to support"
+ " peeling for gaps.\n");
+ return false;
+ }
+ }
+
/* Analyze cost. Decide if worth while to vectorize. */
int min_profitable_estimate, min_profitable_iters;
vect_estimate_min_profitable_iters (loop_vinfo, &min_profitable_iters,