The loop
/* Reproducer: for each of the first L elements of W, keep decrementing
   W[i] until F % W[i] evaluates to zero.  The inner while loop exits on
   a data-dependent condition rather than a trip count, and the modulo
   is an operation that can trap (e.g. a zero divisor).  noipa keeps the
   function from being inlined or otherwise analysed across calls.  */
void
__attribute__((noipa))
bug (int f, int *w, int l)
{
int i;
for (i = 0; i < l; ++i)
while (f % w[i]) w[i]--;
}
is an uncounted loop which, once vectorized, performs an operation that may
trap.  Normally the vectorizer doesn't reject vectorization if such an
operation can't be masked (though ifcvt does).
For early breaks, however, this is unsafe as we would be introducing a trap
where the original scalar code may not have had one.
Reductions, however, don't require masking when the scalar epilogue is used to
restart the last iteration, as we use the previous value of the vector in that
case and don't reduce "inactive" lanes.  This would need to be adjusted when we
support staying inside the vector loop directly.
Some tests now fail to vectorize (including some tsvc ones).  I could have
xfail'ed them, but instead decided to add -fno-trapping-math so that we keep
the additional coverage they provide.
Bootstrapped and regtested on aarch64-none-linux-gnu,
arm-none-linux-gnueabihf and x86_64-pc-linux-gnu
(-m32, -m64) with no issues.
Any objections?
Thanks,
Tamar
gcc/ChangeLog:
PR tree-optimization/124142
* tree-vect-stmts.cc (vectorizable_call, vectorizable_operation):
For early break require masking when operation can trap.
gcc/testsuite/ChangeLog:
PR tree-optimization/124142
* gcc.dg/vect/tsvc/vect-tsvc-s481.c: Add -fno-trapping-math.
* gcc.dg/vect/tsvc/vect-tsvc-s482.c: Likewise.
* gcc.dg/vect/vect-early-break_61.c: Likewise.
* gcc.target/aarch64/vect-early-break-cbranch_3.c: Likewise.
* gcc.dg/vect/vect-early-break_143-pr124142.c: New test.
---
diff --git a/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s481.c
b/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s481.c
index
e4433385d6686806a75fffe22f90e3bfb603564c..23c9961691d2f0c9528949c5c44dae3e264dc3ec
100644
--- a/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s481.c
+++ b/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s481.c
@@ -1,7 +1,7 @@
/* This file is distributed under the University of Illinois Open Source
License. See license.txt for details. */
-/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
+/* { dg-additional-options "--param vect-epilogues-nomask=0
-fno-trapping-math" } */
/* { dg-require-effective-target vect_float } */
/* { dg-require-effective-target vect_early_break_hw } */
/* { dg-add-options vect_early_break } */
@@ -41,4 +41,4 @@ int main (int argc, char **argv)
return 0;
}
-/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { xfail { !
vect_early_break} } } } */
+/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { xfail { !
vect_early_break } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s482.c
b/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s482.c
index
146df409ecc64e9535583c0d0083469d7aa24031..38426845a3d115432bfc68d9e1e7539fe10e27e2
100644
--- a/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s482.c
+++ b/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s482.c
@@ -1,7 +1,7 @@
/* This file is distributed under the University of Illinois Open Source
License. See license.txt for details. */
-/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
+/* { dg-additional-options "--param vect-epilogues-nomask=0
-fno-trapping-math" } */
/* { dg-require-effective-target vect_float } */
/* { dg-require-effective-target vect_early_break_hw } */
/* { dg-add-options vect_early_break } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_143-pr124142.c
b/gcc/testsuite/gcc.dg/vect/vect-early-break_143-pr124142.c
new file mode 100644
index
0000000000000000000000000000000000000000..b9141e3f15a9d152118c2d8e84fc45d5e83066cb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_143-pr124142.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-add-options vect_early_break } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-require-effective-target vect_int } */
+
+/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" } } */
+
+void
+__attribute__((noipa))
+bug (int f, int *w, int l)
+{
+ int i;
+ for (i = 0; i < l; ++i)
+ while (f % w[i]) w[i]--;
+}
+
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_61.c
b/gcc/testsuite/gcc.dg/vect/vect-early-break_61.c
index
c789ec01f32c6b958c6a3663531a7b7517b94477..0cd20549cdc656baad2b5561e39ca999bca98bb0
100644
--- a/gcc/testsuite/gcc.dg/vect/vect-early-break_61.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_61.c
@@ -2,6 +2,7 @@
/* { dg-do compile } */
/* { dg-require-effective-target vect_early_break } */
/* { dg-require-effective-target vect_float } */
+/* { dg-additional-options "-fno-trapping-math" } */
typedef float real_t;
__attribute__((aligned(64))) real_t a[32000], b[32000], c[32000];
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-early-break-cbranch_3.c
b/gcc/testsuite/gcc.target/aarch64/vect-early-break-cbranch_3.c
index
8980b9f04f9a15ded9e90954e25b2c0578681761..b252b80ef291efd4f6fc98224a44ef7fadc68d6c
100644
--- a/gcc/testsuite/gcc.target/aarch64/vect-early-break-cbranch_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/vect-early-break-cbranch_3.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O3 -fno-schedule-insns -fno-reorder-blocks
-fno-schedule-insns2 --param aarch64-autovec-preference=asimd-only" } */
+/* { dg-options "-O3 -fno-trapping-math -fno-schedule-insns
-fno-reorder-blocks -fno-schedule-insns2 --param
aarch64-autovec-preference=asimd-only" } */
/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
#pragma GCC target "+sve"
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index
22285250aa8d4f600721c647a83d1b2bafe7ef2a..cb7217ad6f92ad4b2bd543733c802606f3fba03e
100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -3650,6 +3650,18 @@ vectorizable_call (vec_info *vinfo,
LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
}
}
+
+ /* If the operation traps, and we're an early break loop then don't allow
+ vectorization if masking isn't supported. */
+ if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "can't use a fully-masked loop and loop has "
+ "multiple exits. Cannot vectorize as operation "
+ "may trap.\n");
+ return false;
+ }
}
/* If that fails, try asking for a target-specific built-in function. */
@@ -6792,7 +6804,8 @@ vectorizable_operation (vec_info *vinfo,
Similarly, if this operation is part of a reduction, a fully-masked
loop should only change the active lanes of the reduction chain,
keeping the inactive lanes as-is. */
- bool mask_out_inactive = ((!is_invariant && gimple_could_trap_p (stmt))
+ bool could_trap_p = gimple_could_trap_p (stmt);
+ bool mask_out_inactive = ((!is_invariant && could_trap_p)
|| reduc_idx >= 0);
if (cost_vec) /* transformation not required. */
@@ -6821,6 +6834,23 @@ vectorizable_operation (vec_info *vinfo,
}
}
+ /* If the operation traps, and we're an early break loop then don't allow
+ vectorization if masking isn't supported. Reductions are OK because if
+ we take an early exit, the epilog will get the value of the previous
+ iterations and we recompute the remainder. */
+ if (loop_vinfo
+ && could_trap_p
+ && !LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
+ && LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "can't use a fully-masked loop and loop has "
+ "multiple exits. Cannot vectorize as operation "
+ "may trap.\n");
+ return false;
+ }
+
/* Put types on constant and invariant SLP children. */
if (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
|| !vect_maybe_update_slp_op_vectype (slp_op1, vectype)
--
diff --git a/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s481.c b/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s481.c
index e4433385d6686806a75fffe22f90e3bfb603564c..23c9961691d2f0c9528949c5c44dae3e264dc3ec 100644
--- a/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s481.c
+++ b/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s481.c
@@ -1,7 +1,7 @@
/* This file is distributed under the University of Illinois Open Source
License. See license.txt for details. */
-/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
+/* { dg-additional-options "--param vect-epilogues-nomask=0 -fno-trapping-math" } */
/* { dg-require-effective-target vect_float } */
/* { dg-require-effective-target vect_early_break_hw } */
/* { dg-add-options vect_early_break } */
@@ -41,4 +41,4 @@ int main (int argc, char **argv)
return 0;
}
-/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { xfail { ! vect_early_break} } } } */
+/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { xfail { ! vect_early_break } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s482.c b/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s482.c
index 146df409ecc64e9535583c0d0083469d7aa24031..38426845a3d115432bfc68d9e1e7539fe10e27e2 100644
--- a/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s482.c
+++ b/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s482.c
@@ -1,7 +1,7 @@
/* This file is distributed under the University of Illinois Open Source
License. See license.txt for details. */
-/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
+/* { dg-additional-options "--param vect-epilogues-nomask=0 -fno-trapping-math" } */
/* { dg-require-effective-target vect_float } */
/* { dg-require-effective-target vect_early_break_hw } */
/* { dg-add-options vect_early_break } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_143-pr124142.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_143-pr124142.c
new file mode 100644
index 0000000000000000000000000000000000000000..b9141e3f15a9d152118c2d8e84fc45d5e83066cb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_143-pr124142.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-add-options vect_early_break } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-require-effective-target vect_int } */
+
+/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" } } */
+
+void
+__attribute__((noipa))
+bug (int f, int *w, int l)
+{
+ int i;
+ for (i = 0; i < l; ++i)
+ while (f % w[i]) w[i]--;
+}
+
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_61.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_61.c
index c789ec01f32c6b958c6a3663531a7b7517b94477..0cd20549cdc656baad2b5561e39ca999bca98bb0 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-early-break_61.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_61.c
@@ -2,6 +2,7 @@
/* { dg-do compile } */
/* { dg-require-effective-target vect_early_break } */
/* { dg-require-effective-target vect_float } */
+/* { dg-additional-options "-fno-trapping-math" } */
typedef float real_t;
__attribute__((aligned(64))) real_t a[32000], b[32000], c[32000];
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-early-break-cbranch_3.c b/gcc/testsuite/gcc.target/aarch64/vect-early-break-cbranch_3.c
index 8980b9f04f9a15ded9e90954e25b2c0578681761..b252b80ef291efd4f6fc98224a44ef7fadc68d6c 100644
--- a/gcc/testsuite/gcc.target/aarch64/vect-early-break-cbranch_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/vect-early-break-cbranch_3.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O3 -fno-schedule-insns -fno-reorder-blocks -fno-schedule-insns2 --param aarch64-autovec-preference=asimd-only" } */
+/* { dg-options "-O3 -fno-trapping-math -fno-schedule-insns -fno-reorder-blocks -fno-schedule-insns2 --param aarch64-autovec-preference=asimd-only" } */
/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
#pragma GCC target "+sve"
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 22285250aa8d4f600721c647a83d1b2bafe7ef2a..cb7217ad6f92ad4b2bd543733c802606f3fba03e 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -3650,6 +3650,18 @@ vectorizable_call (vec_info *vinfo,
LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
}
}
+
+ /* If the operation traps, and we're an early break loop then don't allow
+ vectorization if masking isn't supported. */
+ if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "can't use a fully-masked loop and loop has "
+ "multiple exits. Cannot vectorize as operation "
+ "may trap.\n");
+ return false;
+ }
}
/* If that fails, try asking for a target-specific built-in function. */
@@ -6792,7 +6804,8 @@ vectorizable_operation (vec_info *vinfo,
Similarly, if this operation is part of a reduction, a fully-masked
loop should only change the active lanes of the reduction chain,
keeping the inactive lanes as-is. */
- bool mask_out_inactive = ((!is_invariant && gimple_could_trap_p (stmt))
+ bool could_trap_p = gimple_could_trap_p (stmt);
+ bool mask_out_inactive = ((!is_invariant && could_trap_p)
|| reduc_idx >= 0);
if (cost_vec) /* transformation not required. */
@@ -6821,6 +6834,23 @@ vectorizable_operation (vec_info *vinfo,
}
}
+ /* If the operation traps, and we're an early break loop then don't allow
+ vectorization if masking isn't supported. Reductions are OK because if
+ we take an early exit, the epilog will get the value of the previous
+ iterations and we recompute the remainder. */
+ if (loop_vinfo
+ && could_trap_p
+ && !LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
+ && LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "can't use a fully-masked loop and loop has "
+ "multiple exits. Cannot vectorize as operation "
+ "may trap.\n");
+ return false;
+ }
+
/* Put types on constant and invariant SLP children. */
if (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
|| !vect_maybe_update_slp_op_vectype (slp_op1, vectype)