On Fri, Nov 11, 2011 at 06:06:18PM +0100, Jakub Jelinek wrote:
> > Please add
> > ! { dg-final { cleanup-tree-dump "vect" } }
> >
> > OK otherwise.
>
> This is not a /vect/ testcase, but fortran torture. I guess
> if you really want I could move it over to gfortran.dg/vect/ instead,
> then the ! { dg-final { cleanup-tree-dump "vect" } }
> would be indeed needed there.
That would be the following, incrementally tested with
make check-gfortran RUNTESTFLAGS='--target_board=unix\{-m32,-m64\} vect.exp=pr51*'
with both unpatched and patched f951.
2011-11-11 Jakub Jelinek <[email protected]>
PR tree-optimization/51058
* tree-vect-slp.c (vect_remove_slp_scalar_calls): New function.
(vect_schedule_slp): Call it.
* tree-vect-stmts.c (vectorizable_call): If slp_node != NULL,
don't replace scalar calls with clearing of their lhs here.
* gcc.dg/vect/fast-math-vect-call-1.c: Add f4 test.
* gfortran.dg/vect/pr51058-2.f90: New test.
--- gcc/tree-vect-slp.c.jj 2011-11-11 16:02:40.475359160 +0100
+++ gcc/tree-vect-slp.c 2011-11-11 18:08:29.784708271 +0100
@@ -2902,6 +2902,46 @@ vect_schedule_slp_instance (slp_tree nod
return is_store;
}
+/* Replace scalar calls from SLP node NODE with clearing of their lhs.
+ For loop vectorization this is done in vectorizable_call, but for SLP
+ it needs to be deferred until end of vect_schedule_slp, because multiple
+ SLP instances may refer to the same scalar stmt. */
+
+static void
+vect_remove_slp_scalar_calls (slp_tree node)
+{
+ gimple stmt, new_stmt;
+ gimple_stmt_iterator gsi;
+ int i;
+ slp_void_p child;
+ tree lhs;
+ stmt_vec_info stmt_info;
+
+ if (!node)
+ return;
+
+ FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
+ vect_remove_slp_scalar_calls ((slp_tree) child);
+
+ FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
+ {
+ if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
+ continue;
+ stmt_info = vinfo_for_stmt (stmt);
+ if (stmt_info == NULL
+ || is_pattern_stmt_p (stmt_info)
+ || !PURE_SLP_STMT (stmt_info))
+ continue;
+ lhs = gimple_call_lhs (stmt);
+ new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
+ set_vinfo_for_stmt (new_stmt, stmt_info);
+ set_vinfo_for_stmt (stmt, NULL);
+ STMT_VINFO_STMT (stmt_info) = new_stmt;
+ gsi = gsi_for_stmt (stmt);
+ gsi_replace (&gsi, new_stmt, false);
+ SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
+ }
+}
/* Generate vector code for all SLP instances in the loop/basic block. */
@@ -2941,6 +2981,8 @@ vect_schedule_slp (loop_vec_info loop_vi
unsigned int j;
gimple_stmt_iterator gsi;
+ vect_remove_slp_scalar_calls (root);
+
for (j = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (root), j, store)
&& j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
{
--- gcc/tree-vect-stmts.c.jj 2011-11-11 16:02:28.343433924 +0100
+++ gcc/tree-vect-stmts.c 2011-11-11 18:08:29.786708241 +0100
@@ -1886,6 +1886,9 @@ vectorizable_call (gimple stmt, gimple_s
it defines is mapped to the new definition. So just replace
rhs of the statement with something harmless. */
+ if (slp_node)
+ return true;
+
type = TREE_TYPE (scalar_dest);
if (is_pattern_stmt_p (stmt_info))
lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
@@ -1893,8 +1896,7 @@ vectorizable_call (gimple stmt, gimple_s
lhs = gimple_call_lhs (stmt);
new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
set_vinfo_for_stmt (new_stmt, stmt_info);
- if (!slp_node)
- set_vinfo_for_stmt (stmt, NULL);
+ set_vinfo_for_stmt (stmt, NULL);
STMT_VINFO_STMT (stmt_info) = new_stmt;
gsi_replace (gsi, new_stmt, false);
SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
--- gcc/testsuite/gcc.dg/vect/fast-math-vect-call-1.c.jj 2011-11-11 16:02:28.026435857 +0100
+++ gcc/testsuite/gcc.dg/vect/fast-math-vect-call-1.c 2011-11-11 18:08:29.792708207 +0100
@@ -38,6 +38,18 @@ f3 (void)
a[i] = copysignf (b[i], c[i]) + 1.0f + sqrtf (d[i]);
}
+__attribute__((noinline, noclone)) void
+f4 (int n)
+{
+ int i;
+ for (i = 0; i < 2 * n; i++)
+ {
+ a[3 * i + 0] = copysignf (b[3 * i + 0], c[3 * i + 0]) + 1.0f + sqrtf
(d[3 * i + 0]);
+ a[3 * i + 1] = copysignf (b[3 * i + 1], c[3 * i + 1]) + 2.0f + sqrtf
(d[3 * i + 1]);
+ a[3 * i + 2] = copysignf (b[3 * i + 2], c[3 * i + 2]) + 3.0f + sqrtf
(d[3 * i + 2]);
+ }
+}
+
__attribute__((noinline, noclone)) int
main1 ()
{
@@ -66,6 +78,12 @@ main1 ()
for (i = 0; i < 64; i++)
if (fabsf (((i & 2) ? -4 * i : 4 * i) + 1 + i - a[i]) >= 0.0001f)
abort ();
+ else
+ a[i] = 131.25;
+ f4 (10);
+ for (i = 0; i < 60; i++)
+ if (fabsf (((i & 2) ? -4 * i : 4 * i) + 1 + (i % 3) + i - a[i]) >= 0.0001f)
+ abort ();
return 0;
}
@@ -76,6 +94,6 @@ main ()
return main1 ();
}
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" { target {
vect_call_copysignf && vect_call_sqrtf } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" {
target { vect_call_copysignf && vect_call_sqrtf } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" { target {
vect_call_copysignf && vect_call_sqrtf } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" {
target { vect_call_copysignf && vect_call_sqrtf } } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
--- gcc/testsuite/gfortran.dg/vect/pr51058-2.f90.jj 2011-11-11 18:08:29.792708207 +0100
+++ gcc/testsuite/gfortran.dg/vect/pr51058-2.f90 2011-11-11 18:09:22.378400344 +0100
@@ -0,0 +1,20 @@
+! PR tree-optimization/51058
+! { dg-do compile }
+subroutine pr51058(n, u, v, w, z)
+ double precision :: x(3,-2:16384), y(3,-2:16384), b, u, v, w, z
+ integer :: i, n
+ common /c/ x, y
+ do i = 1, n
+ b = u * int(x(1,i)) + sign(z,x(1,i))
+ x(1,i) = x(1,i) - b
+ y(1,i) = y(1,i) - b
+ b = v * int(x(2,i)) + sign(z,x(2,i))
+ x(2,i) = x(2,i) - b
+ y(2,i) = y(2,i) - b
+ b = w * int(x(3,i)) + sign(z,x(3,i))
+ x(3,i) = x(3,i) - b
+ y(3,i) = y(3,i) - b
+ end do
+end subroutine
+
+! { dg-final { cleanup-tree-dump "vect" } }
Jakub