On 11 November 2011 19:06, Jakub Jelinek <ja...@redhat.com> wrote:
> On Fri, Nov 11, 2011 at 06:57:58PM +0200, Ira Rosen wrote:
>> On 11 November 2011 17:32, Jakub Jelinek <ja...@redhat.com> wrote:
>> > 2011-11-11 Jakub Jelinek <ja...@redhat.com>
>> >
>> > PR tree-optimization/51058
>> > * tree-vect-slp.c (vect_remove_slp_scalar_calls): New function.
>> > (vect_schedule_slp): Call it.
>> > * tree-vect-stmts.c (vectorizable_call): If slp_node != NULL,
>> > don't replace scalar calls with clearing of their lhs here.
>>
>> I think it's rhs.
>
> I think it is lhs. The scalar call is
> lhs = __builtin_copysign (arg1, arg2);
> etc. and we transform it to
> lhs = 0.0;
I still think it's clearing of rhs, but this is not really important :) On 11 November 2011 19:13, Jakub Jelinek <ja...@redhat.com> wrote: > On Fri, Nov 11, 2011 at 06:06:18PM +0100, Jakub Jelinek wrote: >> > Please add >> > ! { dg-final { cleanup-tree-dump "vect" } } >> > >> > OK otherwise. >> >> This is not a /vect/ testcase, but fortran torture. Ah, sorry, indeed I thought it's a vect test. >> I guess >> if you really want I could move it over to gfortran.dg/vect/ instead, >> then the ! { dg-final { cleanup-tree-dump "vect" } } >> would be indeed needed there. > > That would be following, incrementally tested with > make check-gfortran RUNTESTFLAGS='--target_board=unix\{-m32,-m64\} > vect.exp=pr51*' > with both unpatched and patched f951. > > 2011-11-11 Jakub Jelinek <ja...@redhat.com> > > PR tree-optimization/51058 > * tree-vect-slp.c (vect_remove_slp_scalar_calls): New function. > (vect_schedule_slp): Call it. > * tree-vect-stmts.c (vectorizable_call): If slp_node != NULL, > don't replace scalar calls with clearing of their lhs here. > > * gcc.dg/vect/fast-math-vect-call-1.c: Add f4 test. > * gfortran.dg/vect/pr51058-2.f90: New test. Looks good. Thanks, Ira > > --- gcc/tree-vect-slp.c.jj 2011-11-11 16:02:40.475359160 +0100 > +++ gcc/tree-vect-slp.c 2011-11-11 18:08:29.784708271 +0100 > @@ -2902,6 +2902,46 @@ vect_schedule_slp_instance (slp_tree nod > return is_store; > } > > +/* Replace scalar calls from SLP node NODE with clearing of their lhs. > + For loop vectorization this is done in vectorizable_call, but for SLP > + it needs to be deferred until end of vect_schedule_slp, because multiple > + SLP instances may refer to the same scalar stmt. 
*/ > + > +static void > +vect_remove_slp_scalar_calls (slp_tree node) > +{ > + gimple stmt, new_stmt; > + gimple_stmt_iterator gsi; > + int i; > + slp_void_p child; > + tree lhs; > + stmt_vec_info stmt_info; > + > + if (!node) > + return; > + > + FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child) > + vect_remove_slp_scalar_calls ((slp_tree) child); > + > + FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt) > + { > + if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL) > + continue; > + stmt_info = vinfo_for_stmt (stmt); > + if (stmt_info == NULL > + || is_pattern_stmt_p (stmt_info) > + || !PURE_SLP_STMT (stmt_info)) > + continue; > + lhs = gimple_call_lhs (stmt); > + new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs))); > + set_vinfo_for_stmt (new_stmt, stmt_info); > + set_vinfo_for_stmt (stmt, NULL); > + STMT_VINFO_STMT (stmt_info) = new_stmt; > + gsi = gsi_for_stmt (stmt); > + gsi_replace (&gsi, new_stmt, false); > + SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt; > + } > +} > > /* Generate vector code for all SLP instances in the loop/basic block. */ > > @@ -2941,6 +2981,8 @@ vect_schedule_slp (loop_vec_info loop_vi > unsigned int j; > gimple_stmt_iterator gsi; > > + vect_remove_slp_scalar_calls (root); > + > for (j = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (root), j, store) > && j < SLP_INSTANCE_GROUP_SIZE (instance); j++) > { > --- gcc/tree-vect-stmts.c.jj 2011-11-11 16:02:28.343433924 +0100 > +++ gcc/tree-vect-stmts.c 2011-11-11 18:08:29.786708241 +0100 > @@ -1886,6 +1886,9 @@ vectorizable_call (gimple stmt, gimple_s > it defines is mapped to the new definition. So just replace > rhs of the statement with something harmless. 
*/ > > + if (slp_node) > + return true; > + > type = TREE_TYPE (scalar_dest); > if (is_pattern_stmt_p (stmt_info)) > lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info)); > @@ -1893,8 +1896,7 @@ vectorizable_call (gimple stmt, gimple_s > lhs = gimple_call_lhs (stmt); > new_stmt = gimple_build_assign (lhs, build_zero_cst (type)); > set_vinfo_for_stmt (new_stmt, stmt_info); > - if (!slp_node) > - set_vinfo_for_stmt (stmt, NULL); > + set_vinfo_for_stmt (stmt, NULL); > STMT_VINFO_STMT (stmt_info) = new_stmt; > gsi_replace (gsi, new_stmt, false); > SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt; > --- gcc/testsuite/gcc.dg/vect/fast-math-vect-call-1.c.jj 2011-11-11 > 16:02:28.026435857 +0100 > +++ gcc/testsuite/gcc.dg/vect/fast-math-vect-call-1.c 2011-11-11 > 18:08:29.792708207 +0100 > @@ -38,6 +38,18 @@ f3 (void) > a[i] = copysignf (b[i], c[i]) + 1.0f + sqrtf (d[i]); > } > > +__attribute__((noinline, noclone)) void > +f4 (int n) > +{ > + int i; > + for (i = 0; i < 2 * n; i++) > + { > + a[3 * i + 0] = copysignf (b[3 * i + 0], c[3 * i + 0]) + 1.0f + sqrtf > (d[3 * i + 0]); > + a[3 * i + 1] = copysignf (b[3 * i + 1], c[3 * i + 1]) + 2.0f + sqrtf > (d[3 * i + 1]); > + a[3 * i + 2] = copysignf (b[3 * i + 2], c[3 * i + 2]) + 3.0f + sqrtf > (d[3 * i + 2]); > + } > +} > + > __attribute__((noinline, noclone)) int > main1 () > { > @@ -66,6 +78,12 @@ main1 () > for (i = 0; i < 64; i++) > if (fabsf (((i & 2) ? -4 * i : 4 * i) + 1 + i - a[i]) >= 0.0001f) > abort (); > + else > + a[i] = 131.25; > + f4 (10); > + for (i = 0; i < 60; i++) > + if (fabsf (((i & 2) ? 
-4 * i : 4 * i) + 1 + (i % 3) + i - a[i]) >= > 0.0001f) > + abort (); > return 0; > } > > @@ -76,6 +94,6 @@ main () > return main1 (); > } > > -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" { target > { vect_call_copysignf && vect_call_sqrtf } } } } */ > -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" > { target { vect_call_copysignf && vect_call_sqrtf } } } } */ > +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" { target > { vect_call_copysignf && vect_call_sqrtf } } } } */ > +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" > { target { vect_call_copysignf && vect_call_sqrtf } } } } */ > /* { dg-final { cleanup-tree-dump "vect" } } */ > --- gcc/testsuite/gfortran.dg/vect/pr51058-2.f90.jj 2011-11-11 > 18:08:29.792708207 +0100 > +++ gcc/testsuite/gfortran.dg/vect/pr51058-2.f90 2011-11-11 > 18:09:22.378400344 +0100 > @@ -0,0 +1,20 @@ > +! PR tree-optimization/51058 > +! { dg-do compile } > +subroutine pr51058(n, u, v, w, z) > + double precision :: x(3,-2:16384), y(3,-2:16384), b, u, v, w, z > + integer :: i, n > + common /c/ x, y > + do i = 1, n > + b = u * int(x(1,i)) + sign(z,x(1,i)) > + x(1,i) = x(1,i) - b > + y(1,i) = y(1,i) - b > + b = v * int(x(2,i)) + sign(z,x(2,i)) > + x(2,i) = x(2,i) - b > + y(2,i) = y(2,i) - b > + b = w * int(x(3,i)) + sign(z,x(3,i)) > + x(3,i) = x(3,i) - b > + y(3,i) = y(3,i) - b > + end do > +end subroutine > + > +! { dg-final { cleanup-tree-dump "vect" } } > > > Jakub >