On 11 November 2011 19:06, Jakub Jelinek <ja...@redhat.com> wrote:
> On Fri, Nov 11, 2011 at 06:57:58PM +0200, Ira Rosen wrote:
>> On 11 November 2011 17:32, Jakub Jelinek <ja...@redhat.com> wrote:
>> > 2011-11-11  Jakub Jelinek  <ja...@redhat.com>
>> >
>> >        PR tree-optimization/51058
>> >        * tree-vect-slp.c (vect_remove_slp_scalar_calls): New function.
>> >        (vect_schedule_slp): Call it.
>> >        * tree-vect-stmts.c (vectorizable_call): If slp_node != NULL,
>> >        don't replace scalar calls with clearing of their lhs here.
>>
>> I think it's rhs.
>
> I think it is lhs.  The scalar call is
>  lhs = __builtin_copysign (arg1, arg2);
> etc. and we transform it to
>  lhs = 0.0;

I still think it's clearing of rhs, but this is not really important :)

On 11 November 2011 19:13, Jakub Jelinek <ja...@redhat.com> wrote:
> On Fri, Nov 11, 2011 at 06:06:18PM +0100, Jakub Jelinek wrote:
>> > Please add
>> > ! { dg-final { cleanup-tree-dump "vect" } }
>> >
>> > OK otherwise.
>>
>> This is not a /vect/ testcase, but fortran torture.

Ah, sorry, indeed I thought it's a vect test.

>> I guess
>> if you really want I could move it over to gfortran.dg/vect/ instead,
>> then the ! { dg-final { cleanup-tree-dump "vect" } }
>> would be indeed needed there.
>
> That would be following, incrementally tested with
> make check-gfortran RUNTESTFLAGS='--target_board=unix\{-m32,-m64\} 
> vect.exp=pr51*'
> with both unpatched and patched f951.
>
> 2011-11-11  Jakub Jelinek  <ja...@redhat.com>
>
>        PR tree-optimization/51058
>        * tree-vect-slp.c (vect_remove_slp_scalar_calls): New function.
>        (vect_schedule_slp): Call it.
>        * tree-vect-stmts.c (vectorizable_call): If slp_node != NULL,
>        don't replace scalar calls with clearing of their lhs here.
>
>        * gcc.dg/vect/fast-math-vect-call-1.c: Add f4 test.
>        * gfortran.dg/vect/pr51058-2.f90: New test.

Looks good.

Thanks,
Ira

>
> --- gcc/tree-vect-slp.c.jj      2011-11-11 16:02:40.475359160 +0100
> +++ gcc/tree-vect-slp.c 2011-11-11 18:08:29.784708271 +0100
> @@ -2902,6 +2902,46 @@ vect_schedule_slp_instance (slp_tree nod
>   return is_store;
>  }
>
> +/* Replace scalar calls from SLP node NODE with clearing of their lhs.
> +   For loop vectorization this is done in vectorizable_call, but for SLP
> +   it needs to be deferred until end of vect_schedule_slp, because multiple
> +   SLP instances may refer to the same scalar stmt.  */
> +
> +static void
> +vect_remove_slp_scalar_calls (slp_tree node)
> +{
> +  gimple stmt, new_stmt;
> +  gimple_stmt_iterator gsi;
> +  int i;
> +  slp_void_p child;
> +  tree lhs;
> +  stmt_vec_info stmt_info;
> +
> +  if (!node)
> +    return;
> +
> +  FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
> +    vect_remove_slp_scalar_calls ((slp_tree) child);
> +
> +  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
> +    {
> +      if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
> +       continue;
> +      stmt_info = vinfo_for_stmt (stmt);
> +      if (stmt_info == NULL
> +         || is_pattern_stmt_p (stmt_info)
> +         || !PURE_SLP_STMT (stmt_info))
> +       continue;
> +      lhs = gimple_call_lhs (stmt);
> +      new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
> +      set_vinfo_for_stmt (new_stmt, stmt_info);
> +      set_vinfo_for_stmt (stmt, NULL);
> +      STMT_VINFO_STMT (stmt_info) = new_stmt;
> +      gsi = gsi_for_stmt (stmt);
> +      gsi_replace (&gsi, new_stmt, false);
> +      SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
> +    }
> +}
>
>  /* Generate vector code for all SLP instances in the loop/basic block.  */
>
> @@ -2941,6 +2981,8 @@ vect_schedule_slp (loop_vec_info loop_vi
>       unsigned int j;
>       gimple_stmt_iterator gsi;
>
> +      vect_remove_slp_scalar_calls (root);
> +
>       for (j = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (root), j, store)
>                   && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
>         {
> --- gcc/tree-vect-stmts.c.jj    2011-11-11 16:02:28.343433924 +0100
> +++ gcc/tree-vect-stmts.c       2011-11-11 18:08:29.786708241 +0100
> @@ -1886,6 +1886,9 @@ vectorizable_call (gimple stmt, gimple_s
>      it defines is mapped to the new definition.  So just replace
>      rhs of the statement with something harmless.  */
>
> +  if (slp_node)
> +    return true;
> +
>   type = TREE_TYPE (scalar_dest);
>   if (is_pattern_stmt_p (stmt_info))
>     lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
> @@ -1893,8 +1896,7 @@ vectorizable_call (gimple stmt, gimple_s
>     lhs = gimple_call_lhs (stmt);
>   new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
>   set_vinfo_for_stmt (new_stmt, stmt_info);
> -  if (!slp_node)
> -    set_vinfo_for_stmt (stmt, NULL);
> +  set_vinfo_for_stmt (stmt, NULL);
>   STMT_VINFO_STMT (stmt_info) = new_stmt;
>   gsi_replace (gsi, new_stmt, false);
>   SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
> --- gcc/testsuite/gcc.dg/vect/fast-math-vect-call-1.c.jj        2011-11-11 
> 16:02:28.026435857 +0100
> +++ gcc/testsuite/gcc.dg/vect/fast-math-vect-call-1.c   2011-11-11 
> 18:08:29.792708207 +0100
> @@ -38,6 +38,18 @@ f3 (void)
>     a[i] = copysignf (b[i], c[i]) + 1.0f + sqrtf (d[i]);
>  }
>
> +__attribute__((noinline, noclone)) void
> +f4 (int n)
> +{
> +  int i;
> +  for (i = 0; i < 2 * n; i++)
> +    {
> +      a[3 * i + 0] = copysignf (b[3 * i + 0], c[3 * i + 0]) + 1.0f + sqrtf 
> (d[3 * i + 0]);
> +      a[3 * i + 1] = copysignf (b[3 * i + 1], c[3 * i + 1]) + 2.0f + sqrtf 
> (d[3 * i + 1]);
> +      a[3 * i + 2] = copysignf (b[3 * i + 2], c[3 * i + 2]) + 3.0f + sqrtf 
> (d[3 * i + 2]);
> +    }
> +}
> +
>  __attribute__((noinline, noclone)) int
>  main1 ()
>  {
> @@ -66,6 +78,12 @@ main1 ()
>   for (i = 0; i < 64; i++)
>     if (fabsf (((i & 2) ? -4 * i : 4 * i) + 1 + i - a[i]) >= 0.0001f)
>       abort ();
> +    else
> +      a[i] = 131.25;
> +  f4 (10);
> +  for (i = 0; i < 60; i++)
> +    if (fabsf (((i & 2) ? -4 * i : 4 * i) + 1 + (i % 3) + i - a[i]) >= 
> 0.0001f)
> +      abort ();
>   return 0;
>  }
>
> @@ -76,6 +94,6 @@ main ()
>   return main1 ();
>  }
>
> -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" { target 
> { vect_call_copysignf && vect_call_sqrtf } } } } */
> -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" 
> { target { vect_call_copysignf && vect_call_sqrtf } } } } */
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" { target 
> { vect_call_copysignf && vect_call_sqrtf } } } } */
> +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" 
> { target { vect_call_copysignf && vect_call_sqrtf } } } } */
>  /* { dg-final { cleanup-tree-dump "vect" } } */
> --- gcc/testsuite/gfortran.dg/vect/pr51058-2.f90.jj     2011-11-11 
> 18:08:29.792708207 +0100
> +++ gcc/testsuite/gfortran.dg/vect/pr51058-2.f90        2011-11-11 
> 18:09:22.378400344 +0100
> @@ -0,0 +1,20 @@
> +! PR tree-optimization/51058
> +! { dg-do compile }
> +subroutine pr51058(n, u, v, w, z)
> +  double precision :: x(3,-2:16384), y(3,-2:16384), b, u, v, w, z
> +  integer :: i, n
> +  common /c/ x, y
> +  do i = 1, n
> +    b = u * int(x(1,i)) + sign(z,x(1,i))
> +    x(1,i) = x(1,i) - b
> +    y(1,i) = y(1,i) - b
> +    b = v * int(x(2,i)) + sign(z,x(2,i))
> +    x(2,i) = x(2,i) - b
> +    y(2,i) = y(2,i) - b
> +    b = w * int(x(3,i)) + sign(z,x(3,i))
> +    x(3,i) = x(3,i) - b
> +    y(3,i) = y(3,i) - b
> +  end do
> +end subroutine
> +
> +! { dg-final { cleanup-tree-dump "vect" } }
>
>
>        Jakub
>

Reply via email to