Re: [PATCH] Fix PR66623

Christophe Lyon Fri, 09 Jun 2017 08:32:33 -0700

On 8 June 2017 at 15:49, Richard Biener <rguent...@suse.de> wrote:
> On Thu, 8 Jun 2017, Richard Biener wrote:
>
>>
>> The following fixes unsafe vectorization of reductions in outer loop
>> vectorization.  It combines it with a bit of cleanup in the affected
>> function.
>>
>> Bootstrap and regtest running on x86_64-unknown-linux-gnu.
>
> Re-testing the following variant -- multiple loop-closed PHI nodes
> for the same SSA name happen.
>
> Richard.
>
> 2017-06-08  Richard Biener  <rguent...@suse.de>
>
>         PR tree-optimization/66623
>         * tree-vect-loop.c (vect_is_simple_reduction): Cleanup,
>         refactor check_reduction into two parts, properly computing
>         whether we have to check reduction validity for outer loop
>         vectorization.
>
>         * gcc.dg/vect/pr66623.c: New testcase.
>


Hi Richard,

The new testcase fails on armeb* targets:
FAIL:    gcc.dg/vect/pr66623.c -flto -ffat-lto-objects execution test
FAIL:    gcc.dg/vect/pr66623.c execution test

Christophe


> Index: gcc/tree-vect-loop.c
> ===================================================================
> --- gcc/tree-vect-loop.c        (revision 249007)
> +++ gcc/tree-vect-loop.c        (working copy)
> @@ -2727,8 +2727,6 @@ vect_is_simple_reduction (loop_vec_info
>  {
>    struct loop *loop = (gimple_bb (phi))->loop_father;
>    struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
> -  edge latch_e = loop_latch_edge (loop);
> -  tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
>    gimple *def_stmt, *def1 = NULL, *def2 = NULL, *phi_use_stmt = NULL;
>    enum tree_code orig_code, code;
>    tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE;
> @@ -2742,11 +2740,6 @@ vect_is_simple_reduction (loop_vec_info
>    *double_reduc = false;
>    *v_reduc_type = TREE_CODE_REDUCTION;
>
> -  /* Check validity of the reduction only for the innermost loop.  */
> -  bool check_reduction = ! flow_loop_nested_p (vect_loop, loop);
> -  gcc_assert ((check_reduction && loop == vect_loop)
> -              || (!check_reduction && flow_loop_nested_p (vect_loop, loop)));
> -
>    name = PHI_RESULT (phi);
>    /* ???  If there are no uses of the PHI result the inner loop reduction
>       won't be detected as possibly double-reduction by vectorizable_reduction
> @@ -2775,13 +2768,15 @@ vect_is_simple_reduction (loop_vec_info
>          {
>            if (dump_enabled_p ())
>             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> -                            "reduction used in loop.\n");
> +                            "reduction value used in loop.\n");
>            return NULL;
>          }
>
>        phi_use_stmt = use_stmt;
>      }
>
> +  edge latch_e = loop_latch_edge (loop);
> +  tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
>    if (TREE_CODE (loop_arg) != SSA_NAME)
>      {
>        if (dump_enabled_p ())
> @@ -2795,18 +2790,22 @@ vect_is_simple_reduction (loop_vec_info
>      }
>
>    def_stmt = SSA_NAME_DEF_STMT (loop_arg);
> -  if (!def_stmt)
> +  if (gimple_nop_p (def_stmt))
>      {
>        if (dump_enabled_p ())
>         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> -                        "reduction: no def_stmt.\n");
> +                        "reduction: no def_stmt\n");
>        return NULL;
>      }
>
>    if (!is_gimple_assign (def_stmt) && gimple_code (def_stmt) != GIMPLE_PHI)
>      {
>        if (dump_enabled_p ())
> -       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
> +       {
> +         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> +                          "reduction: unhandled reduction operation: ");
> +         dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, def_stmt, 0);
> +       }
>        return NULL;
>      }
>
> @@ -2822,6 +2821,7 @@ vect_is_simple_reduction (loop_vec_info
>      }
>
>    nloop_uses = 0;
> +  auto_vec<gphi *, 3> lcphis;
>    FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
>      {
>        gimple *use_stmt = USE_STMT (use_p);
> @@ -2829,6 +2829,9 @@ vect_is_simple_reduction (loop_vec_info
>         continue;
>        if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
>         nloop_uses++;
> +      else
> +       /* We can have more than one loop-closed PHI.  */
> +       lcphis.safe_push (as_a <gphi *> (use_stmt));
>        if (nloop_uses > 1)
>         {
>           if (dump_enabled_p ())
> @@ -2873,6 +2876,27 @@ vect_is_simple_reduction (loop_vec_info
>        return NULL;
>      }
>
> +  /* If we are vectorizing an inner reduction we are executing that
> +     in the original order only in case we are not dealing with a
> +     double reduction.  */
> +  bool check_reduction = true;
> +  if (flow_loop_nested_p (vect_loop, loop))
> +    {
> +      gphi *lcphi;
> +      unsigned i;
> +      check_reduction = false;
> +      FOR_EACH_VEC_ELT (lcphis, i, lcphi)
> +       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_phi_result (lcphi))
> +         {
> +           gimple *use_stmt = USE_STMT (use_p);
> +           if (is_gimple_debug (use_stmt))
> +             continue;
> +           if (! flow_bb_inside_loop_p (vect_loop, gimple_bb (use_stmt)))
> +             check_reduction = true;
> +         }
> +    }
> +
> +  bool nested_in_vect_loop = flow_loop_nested_p (vect_loop, loop);
>    code = orig_code = gimple_assign_rhs_code (def_stmt);
>
>    /* We can handle "res -= x[i]", which is non-associative by
> @@ -2887,27 +2911,8 @@ vect_is_simple_reduction (loop_vec_info
>
>    if (code == COND_EXPR)
>      {
> -      if (check_reduction)
> +      if (! nested_in_vect_loop)
>         *v_reduc_type = COND_REDUCTION;
> -    }
> -  else if (!commutative_tree_code (code) || !associative_tree_code (code))
> -    {
> -      if (dump_enabled_p ())
> -       report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
> -                       "reduction: not commutative/associative: ");
> -      return NULL;
> -    }
> -
> -  if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
> -    {
> -      if (code != COND_EXPR)
> -        {
> -         if (dump_enabled_p ())
> -           report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
> -                           "reduction: not binary operation: ");
> -
> -          return NULL;
> -        }
>
>        op3 = gimple_assign_rhs1 (def_stmt);
>        if (COMPARISON_CLASS_P (op3))
> @@ -2918,30 +2923,35 @@ vect_is_simple_reduction (loop_vec_info
>
>        op1 = gimple_assign_rhs2 (def_stmt);
>        op2 = gimple_assign_rhs3 (def_stmt);
> -
> -      if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
> -        {
> -          if (dump_enabled_p ())
> -            report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
> -                           "reduction: uses not ssa_names: ");
> -
> -          return NULL;
> -        }
>      }
> -  else
> +  else if (!commutative_tree_code (code) || !associative_tree_code (code))
> +    {
> +      if (dump_enabled_p ())
> +       report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
> +                       "reduction: not commutative/associative: ");
> +      return NULL;
> +    }
> +  else if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
>      {
>        op1 = gimple_assign_rhs1 (def_stmt);
>        op2 = gimple_assign_rhs2 (def_stmt);
> +    }
> +  else
> +    {
> +      if (dump_enabled_p ())
> +       report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
> +                       "reduction: not handled operation: ");
> +      return NULL;
> +    }
>
> -      if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
> -        {
> -          if (dump_enabled_p ())
> -           report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
> -                           "reduction: uses not ssa_names: ");
> +  if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
> +    {
> +      if (dump_enabled_p ())
> +       report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
> +                       "reduction: both uses not ssa_names: ");
>
> -          return NULL;
> -        }
> -   }
> +      return NULL;
> +    }
>
>    type = TREE_TYPE (gimple_assign_lhs (def_stmt));
>    if ((TREE_CODE (op1) == SSA_NAME
> @@ -3091,7 +3101,7 @@ vect_is_simple_reduction (loop_vec_info
>                            == vect_internal_def
>                       && !is_loop_header_bb_p (gimple_bb (def2)))))))
>      {
> -      if (check_reduction && orig_code != MINUS_EXPR)
> +      if (! nested_in_vect_loop && orig_code != MINUS_EXPR)
>         {
>           /* Check if we can swap operands (just for simplicity - so that
>              the rest of the code can assume that the reduction variable
> @@ -3145,7 +3155,8 @@ vect_is_simple_reduction (loop_vec_info
>      }
>
>    /* Try to find SLP reduction chain.  */
> -  if (check_reduction && code != COND_EXPR
> +  if (! nested_in_vect_loop
> +      && code != COND_EXPR
>        && vect_is_slp_reduction (loop_info, phi, def_stmt))
>      {
>        if (dump_enabled_p ())
> Index: gcc/testsuite/gcc.dg/vect/pr66623.c
> ===================================================================
> --- gcc/testsuite/gcc.dg/vect/pr66623.c (revision 0)
> +++ gcc/testsuite/gcc.dg/vect/pr66623.c (working copy)
> @@ -0,0 +1,86 @@
> +/* { dg-require-effective-target vect_float } */
> +
> +#include "tree-vect.h"
> +
> +extern void abort (void);
> +
> +#define OP *
> +#define INIT 1.0
> +
> +float __attribute__((noinline,noclone))
> +foo (float *__restrict__ i)
> +{
> +  float l = INIT;
> +  int a;
> +  int b;
> +
> +  for (a = 0; a < 4; a++)
> +    for (b = 0; b < 4; b++)
> +      l = l OP i[b];
> +
> +  return l;
> +}
> +
> +float __attribute__((noinline,noclone))
> +foo_ref (float *__restrict__ i)
> +{
> +  float l = INIT;
> +
> +  l = l OP i[0];
> +  l = l OP i[1];
> +  l = l OP i[2];
> +  l = l OP i[3];
> +
> +  l = l OP i[0];
> +  l = l OP i[1];
> +  l = l OP i[2];
> +  l = l OP i[3];
> +
> +  l = l OP i[0];
> +  l = l OP i[1];
> +  l = l OP i[2];
> +  l = l OP i[3];
> +
> +  l = l OP i[0];
> +  l = l OP i[1];
> +  l = l OP i[2];
> +  l = l OP i[3];
> +
> +  return l;
> +}
> +
> +union u
> +{
> +  float f;
> +  unsigned int u;
> +};
> +
> +int
> +main (void)
> +{
> +  union u res, res2;
> +  float a[4];
> +
> +  if (sizeof (float) != sizeof (unsigned int))
> +    return 0;
> +
> +  check_vect ();
> +
> +  a[0] = 0.01;
> +  a[1] = 0.01;
> +  a[2] = 0.01;
> +  a[3] = 1.0;
> +
> +  res.f = foo_ref (a);
> +
> +  res2.f = foo (a);
> +
> +  if (res.u != res2.u)
> +    abort ();
> +
> +  return 0;
> +}
> +
> +/* need -ffast-math to vectorize this loop.  */
> +/* ARM NEON passes -ffast-math to these tests, so expect this to fail.  */
> +/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { xfail 
> arm_neon_ok } } } */

Re: [PATCH] Fix PR66623

Reply via email to