On Mon, Dec 4, 2017 at 1:11 PM, Richard Biener <rguent...@suse.de> wrote:
>
> I've noticed we perform FP reduction association without the required
> checks for associative math.  I've added
> gcc.dg/tree-ssa/loop-interchange-1b.c to cover this.
>
> I also noticed we happily interchange a loop with a reduction like
>
>  sum = a[i] - sum;
>
> where a change in order of elements isn't ok.  Unfortunately bwaves
> is exactly a case where single_use != next_def (tried to simply remove
> that case for now), because reassoc didn't have a chance to fix the
> operand order.  Thus this patch exports the relevant handling from
> the vectorizer (for stage1 having a separate infrastructure gathering /
> analyzing of reduction/induction infrastructure would be nice...)
> and uses it from interchange.  We then don't handle
> gcc.dg/tree-ssa/loop-interchange-4.c anymore (similar vectorizer
> missed-opt is PR65930).  I didn't bother to split up the vectorizer
> code further to implement relaxed validity checking but simply XFAILed
> this testcase.
>
> Earlier I simplified allocation stuff in the main loop which is why
> this part is included in this patch.
>
> Bootstrap running on x86_64-unknown-linux-gnu.
>
> I'll see to craft a testcase with the sum = a[i] - sum; mis-handling.
>
> Ok?
Sure.
Just for the record.  There is also similar associative check in
predcom.  As you suggested, a path extraction/checking interface for
associative checking would be great, given we have multiple users now.

Thanks,
bin
>
> Thanks,
> Richard.
>
> 2017-12-04  Richard Biener  <rguent...@suse.de>
>
>         * tree-vectorizer.h (check_reduction_path): Declare.
>         * tree-vect-loop.c (check_reduction_path): New function, split out
>         from ...
>         (vect_is_simple_reduction): ... here.
>         * gimple-loop-interchange.cc: Include tree-vectorizer.h.
>         (loop_cand::analyze_iloop_reduction_var): Use single_imm_use.
>         Properly check for a supported reduction operation and a
>         valid expression if the reduction covers multiple stmts.
>         (prepare_perfect_loop_nest): Simpify allocation.
>         (pass_linterchange::execute): Likewise.
>
>         * gcc.dg/tree-ssa/loop-interchange-1.c: Add fast-math flags.
>         * gcc.dg/tree-ssa/loop-interchange-1b.c: New test variant.
>         * gcc.dg/tree-ssa/loop-interchange-4.c: XFAIL.
>
>
> Index: gcc/gimple-loop-interchange.cc
> ===================================================================
> --- gcc/gimple-loop-interchange.cc      (revision 255375)
> +++ gcc/gimple-loop-interchange.cc      (working copy)
> @@ -41,6 +41,7 @@ along with GCC; see the file COPYING3.
>  #include "tree-ssa-loop-ivopts.h"
>  #include "tree-ssa-dce.h"
>  #include "tree-data-ref.h"
> +#include "tree-vectorizer.h"
>
>  /* This pass performs loop interchange: for example, the loop nest
>
> @@ -551,23 +552,29 @@ loop_cand::analyze_iloop_reduction_var (
>       in a way that reduction operation is seen as black box.  In general,
>       we can ignore reassociation of reduction operator; we can handle fake
>       reductions in which VAR is not even used to compute NEXT.  */
> -  FOR_EACH_IMM_USE_FAST (use_p, iterator, var)
> -    {
> -      stmt = USE_STMT (use_p);
> -      if (is_gimple_debug (stmt))
> -       continue;
> -
> -      if (!flow_bb_inside_loop_p (m_loop, gimple_bb (stmt)))
> -       return false;
> -
> -      if (single_use != NULL)
> -       return false;
> +  if (! single_imm_use (var, &use_p, &single_use)
> +      || ! flow_bb_inside_loop_p (m_loop, gimple_bb (single_use)))
> +    return false;
>
> -      single_use = stmt;
> -    }
> +  /* Check the reduction operation.  We require a commutative or
> +     left-associative operation.  For FP math we also need to be allowed
> +     to associate operations.  */
> +  if (! is_gimple_assign (single_use)
> +      || ! (commutative_tree_code (gimple_assign_rhs_code (single_use))
> +           || (commutative_ternary_tree_code
> +                 (gimple_assign_rhs_code (single_use))
> +               && (use_p->use == gimple_assign_rhs1_ptr (single_use)
> +                   || use_p->use == gimple_assign_rhs2_ptr (single_use)))
> +           || (gimple_assign_rhs_code (single_use) == MINUS_EXPR
> +               && use_p->use == gimple_assign_rhs1_ptr (single_use)))
> +      || (FLOAT_TYPE_P (TREE_TYPE (var))
> +         && ! flag_associative_math))
> +    return false;
>
> +  /* Handle and verify a series of stmts feeding the reduction op.  */
>    if (single_use != next_def
> -      && !stmt_dominates_stmt_p (single_use, next_def))
> +      && !check_reduction_path (UNKNOWN_LOCATION, m_loop, phi, next,
> +                               gimple_assign_rhs_code (single_use)))
>      return false;
>
>    /* Only support cases in which INIT is used in inner loop.  */
> @@ -1964,7 +1971,7 @@ prepare_perfect_loop_nest (struct loop *
>                            vec<data_reference_p> *datarefs, vec<ddr_p> *ddrs)
>  {
>    struct loop *start_loop = NULL, *innermost = loop;
> -  struct loop *outermost = superloop_at_depth (loop, 0);
> +  struct loop *outermost = loops_for_fn (cfun)->tree_root;
>
>    /* Find loop nest from the innermost loop.  The outermost is the innermost
>       outer*/
> @@ -1985,21 +1992,26 @@ prepare_perfect_loop_nest (struct loop *
>    if (!start_loop || !start_loop->inner)
>      return false;
>
> +  /* Prepare the data reference vector for the loop nest, pruning outer
> +     loops we cannot handle.  */
>    datarefs->create (20);
> -  if ((start_loop = prepare_data_references (start_loop, datarefs)) == NULL
> +  start_loop = prepare_data_references (start_loop, datarefs);
> +  if (!start_loop
>        /* Check if there is no data reference.  */
>        || datarefs->is_empty ()
>        /* Check if there are too many of data references.  */
> -      || (int) datarefs->length () > MAX_DATAREFS
> -      /* Compute access strides for all data references.  */
> -      || ((start_loop = compute_access_strides (start_loop, innermost,
> -                                               *datarefs)) == NULL)
> -      /* Check if loop nest should be interchanged.  */
> -      || !should_interchange_loop_nest (start_loop, innermost, *datarefs))
> -    {
> -      free_data_refs_with_aux (*datarefs);
> -      return false;
> -    }
> +      || (int) datarefs->length () > MAX_DATAREFS)
> +    return false;
> +
> +  /* Compute access strides for all data references, pruning outer
> +     loops we cannot analyze refs in.  */
> +  start_loop = compute_access_strides (start_loop, innermost, *datarefs);
> +  if (!start_loop)
> +    return false;
> +
> +  /* Check if any interchange is profitable in the loop nest.  */
> +  if (!should_interchange_loop_nest (start_loop, innermost, *datarefs))
> +    return false;
>
>    /* Check if data dependences can be computed for loop nest starting from
>       start_loop.  */
> @@ -2029,14 +2041,15 @@ prepare_perfect_loop_nest (struct loop *
>         return true;
>        }
>
> +    /* ???  We should be able to adjust DDRs we already computed by
> +       truncating distance vectors.  */
>      free_dependence_relations (*ddrs);
> +    *ddrs = vNULL;
>      /* Try to compute data dependences with the outermost loop stripped.  */
>      loop = start_loop;
>      start_loop = start_loop->inner;
>    } while (start_loop && start_loop->inner);
>
> -  loop_nest->release ();
> -  free_data_refs_with_aux (*datarefs);
>    return false;
>  }
>
> @@ -2050,19 +2063,20 @@ pass_linterchange::execute (function *fu
>
>    bool changed_p = false;
>    struct loop *loop;
> -  vec<loop_p> loop_nest;
> -  vec<data_reference_p> datarefs;
> -  vec<ddr_p> ddrs;
> -
>    FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
> -    if (prepare_perfect_loop_nest (loop, &loop_nest, &datarefs, &ddrs))
> -      {
> -       tree_loop_interchange loop_interchange (loop_nest);
> -       changed_p |= loop_interchange.interchange (datarefs, ddrs);
> -       free_dependence_relations (ddrs);
> -       free_data_refs_with_aux (datarefs);
> -       loop_nest.release ();
> -      }
> +    {
> +      vec<loop_p> loop_nest = vNULL;
> +      vec<data_reference_p> datarefs = vNULL;
> +      vec<ddr_p> ddrs = vNULL;
> +      if (prepare_perfect_loop_nest (loop, &loop_nest, &datarefs, &ddrs))
> +       {
> +         tree_loop_interchange loop_interchange (loop_nest);
> +         changed_p |= loop_interchange.interchange (datarefs, ddrs);
> +       }
> +      free_dependence_relations (ddrs);
> +      free_data_refs_with_aux (datarefs);
> +      loop_nest.release ();
> +    }
>
>    if (changed_p)
>      scev_reset_htab ();
> Index: gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-1.c
> ===================================================================
> --- gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-1.c  (revision 255375)
> +++ gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-1.c  (working copy)
> @@ -1,5 +1,5 @@
>  /* { dg-do run } */
> -/* { dg-options "-O2 -floop-interchange -fdump-tree-linterchange-details" } 
> */
> +/* { dg-options "-O2 -floop-interchange -fassociative-math -fno-signed-zeros 
> -fno-trapping-math -fdump-tree-linterchange-details" } */
>
>  /* Copied from graphite/interchange-4.c */
>
> Index: gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-1b.c
> ===================================================================
> --- gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-1b.c (nonexistent)
> +++ gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-1b.c (working copy)
> @@ -0,0 +1,52 @@
> +/* { dg-do run } */
> +/* { dg-options "-O2 -floop-interchange -fdump-tree-linterchange-details" } 
> */
> +
> +/* Copied from graphite/interchange-4.c */
> +
> +#define DEBUG 0
> +#if DEBUG
> +#include <stdio.h>
> +#endif
> +
> +double u[1782225];
> +
> +static void __attribute__((noinline))
> +foo (int N, double *res)
> +{
> +  int i, j;
> +  double sum = 0;
> +  for (i = 0; i < N; i++)
> +    for (j = 0; j < N; j++)
> +      sum = sum + u[i + 1335 * j];
> +
> +  *res = sum;
> +}
> +
> +extern void abort ();
> +
> +int
> +main (void)
> +{
> +  int i, j;
> +  double res;
> +
> +  for (i = 0; i < 1782225; i++)
> +    u[i] = 0;
> +  u[0] = __DBL_MAX__;
> +  u[1335] = -__DBL_MAX__;
> +  u[1] = __DBL_MAX__;
> +  u[1336] = -__DBL_MAX__;
> +
> +  foo (1335, &res);
> +
> +#if DEBUG
> +  fprintf (stderr, "res = %d \n", res);
> +#endif
> +
> +  if (res != 0.0)
> +    abort ();
> +
> +  return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-not "is interchanged" "linterchange"} } */
> Index: gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-4.c
> ===================================================================
> --- gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-4.c  (revision 255375)
> +++ gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-4.c  (working copy)
> @@ -47,4 +47,4 @@ main (void)
>    return 0;
>  }
>
> -/* { dg-final { scan-tree-dump-times "Loop_pair<outer:., inner:.> is 
> interchanged" 1 "linterchange"} } */
> +/* { dg-final { scan-tree-dump-times "Loop_pair<outer:., inner:.> is 
> interchanged" 1 "linterchange" { xfail *-*-* } } } */
> Index: gcc/tree-vectorizer.h
> ===================================================================
> --- gcc/tree-vectorizer.h       (revision 255375)
> +++ gcc/tree-vectorizer.h       (working copy)
> @@ -1255,6 +1255,9 @@ extern tree vect_create_addr_base_for_ve
>  /* FORNOW: Used in tree-parloops.c.  */
>  extern gimple *vect_force_simple_reduction (loop_vec_info, gimple *,
>                                             bool *, bool);
> +/* Used in gimple-loop-interchange.c.  */
> +extern bool check_reduction_path (location_t, loop_p, gphi *, tree,
> +                                 enum tree_code);
>  /* Drive for loop analysis stage.  */
>  extern loop_vec_info vect_analyze_loop (struct loop *, loop_vec_info);
>  extern tree vect_build_loop_niters (loop_vec_info, bool * = NULL);
> Index: gcc/tree-vect-loop.c
> ===================================================================
> --- gcc/tree-vect-loop.c        (revision 255375)
> +++ gcc/tree-vect-loop.c        (working copy)
> @@ -2626,6 +2626,114 @@ vect_is_slp_reduction (loop_vec_info loo
>  }
>
>
> +/* Return true if the reduction PHI in LOOP with latch arg LOOP_ARG and
> +   reduction operation CODE has a handled computation expression.  */
> +
> +bool
> +check_reduction_path (location_t loc, loop_p loop, gphi *phi, tree loop_arg,
> +                     enum tree_code code)
> +{
> +  auto_vec<std::pair<ssa_op_iter, use_operand_p> > path;
> +  auto_bitmap visited;
> +  tree lookfor = PHI_RESULT (phi);
> +  ssa_op_iter curri;
> +  use_operand_p curr = op_iter_init_phiuse (&curri, phi, SSA_OP_USE);
> +  while (USE_FROM_PTR (curr) != loop_arg)
> +    curr = op_iter_next_use (&curri);
> +  curri.i = curri.numops;
> +  do
> +    {
> +      path.safe_push (std::make_pair (curri, curr));
> +      tree use = USE_FROM_PTR (curr);
> +      if (use == lookfor)
> +       break;
> +      gimple *def = SSA_NAME_DEF_STMT (use);
> +      if (gimple_nop_p (def)
> +         || ! flow_bb_inside_loop_p (loop, gimple_bb (def)))
> +       {
> +pop:
> +         do
> +           {
> +             std::pair<ssa_op_iter, use_operand_p> x = path.pop ();
> +             curri = x.first;
> +             curr = x.second;
> +             do
> +               curr = op_iter_next_use (&curri);
> +             /* Skip already visited or non-SSA operands (from iterating
> +                over PHI args).  */
> +             while (curr != NULL_USE_OPERAND_P
> +                    && (TREE_CODE (USE_FROM_PTR (curr)) != SSA_NAME
> +                        || ! bitmap_set_bit (visited,
> +                                             SSA_NAME_VERSION
> +                                               (USE_FROM_PTR (curr)))));
> +           }
> +         while (curr == NULL_USE_OPERAND_P && ! path.is_empty ());
> +         if (curr == NULL_USE_OPERAND_P)
> +           break;
> +       }
> +      else
> +       {
> +         if (gimple_code (def) == GIMPLE_PHI)
> +           curr = op_iter_init_phiuse (&curri, as_a <gphi *>(def), 
> SSA_OP_USE);
> +         else
> +           curr = op_iter_init_use (&curri, def, SSA_OP_USE);
> +         while (curr != NULL_USE_OPERAND_P
> +                && (TREE_CODE (USE_FROM_PTR (curr)) != SSA_NAME
> +                    || ! bitmap_set_bit (visited,
> +                                         SSA_NAME_VERSION
> +                                           (USE_FROM_PTR (curr)))))
> +           curr = op_iter_next_use (&curri);
> +         if (curr == NULL_USE_OPERAND_P)
> +           goto pop;
> +       }
> +    }
> +  while (1);
> +  if (dump_file && (dump_flags & TDF_DETAILS))
> +    {
> +      dump_printf_loc (MSG_NOTE, loc, "reduction path: ");
> +      unsigned i;
> +      std::pair<ssa_op_iter, use_operand_p> *x;
> +      FOR_EACH_VEC_ELT (path, i, x)
> +       {
> +         dump_generic_expr (MSG_NOTE, TDF_SLIM, USE_FROM_PTR (x->second));
> +         dump_printf (MSG_NOTE, " ");
> +       }
> +      dump_printf (MSG_NOTE, "\n");
> +    }
> +
> +  /* Check whether the reduction path detected is valid.  */
> +  bool fail = path.length () == 0;
> +  bool neg = false;
> +  for (unsigned i = 1; i < path.length (); ++i)
> +    {
> +      gimple *use_stmt = USE_STMT (path[i].second);
> +      tree op = USE_FROM_PTR (path[i].second);
> +      if (! has_single_use (op)
> +         || ! is_gimple_assign (use_stmt))
> +       {
> +         fail = true;
> +         break;
> +       }
> +      if (gimple_assign_rhs_code (use_stmt) != code)
> +       {
> +         if (code == PLUS_EXPR
> +             && gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
> +           {
> +             /* Track whether we negate the reduction value each iteration.  
> */
> +             if (gimple_assign_rhs2 (use_stmt) == op)
> +               neg = ! neg;
> +           }
> +         else
> +           {
> +             fail = true;
> +             break;
> +           }
> +       }
> +    }
> +  return ! fail && ! neg;
> +}
> +
> +
>  /* Function vect_is_simple_reduction
>
>     (1) Detect a cross-iteration def-use cycle that represents a simple
> @@ -3128,106 +3236,8 @@ vect_is_simple_reduction (loop_vec_info
>      }
>
>    /* Look for the expression computing loop_arg from loop PHI result.  */
> -  auto_vec<std::pair<ssa_op_iter, use_operand_p> > path;
> -  auto_bitmap visited;
> -  tree lookfor = PHI_RESULT (phi);
> -  ssa_op_iter curri;
> -  use_operand_p curr = op_iter_init_phiuse (&curri, as_a <gphi *>(phi),
> -                                           SSA_OP_USE);
> -  while (USE_FROM_PTR (curr) != loop_arg)
> -    curr = op_iter_next_use (&curri);
> -  curri.i = curri.numops;
> -  do
> -    {
> -      path.safe_push (std::make_pair (curri, curr));
> -      tree use = USE_FROM_PTR (curr);
> -      if (use == lookfor)
> -       break;
> -      gimple *def = SSA_NAME_DEF_STMT (use);
> -      if (gimple_nop_p (def)
> -         || ! flow_bb_inside_loop_p (loop, gimple_bb (def)))
> -       {
> -pop:
> -         do
> -           {
> -             std::pair<ssa_op_iter, use_operand_p> x = path.pop ();
> -             curri = x.first;
> -             curr = x.second;
> -             do
> -               curr = op_iter_next_use (&curri);
> -             /* Skip already visited or non-SSA operands (from iterating
> -                over PHI args).  */
> -             while (curr != NULL_USE_OPERAND_P
> -                    && (TREE_CODE (USE_FROM_PTR (curr)) != SSA_NAME
> -                        || ! bitmap_set_bit (visited,
> -                                             SSA_NAME_VERSION
> -                                               (USE_FROM_PTR (curr)))));
> -           }
> -         while (curr == NULL_USE_OPERAND_P && ! path.is_empty ());
> -         if (curr == NULL_USE_OPERAND_P)
> -           break;
> -       }
> -      else
> -       {
> -         if (gimple_code (def) == GIMPLE_PHI)
> -           curr = op_iter_init_phiuse (&curri, as_a <gphi *>(def), 
> SSA_OP_USE);
> -         else
> -           curr = op_iter_init_use (&curri, def, SSA_OP_USE);
> -         while (curr != NULL_USE_OPERAND_P
> -                && (TREE_CODE (USE_FROM_PTR (curr)) != SSA_NAME
> -                    || ! bitmap_set_bit (visited,
> -                                         SSA_NAME_VERSION
> -                                           (USE_FROM_PTR (curr)))))
> -           curr = op_iter_next_use (&curri);
> -         if (curr == NULL_USE_OPERAND_P)
> -           goto pop;
> -       }
> -    }
> -  while (1);
> -  if (dump_file && (dump_flags & TDF_DETAILS))
> -    {
> -      dump_printf_loc (MSG_NOTE, vect_location,
> -                      "reduction path: ");
> -      unsigned i;
> -      std::pair<ssa_op_iter, use_operand_p> *x;
> -      FOR_EACH_VEC_ELT (path, i, x)
> -       {
> -         dump_generic_expr (MSG_NOTE, TDF_SLIM, USE_FROM_PTR (x->second));
> -         dump_printf (MSG_NOTE, " ");
> -       }
> -      dump_printf (MSG_NOTE, "\n");
> -    }
> -
> -  /* Check whether the reduction path detected is valid.  */
> -  bool fail = path.length () == 0;
> -  bool neg = false;
> -  for (unsigned i = 1; i < path.length (); ++i)
> -    {
> -      gimple *use_stmt = USE_STMT (path[i].second);
> -      tree op = USE_FROM_PTR (path[i].second);
> -      if (! has_single_use (op)
> -         || ! is_gimple_assign (use_stmt))
> -       {
> -         fail = true;
> -         break;
> -       }
> -      if (gimple_assign_rhs_code (use_stmt) != code)
> -       {
> -         if (code == PLUS_EXPR
> -             && gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
> -           {
> -             /* Track whether we negate the reduction value each iteration.  
> */
> -             if (gimple_assign_rhs2 (use_stmt) == op)
> -               neg = ! neg;
> -           }
> -         else
> -           {
> -             fail = true;
> -             break;
> -           }
> -       }
> -    }
> -  if (! fail && ! neg)
> +  if (check_reduction_path (vect_location, loop, as_a <gphi *> (phi), 
> loop_arg,
> +                           code))
>      return def_stmt;
>
>    if (dump_enabled_p ())

Reply via email to