For the new testcase we end up using an intermediate value of the
reduction chain as reduction result.  This can be easily supported
by generating epilogues for (possibly multiple) intermediate values.

For this to work, the following patch relaxes cycle detection to allow
out-of-loop uses and makes sure vectorizable_live_operation
picks it up as a reduction.  Finally, some invariants that process_use
asserts no longer hold.

Nicely simple ;)

Bootstrap & regtest running on x86_64-unknown-linux-gnu.

Richard.

2019-10-28  Richard Biener  <rguent...@suse.de>

        PR tree-optimization/65930
        * tree-vect-loop.c (check_reduction_path): Relax single-use
        check allowing out-of-loop uses.
        (vect_is_simple_reduction): SLP reduction chains cannot have
        intermediate stmts used outside of the loop.
        (vect_create_epilog_for_reduction): The adjustment might need
        to be converted.
        (vectorizable_reduction): Annotate live stmts of the reduction
        chain with STMT_VINFO_REDUC_DEF.
        * tree-vect-stms.c (process_use): Remove no longer true asserts.

        * gcc.dg/vect/pr65930-1.c: New testcase.

Index: gcc/tree-vect-loop.c
===================================================================
--- gcc/tree-vect-loop.c        (revision 277517)
+++ gcc/tree-vect-loop.c        (working copy)
@@ -2678,8 +2678,7 @@ pop:
     {
       gimple *use_stmt = USE_STMT (path[i].second);
       tree op = USE_FROM_PTR (path[i].second);
-      if (! has_single_use (op)
-         || ! is_gimple_assign (use_stmt)
+      if (! is_gimple_assign (use_stmt)
          /* The following make sure we can compute the operand index
             easily plus it mostly disallows chaining via COND_EXPR condition
             operands.  */
@@ -2690,6 +2689,20 @@ pop:
          fail = true;
          break;
        }
+      /* Check there's only a single stmt the op is used on inside
+         of the loop.  */
+      imm_use_iterator imm_iter;
+      gimple *op_use_stmt;
+      unsigned cnt = 0;
+      FOR_EACH_IMM_USE_STMT (op_use_stmt, imm_iter, op)
+       if (!is_gimple_debug (op_use_stmt)
+           && flow_bb_inside_loop_p (loop, gimple_bb (op_use_stmt)))
+         cnt++;
+      if (cnt != 1)
+       {
+         fail = true;
+         break;
+       }
       enum tree_code use_code = gimple_assign_rhs_code (use_stmt);
       if (use_code == MINUS_EXPR)
        {
@@ -2922,7 +2935,10 @@ vect_is_simple_reduction (loop_vec_info
       for (i = path.length () - 1; i >= 1; --i)
        {
          gimple *stmt = USE_STMT (path[i].second);
-         if (gimple_assign_rhs_code (stmt) != code)
+         if (gimple_assign_rhs_code (stmt) != code
+             /* We can only handle the final value in epilogue
+                generation for reduction chains.  */
+             || (i != 1 && !has_single_use (gimple_assign_lhs (stmt))))
            is_slp_reduc = false;
          stmt_vec_info stmt_info = loop_info->lookup_stmt (stmt);
          STMT_VINFO_REDUC_IDX (stmt_info)
@@ -4119,11 +4135,11 @@ vect_create_epilog_for_reduction (stmt_v
   stmt_vec_info phi_info;
   gimple_stmt_iterator exit_gsi;
   tree vec_dest;
-  tree new_temp = NULL_TREE, new_dest, new_name, new_scalar_dest;
+  tree new_temp = NULL_TREE, new_name, new_scalar_dest;
   gimple *epilog_stmt = NULL;
   gimple *exit_phi;
   tree bitsize;
-  tree expr, def;
+  tree def;
   tree orig_name, scalar_result;
   imm_use_iterator imm_iter, phi_imm_iter;
   use_operand_p use_p, phi_use_p;
@@ -5048,25 +5064,26 @@ vect_create_epilog_for_reduction (stmt_v
   if (adjustment_def)
     {
       gcc_assert (!slp_reduc);
+      gimple_seq stmts = NULL;
       if (nested_in_vect_loop)
        {
           new_phi = new_phis[0];
-         gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) == VECTOR_TYPE);
-         expr = build2 (code, vectype, PHI_RESULT (new_phi), adjustment_def);
-         new_dest = vect_create_destination_var (scalar_dest, vectype);
+         gcc_assert (VECTOR_TYPE_P (TREE_TYPE (adjustment_def)));
+         adjustment_def = gimple_convert (&stmts, vectype, adjustment_def);
+         new_temp = gimple_build (&stmts, code, vectype,
+                                  PHI_RESULT (new_phi), adjustment_def);
        }
       else
        {
           new_temp = scalar_results[0];
          gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) != VECTOR_TYPE);
-         expr = build2 (code, scalar_type, new_temp, adjustment_def);
-         new_dest = vect_create_destination_var (scalar_dest, scalar_type);
+         adjustment_def = gimple_convert (&stmts, scalar_type, adjustment_def);
+         new_temp = gimple_build (&stmts, code, scalar_type,
+                                  new_temp, adjustment_def);
        }
 
-      epilog_stmt = gimple_build_assign (new_dest, expr);
-      new_temp = make_ssa_name (new_dest, epilog_stmt);
-      gimple_assign_set_lhs (epilog_stmt, new_temp);
-      gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
+      epilog_stmt = gimple_seq_last_stmt (stmts);
+      gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
       if (nested_in_vect_loop)
         {
          stmt_vec_info epilog_stmt_info = loop_vinfo->add_stmt (epilog_stmt);
@@ -5742,6 +5759,10 @@ vectorizable_reduction (stmt_vec_info st
        }
       if (!REDUC_GROUP_FIRST_ELEMENT (def))
        only_slp_reduc_chain = false;
+      /* ???  For epilogue generation live members of the chain need
+         to point back to the PHI for info_for_reduction to work.  */
+      if (STMT_VINFO_LIVE_P (def))
+       STMT_VINFO_REDUC_DEF (def) = phi_info;
       reduc_def = gimple_op (def->stmt, 1 + STMT_VINFO_REDUC_IDX (def));
       reduc_chain_length++;
     }
Index: gcc/tree-vect-stmts.c
===================================================================
--- gcc/tree-vect-stmts.c       (revision 277517)
+++ gcc/tree-vect-stmts.c       (working copy)
@@ -457,7 +457,6 @@ process_use (stmt_vec_info stmt_vinfo, t
             bool force)
 {
   stmt_vec_info dstmt_vinfo;
-  basic_block bb, def_bb;
   enum vect_def_type dt;
 
   /* case 1: we are only interested in uses that need to be vectorized.  Uses
@@ -473,28 +472,8 @@ process_use (stmt_vec_info stmt_vinfo, t
   if (!dstmt_vinfo)
     return opt_result::success ();
 
-  def_bb = gimple_bb (dstmt_vinfo->stmt);
-
-  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
-     DSTMT_VINFO must have already been processed, because this should be the
-     only way that STMT, which is a reduction-phi, was put in the worklist,
-     as there should be no other uses for DSTMT_VINFO in the loop.  So we just
-     check that everything is as expected, and we are done.  */
-  bb = gimple_bb (stmt_vinfo->stmt);
-  if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
-      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
-      && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
-      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
-      && bb->loop_father == def_bb->loop_father)
-    {
-      if (dump_enabled_p ())
-       dump_printf_loc (MSG_NOTE, vect_location,
-                         "reduc-stmt defining reduc-phi in the same nest.\n");
-      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
-      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
-                 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
-      return opt_result::success ();
-    }
+  basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
+  basic_block bb = gimple_bb (stmt_vinfo->stmt);
 
   /* case 3a: outer-loop stmt defining an inner-loop stmt:
        outer-loop-header-bb:
Index: gcc/testsuite/gcc.dg/vect/pr65930-1.c
===================================================================
--- gcc/testsuite/gcc.dg/vect/pr65930-1.c       (nonexistent)
+++ gcc/testsuite/gcc.dg/vect/pr65930-1.c       (working copy)
@@ -0,0 +1,26 @@
+/* { dg-require-effective-target vect_int } */
+
+#include "tree-vect.h"
+
+unsigned __attribute__((noipa))
+bar (unsigned int *x)
+{
+  int sum = 4;
+  x = __builtin_assume_aligned (x, __BIGGEST_ALIGNMENT__);
+  for (int i = 0; i < 16; ++i)
+    sum += x[i];
+  return sum;
+}
+
+int
+main()
+{
+  static int a[16] __attribute__((aligned(__BIGGEST_ALIGNMENT__)))
+    = { 1, 3, 5, 8, 9, 10, 17, 18, 23, 29, 30, 55, 42, 2, 3, 1 };
+  check_vect ();
+  if (bar (a) != 260)
+    abort ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } } */

Reply via email to