This enables SLP build to handle PHI nodes in full, continuing
the SLP build to non-backedges.  For loop vectorization this
enables outer loop vectorization of nested SLP cycles and for
BB vectorization this enables vectorization of PHIs at CFG merges.

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

The patch needs some final polishing and eventually I'm going
to split it into BB and loop vect parts (not sure, only if
easily possible).

Comments welcome.  The various !child checks are a bit iffy
and a more clever way to do pre/postorder walks of the SLP
graph would be nice to have I guess - but the SLP graph
changes quite often :/

There's still one (serious?) wart in how we represent
SLP reduction chains - the SLP node for the PHI
has the single scalar stmt repeated N times and obviously
we cannot find the backedge def for this.  I suppose more
manual frobbing should be done here - I'll see to this
as a followup.

2020-10-19  Richard Biener  <rguent...@suse.de>

        * gimple.h ():
        * tree-vect-loop.c ():
        * tree-vect-slp.c ():
        * tree-vect-stmts.c ():

        * gcc.dg/vect/vect-outer-slp-1.c: New testcase.
        * gcc.dg/vect/bb-slp-54.c: Likewise.
---
 gcc/gimple.h                                 |   2 +
 gcc/testsuite/gcc.dg/vect/bb-slp-54.c        |  23 ++
 gcc/testsuite/gcc.dg/vect/vect-outer-slp-1.c |  31 +++
 gcc/tree-vect-loop.c                         |  98 ++++++-
 gcc/tree-vect-slp.c                          | 273 +++++++++++++------
 gcc/tree-vect-stmts.c                        |  11 +-
 gcc/tree-vectorizer.c                        |   3 +-
 gcc/tree-vectorizer.h                        |   3 +
 8 files changed, 354 insertions(+), 90 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/bb-slp-54.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/vect-outer-slp-1.c

diff --git a/gcc/gimple.h b/gcc/gimple.h
index 3c9b9965f5a..87c90be9a6a 100644
--- a/gcc/gimple.h
+++ b/gcc/gimple.h
@@ -6598,6 +6598,8 @@ gimple_expr_type (const gimple *stmt)
     }
   else if (code == GIMPLE_COND)
     return boolean_type_node;
+  else if (code == GIMPLE_PHI)
+    return TREE_TYPE (gimple_phi_result (stmt));
   else
     return void_type_node;
 }
diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-54.c 
b/gcc/testsuite/gcc.dg/vect/bb-slp-54.c
new file mode 100644
index 00000000000..d05ce33310d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-54.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_double } */
+
+double a[2], b[2], c[2];
+
+void foo(int flag)
+{
+  double tem1, tem2;
+  if (flag)
+    {
+      tem1 = a[0];
+      tem2 = a[1];
+    }
+  else
+    {
+      tem1 = b[0];
+      tem2 = b[1];
+    }
+  c[0] = tem1;
+  c[1] = tem2;
+}
+
+/* { dg-final { scan-tree-dump-times "transform load" 2 "slp2" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-outer-slp-1.c 
b/gcc/testsuite/gcc.dg/vect/vect-outer-slp-1.c
new file mode 100644
index 00000000000..62b18bd5764
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-outer-slp-1.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+
+int a[1024];
+
+void foo (void)
+{
+  for (int i = 0; i < 1020; i += 4)
+    {
+      int suma = a[i];
+      int sumb = a[i+1];
+      int sumc = a[i+2];
+      int sumd = a[i+3];
+      for (unsigned j = 0; j < 77; ++j)
+        {
+          suma = (suma ^ i) + 1;
+          sumb = (sumb ^ i) + 2;
+          sumc = (sumc ^ i) + 3;
+          sumd = (sumd ^ i) + 4;
+        }
+      a[i] = suma;
+      a[i+1] = sumb;
+      a[i+2] = sumc;
+      a[i+3] = sumd;
+    }
+}
+
+/* We should vectorize this outer loop with SLP.  */
+/* { dg-final { scan-tree-dump "OUTER LOOP VECTORIZED" "vect" } } */
+/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */
+/* { dg-final { scan-tree-dump-not "VEC_PERM_EXPR" "vect" } } */
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 991fd457229..952a6878447 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -7375,15 +7375,24 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo,
   if (slp_node)
     {
       vec_initial_defs.reserve (vec_num);
-      gcc_assert (slp_node == slp_node_instance->reduc_phis);
-      stmt_vec_info first = REDUC_GROUP_FIRST_ELEMENT (reduc_stmt_info);
-      tree neutral_op
-       = neutral_op_for_slp_reduction (slp_node, vectype_out,
-                                       STMT_VINFO_REDUC_CODE (reduc_info),
-                                       first != NULL);
-      get_initial_defs_for_reduction (loop_vinfo, 
slp_node_instance->reduc_phis,
-                                     &vec_initial_defs, vec_num,
-                                     first != NULL, neutral_op);
+      if (nested_cycle)
+       {
+         unsigned phi_idx = loop_preheader_edge (loop)->dest_idx;
+         vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[phi_idx],
+                            &vec_initial_defs);
+       }
+      else
+       {
+         gcc_assert (slp_node == slp_node_instance->reduc_phis);
+         stmt_vec_info first = REDUC_GROUP_FIRST_ELEMENT (reduc_stmt_info);
+         tree neutral_op
+             = neutral_op_for_slp_reduction (slp_node, vectype_out,
+                                             STMT_VINFO_REDUC_CODE 
(reduc_info),
+                                             first != NULL);
+         get_initial_defs_for_reduction (loop_vinfo, 
slp_node_instance->reduc_phis,
+                                         &vec_initial_defs, vec_num,
+                                         first != NULL, neutral_op);
+       }
     }
   else
     {
@@ -7518,6 +7527,65 @@ vectorizable_lc_phi (loop_vec_info loop_vinfo,
   return true;
 }
 
+/* Vectorizes PHIs.  */
+
+bool
+vectorizable_phi (vec_info *,
+                 stmt_vec_info stmt_info, gimple **vec_stmt,
+                 slp_tree slp_node)
+{
+  if (!is_a <gphi *> (stmt_info->stmt) || !slp_node)
+    return false;
+
+  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
+    return false;
+
+  tree vectype = SLP_TREE_VECTYPE (slp_node);
+
+  if (!vec_stmt) /* transformation not required.  */
+    {
+      slp_tree child;
+      unsigned i;
+      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (slp_node), i, child)
+       if (!vect_maybe_update_slp_op_vectype (child, vectype))
+         {
+           if (dump_enabled_p ())
+             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                              "incompatible vector types for invariants\n");
+           return false;
+         }
+      STMT_VINFO_TYPE (stmt_info) = phi_info_type;
+      return true;
+    }
+
+  tree scalar_dest = gimple_phi_result (stmt_info->stmt);
+  basic_block bb = gimple_bb (stmt_info->stmt);
+  tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
+  auto_vec<tree> vec_oprnds;
+  auto_vec<gphi *> new_phis;
+  for (unsigned i = 0; i < gimple_phi_num_args (stmt_info->stmt); ++i)
+    {
+      vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[i], &vec_oprnds);
+      if (i == 0)
+       {
+         new_phis.create (vec_oprnds.length ());
+         for (unsigned j = 0; j < vec_oprnds.length (); j++)
+           {
+             /* Create the vectorized LC PHI node.  */
+             new_phis.quick_push (create_phi_node (vec_dest, bb));
+             SLP_TREE_VEC_STMTS (slp_node).quick_push (new_phis[j]);
+           }
+       }
+      edge e = gimple_phi_arg_edge (as_a <gphi *> (stmt_info->stmt), i);
+      for (unsigned j = 0; j < vec_oprnds.length (); j++)
+       {
+         add_phi_arg (new_phis[j], vec_oprnds[j], e, UNKNOWN_LOCATION);
+       }
+    }
+
+  return true;
+}
+
 
 /* Function vect_min_worthwhile_factor.
 
@@ -8374,8 +8442,16 @@ vectorizable_live_operation (vec_info *vinfo,
       gimple_seq stmts = NULL;
       new_tree = force_gimple_operand (fold_convert (lhs_type, new_tree),
                                       &stmts, true, NULL_TREE);
-
-      gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
+      if (is_a <gphi *> (vec_stmt))
+       {
+         gimple_stmt_iterator si = gsi_after_labels (gimple_bb (vec_stmt));
+         gsi_insert_seq_before (&si, stmts, GSI_SAME_STMT);
+       }
+      else
+       {
+         gimple_stmt_iterator si = gsi_for_stmt (vec_stmt);
+         gsi_insert_seq_after (&si, stmts, GSI_SAME_STMT);
+       }
 
       /* Replace use of lhs with newly computed result.  If the use stmt is a
         single arg PHI, just replace all uses of PHI result.  It's necessary
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index f36d8d1c642..72a3cdcfb6b 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -98,7 +98,8 @@ vect_free_slp_tree (slp_tree node)
     return;
 
   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
-    vect_free_slp_tree (child);
+    if (child)
+      vect_free_slp_tree (child);
 
   delete node;
 }
@@ -426,10 +427,13 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned 
char swap,
       else
        commutative_op = commutative_tree_code (code) ? 0U : -1U;
     }
+  else if (gphi *stmt = dyn_cast <gphi *> (stmt_info->stmt))
+    number_of_oprnds = gimple_phi_num_args (stmt);
   else
     return -1;
 
   bool swapped = (swap != 0);
+  bool backedge = false;
   gcc_assert (!swapped || first_op_cond);
   enum vect_def_type *dts = XALLOCAVEC (enum vect_def_type, number_of_oprnds);
   for (i = 0; i < number_of_oprnds; i++)
@@ -446,6 +450,13 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned 
char swap,
          else
            oprnd = gimple_op (stmt_info->stmt, map[i]);
        }
+      else if (gphi *stmt = dyn_cast <gphi *> (stmt_info->stmt))
+       {
+         oprnd = gimple_phi_arg_def (stmt, i);
+         backedge = dominated_by_p (CDI_DOMINATORS,
+                                    gimple_phi_arg_edge (stmt, i)->src,
+                                    gimple_bb (stmt_info->stmt));
+       }
       else
        oprnd = gimple_op (stmt_info->stmt, first_op_idx + (swapped ? !i : i));
       if (TREE_CODE (oprnd) == VIEW_CONVERT_EXPR)
@@ -470,6 +481,9 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char 
swap,
       oprnd_info->def_stmts.quick_push (def_stmt_info);
       oprnd_info->ops.quick_push (oprnd);
 
+      if (backedge)
+       dts[i] = vect_backedge_def;
+
       if (first)
        {
          tree type = TREE_TYPE (oprnd);
@@ -504,6 +518,8 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char 
swap,
            case vect_internal_def:
            case vect_reduction_def:
            case vect_induction_def:
+           case vect_nested_cycle:
+           case vect_backedge_def:
              break;
 
            default:
@@ -514,7 +530,6 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char 
swap,
                                 oprnd);
              return -1;
            }
-
          oprnd_info->first_dt = dt;
          oprnd_info->first_op_type = type;
        }
@@ -797,6 +812,7 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
   machine_mode vec_mode;
   stmt_vec_info first_load = NULL, prev_first_load = NULL;
   bool first_stmt_load_p = false, load_p = false;
+  bool first_stmt_phi_p = false, phi_p = false;
 
   /* For every stmt in NODE find its def stmt/s.  */
   stmt_vec_info stmt_info;
@@ -886,6 +902,11 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char 
*swap,
              return false;
            }
        }
+      else if (gimple_code (stmt) == GIMPLE_PHI)
+       {
+         rhs_code = ERROR_MARK;
+         phi_p = true;
+       }
       else
        {
          rhs_code = gimple_assign_rhs_code (stmt);
@@ -898,6 +919,7 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
          *node_vectype = vectype;
          first_stmt_code = rhs_code;
          first_stmt_load_p = load_p;
+         first_stmt_phi_p = phi_p;
 
          /* Shift arguments should be equal in all the packed stmts for a
             vector shift with scalar shift operand.  */
@@ -1003,7 +1025,8 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char 
*swap,
                        || first_stmt_code == INDIRECT_REF
                        || first_stmt_code == COMPONENT_REF
                        || first_stmt_code == MEM_REF)))
-             || first_stmt_load_p != load_p)
+             || first_stmt_load_p != load_p
+             || first_stmt_phi_p != phi_p)
            {
              if (dump_enabled_p ())
                {
@@ -1059,6 +1082,18 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char 
*swap,
                }
            }
 
+         if (phi_p
+             && (gimple_bb (first_stmt_info->stmt)
+                 != gimple_bb (stmt_info->stmt)))
+           {
+             if (dump_enabled_p ())
+               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                                "Build SLP failed: different BB for PHI "
+                                "in %G", stmt);
+             /* Mismatch.  */
+             continue;
+           }
+
          if (!types_compatible_p (vectype, *node_vectype))
            {
              if (dump_enabled_p ())
@@ -1120,7 +1155,8 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char 
*swap,
            }
 
          /* Not memory operation.  */
-         if (TREE_CODE_CLASS (rhs_code) != tcc_binary
+         if (!phi_p
+             && TREE_CODE_CLASS (rhs_code) != tcc_binary
              && TREE_CODE_CLASS (rhs_code) != tcc_unary
              && TREE_CODE_CLASS (rhs_code) != tcc_expression
              && TREE_CODE_CLASS (rhs_code) != tcc_comparison
@@ -1309,41 +1345,47 @@ vect_build_slp_tree_2 (vec_info *vinfo,
 
   /* If the SLP node is a PHI (induction or reduction), terminate
      the recursion.  */
-  if (gphi *stmt = dyn_cast <gphi *> (stmt_info->stmt))
-    {
-      tree scalar_type = TREE_TYPE (PHI_RESULT (stmt));
-      tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
-                                                 group_size);
-      if (!vect_record_max_nunits (vinfo, stmt_info, group_size, vectype,
-                                  max_nunits))
-       return NULL;
+  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
+    if (gphi *stmt = dyn_cast <gphi *> (stmt_info->stmt))
+      {
+       tree scalar_type = TREE_TYPE (PHI_RESULT (stmt));
+       tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
+                                                   group_size);
+       if (!vect_record_max_nunits (vinfo, stmt_info, group_size, vectype,
+                                    max_nunits))
+         return NULL;
 
-      vect_def_type def_type = STMT_VINFO_DEF_TYPE (stmt_info);
-      /* Induction from different IVs is not supported.  */
-      if (def_type == vect_induction_def)
-       {
-         stmt_vec_info other_info;
-         FOR_EACH_VEC_ELT (stmts, i, other_info)
-           if (stmt_info != other_info)
-             return NULL;
-       }
-      else if (def_type == vect_reduction_def
-              || def_type == vect_double_reduction_def
-              || def_type == vect_nested_cycle)
-       {
-         /* Else def types have to match.  */
-         stmt_vec_info other_info;
-         FOR_EACH_VEC_ELT (stmts, i, other_info)
-           if (STMT_VINFO_DEF_TYPE (other_info) != def_type)
-             return NULL;
-       }
-      else
-       return NULL;
-      (*tree_size)++;
-      node = vect_create_new_slp_node (stmts, nops);
-      SLP_TREE_VECTYPE (node) = vectype;
-      return node;
-    }
+       vect_def_type def_type = STMT_VINFO_DEF_TYPE (stmt_info);
+       /* Induction from different IVs is not supported.  */
+       if (def_type == vect_induction_def)
+         {
+           stmt_vec_info other_info;
+           FOR_EACH_VEC_ELT (stmts, i, other_info)
+             if (stmt_info != other_info)
+               return NULL;
+         }
+       else if (def_type == vect_reduction_def
+                || def_type == vect_double_reduction_def
+                || def_type == vect_nested_cycle)
+         {
+           /* Else def types have to match.  */
+           stmt_vec_info other_info;
+           FOR_EACH_VEC_ELT (stmts, i, other_info)
+             if (STMT_VINFO_DEF_TYPE (other_info) != def_type)
+               return NULL;
+         }
+       else if (def_type != vect_internal_def)
+         return NULL;
+       if (def_type != vect_internal_def
+           && !nested_in_vect_loop_p (LOOP_VINFO_LOOP (loop_vinfo), stmt_info))
+         {
+           (*tree_size)++;
+           node = vect_create_new_slp_node (stmts, nops);
+           SLP_TREE_VECTYPE (node) = vectype;
+           SLP_TREE_CHILDREN (node).quick_grow_cleared (nops);
+           return node;
+         }
+      }
 
 
   bool two_operators = false;
@@ -1468,9 +1510,16 @@ vect_build_slp_tree_2 (vec_info *vinfo,
          continue;
        }
 
-      if (oprnd_info->first_dt != vect_internal_def
-         && oprnd_info->first_dt != vect_reduction_def
-         && oprnd_info->first_dt != vect_induction_def)
+      if (oprnd_info->first_dt == vect_backedge_def)
+       {
+         /* Backedges are filled in later, make sure to not recurse
+            here.  */
+         children.safe_push (NULL);
+         continue;
+       }
+
+      if (oprnd_info->first_dt == vect_external_def
+         || oprnd_info->first_dt == vect_constant_def)
        {
          slp_tree invnode = vect_create_new_slp_node (oprnd_info->ops);
          SLP_TREE_DEF_TYPE (invnode) = oprnd_info->first_dt;
@@ -1630,7 +1679,9 @@ fail:
       unsigned n_vector_builds = 0;
       FOR_EACH_VEC_ELT (children, j, child)
        {
-         if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
+         if (!child)
+           ;
+         else if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
            all_uniform_p = false;
          else if (!vect_slp_tree_uniform_p (child))
            {
@@ -1644,7 +1695,8 @@ fail:
          /* Roll back.  */
          matches[0] = false;
          FOR_EACH_VEC_ELT (children, j, child)
-           vect_free_slp_tree (child);
+           if (child)
+             vect_free_slp_tree (child);
 
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
@@ -1795,7 +1847,8 @@ vect_print_slp_graph (dump_flags_t dump_kind, 
dump_location_t loc,
   vect_print_slp_tree (dump_kind, loc, node);
 
   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
-    vect_print_slp_graph (dump_kind, loc, child, visited);
+    if (child)
+      vect_print_slp_graph (dump_kind, loc, child, visited);
 }
 
 static void
@@ -1825,7 +1878,8 @@ vect_mark_slp_stmts (slp_tree node, hash_set<slp_tree> 
&visited)
     STMT_SLP_TYPE (stmt_info) = pure_slp;
 
   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
-    vect_mark_slp_stmts (child, visited);
+    if (child)
+      vect_mark_slp_stmts (child, visited);
 }
 
 static void
@@ -1858,7 +1912,8 @@ vect_mark_slp_stmts_relevant (slp_tree node, 
hash_set<slp_tree> &visited)
     }
 
   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
-    vect_mark_slp_stmts_relevant (child, visited);
+    if (child)
+      vect_mark_slp_stmts_relevant (child, visited);
 }
 
 static void
@@ -1875,7 +1930,7 @@ static void
 vect_gather_slp_loads (vec<slp_tree> &loads, slp_tree node,
                       hash_set<slp_tree> &visited)
 {
-  if (visited.add (node))
+  if (!node || visited.add (node))
     return;
 
   if (SLP_TREE_CHILDREN (node).length () == 0)
@@ -2382,6 +2437,8 @@ vect_analyze_slp_backedges (vec_info *vinfo, slp_tree 
node,
   if (gphi *phi = dyn_cast <gphi *> (SLP_TREE_REPRESENTATIVE (node)->stmt))
     for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
       {
+       if (SLP_TREE_CHILDREN (node)[i])
+         continue;
        auto_vec<stmt_vec_info, 64> stmts;
        unsigned j;
        stmt_vec_info phi_info;
@@ -2398,13 +2455,7 @@ vect_analyze_slp_backedges (vec_info *vinfo, slp_tree 
node,
        slp_tree *edge_def = bst_map->get (stmts);
        if (edge_def)
          {
-           /* ???  We're currently not recording non-backedge children
-              of PHIs like external reduction initial values.  Avoid
-              NULL entries in SLP_TREE_CHILDREN for those and thus
-              for now simply only record backedge defs at a
-              SLP_TREE_CHILDREN index possibly not matching that of
-              the corresponding PHI argument index.  */
-           SLP_TREE_CHILDREN (node).quick_push (*edge_def);
+           SLP_TREE_CHILDREN (node)[i] = *edge_def;
            (*edge_def)->refcnt++;
          }
       }
@@ -2491,11 +2542,16 @@ vect_slp_build_vertices (hash_set<slp_tree> &visited, 
slp_tree node,
 
   node->vertex = vertices.length ();
   vertices.safe_push (node);
-  if (SLP_TREE_CHILDREN (node).is_empty ())
+
+  bool leaf = true;
+  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
+    if (child)
+      {
+       leaf = false;
+       vect_slp_build_vertices (visited, child, vertices, leafs);
+      }
+  if (leaf)
     leafs.safe_push (node->vertex);
-  else
-    FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
-      vect_slp_build_vertices (visited, child, vertices, leafs);
 }
 
 /* Fill the vertices and leafs vector with all nodes in the SLP graph.  */
@@ -2573,7 +2629,8 @@ vect_optimize_slp (vec_info *vinfo)
       unsigned j;
       slp_tree child;
       FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
-       add_edge (slpg, i, child->vertex);
+       if (child)
+         add_edge (slpg, i, child->vertex);
     }
 
   /* Compute (reverse) postorder on the inverted graph.  */
@@ -2772,7 +2829,7 @@ vect_optimize_slp (vec_info *vinfo)
       slp_tree child;
       FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
        {
-         if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
+         if (!child || SLP_TREE_DEF_TYPE (child) == vect_internal_def)
            continue;
 
          /* If the vector is uniform there's nothing to do.  */
@@ -3292,7 +3349,7 @@ vect_slp_analyze_node_operations (vec_info *vinfo, 
slp_tree node,
   slp_tree child;
 
   /* Assume we can code-generate all invariants.  */
-  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
+  if (!node || SLP_TREE_DEF_TYPE (node) != vect_internal_def)
     return true;
 
   /* If we already analyzed the exact same set of scalar stmts we're done.
@@ -3323,8 +3380,9 @@ vect_slp_analyze_node_operations (vec_info *vinfo, 
slp_tree node,
      other referrers.  */
   if (res)
     FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
-      if ((SLP_TREE_DEF_TYPE (child) == vect_constant_def
-          || SLP_TREE_DEF_TYPE (child) == vect_external_def)
+      if (child
+         && (SLP_TREE_DEF_TYPE (child) == vect_constant_def
+             || SLP_TREE_DEF_TYPE (child) == vect_external_def)
          /* Perform usual caching, note code-generation still
             code-gens these nodes multiple times but we expect
             to CSE them later.  */
@@ -3398,7 +3456,7 @@ vect_bb_slp_mark_live_stmts (bb_vec_info bb_vinfo, 
slp_tree node,
       gimple *orig_stmt = orig_stmt_info->stmt;
       ssa_op_iter op_iter;
       def_operand_p def_p;
-      FOR_EACH_SSA_DEF_OPERAND (def_p, orig_stmt, op_iter, SSA_OP_DEF)
+      FOR_EACH_PHI_OR_STMT_DEF (def_p, orig_stmt, op_iter, SSA_OP_DEF)
        {
          imm_use_iterator use_iter;
          gimple *use_stmt;
@@ -3467,7 +3525,7 @@ vect_bb_slp_mark_live_stmts (bb_vec_info bb_vinfo, 
slp_tree node,
 
   slp_tree child;
   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
-    if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
+    if (child && SLP_TREE_DEF_TYPE (child) == vect_internal_def)
       vect_bb_slp_mark_live_stmts (bb_vinfo, child, instance,
                                   cost_vec, svisited);
 }
@@ -3602,7 +3660,7 @@ vect_bb_partition_graph_r (bb_vec_info bb_vinfo,
 
   slp_tree child;
   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
-    if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
+    if (child && SLP_TREE_DEF_TYPE (child) == vect_internal_def)
       vect_bb_partition_graph_r (bb_vinfo, instance, child, stmt_to_instance,
                                 instance_leader, visited);
 }
@@ -3678,7 +3736,7 @@ vect_bb_slp_scalar_cost (vec_info *vinfo,
         the scalar cost.  */
       if (!STMT_VINFO_LIVE_P (stmt_info))
        {
-         FOR_EACH_SSA_DEF_OPERAND (def_p, orig_stmt, op_iter, SSA_OP_DEF)
+         FOR_EACH_PHI_OR_STMT_DEF (def_p, orig_stmt, op_iter, SSA_OP_DEF)
            {
              imm_use_iterator use_iter;
              gimple *use_stmt;
@@ -3722,7 +3780,7 @@ vect_bb_slp_scalar_cost (vec_info *vinfo,
   auto_vec<bool, 20> subtree_life;
   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
     {
-      if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
+      if (child && SLP_TREE_DEF_TYPE (child) == vect_internal_def)
        {
          /* Do not directly pass LIFE to the recursive call, copy it to
             confine changes in the callee to the current child/subtree.  */
@@ -4196,15 +4254,31 @@ vect_slp_function (function *fun)
         edge into it exits a loop (because of implementation issues with
         respect to placement of CTORs for externals).  */
       bool split = false;
+#if 0
       edge e;
       if (!single_pred_p (bb)
          || ((e = single_pred_edge (bb)),
              loop_exit_edge_p (e->src->loop_father, e)))
        split = true;
+#endif
       /* Split when a BB is not dominated by the first block.  */
-      else if (!bbs.is_empty ()
-              && !dominated_by_p (CDI_DOMINATORS, bb, bbs[0]))
+      if (!bbs.is_empty ()
+         && !dominated_by_p (CDI_DOMINATORS, bb, bbs[0]))
        split = true;
+      else
+       {
+         /* Split when the block is entered via a loop exit
+            (because of implementation issues with respect to
+            placement of CTORs for externals).  */
+         edge_iterator ei;
+         edge e;
+         FOR_EACH_EDGE (e, ei, bb->preds)
+           if (loop_exit_edge_p (e->src->loop_father, e))
+             {
+               split = true;
+               break;
+             }
+       }
 
       if (split && !bbs.is_empty ())
        {
@@ -5019,8 +5093,9 @@ vect_schedule_slp_instance (vec_info *vinfo,
 
   /* See if we have already vectorized the node in the graph of the
      SLP instance.  */
-  if ((SLP_TREE_DEF_TYPE (node) == vect_internal_def
-       && SLP_TREE_VEC_STMTS (node).exists ())
+  if (!node
+      || (SLP_TREE_DEF_TYPE (node) == vect_internal_def
+         && SLP_TREE_VEC_STMTS (node).exists ())
       || SLP_TREE_VEC_DEFS (node).exists ())
     return;
 
@@ -5196,7 +5271,7 @@ vect_remove_slp_scalar_calls (vec_info *vinfo,
   tree lhs;
   stmt_vec_info stmt_info;
 
-  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
+  if (!node || SLP_TREE_DEF_TYPE (node) != vect_internal_def)
     return;
 
   if (visited.add (node))
@@ -5228,6 +5303,41 @@ vect_remove_slp_scalar_calls (vec_info *vinfo, slp_tree 
node)
   vect_remove_slp_scalar_calls (vinfo, node, visited);
 }
 
+/* Fill in the vectorized PHI backedge defs of NODE.  */
+
+static void
+vect_fill_vectorized_backedge_defs (slp_tree node, hash_set<slp_tree> &visited)
+{
+  if (!node
+      || SLP_TREE_DEF_TYPE (node) != vect_internal_def
+      || visited.add (node))
+    return;
+
+  gphi *phi = dyn_cast <gphi *> (SLP_TREE_REPRESENTATIVE (node)->stmt);
+  if (phi)
+    {
+      edge_iterator ei;
+      edge e;
+      FOR_EACH_EDGE (e, ei, gimple_bb (phi)->preds)
+       if (dominated_by_p (CDI_DOMINATORS, e->src, e->dest))
+         {
+           unsigned dest_idx = e->dest_idx;
+           slp_tree child = SLP_TREE_CHILDREN (node)[dest_idx];
+           if (!child || SLP_TREE_DEF_TYPE (child) != vect_internal_def)
+             continue;
+           for (unsigned i = 0; i < SLP_TREE_VEC_STMTS (node).length (); ++i)
+             add_phi_arg (as_a <gphi *> (SLP_TREE_VEC_STMTS (node)[i]),
+                          vect_get_slp_vect_def (child, i),
+                          e, gimple_phi_arg_location (phi, dest_idx));
+         }
+    }
+
+  unsigned i;
+  slp_tree child;
+  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
+    vect_fill_vectorized_backedge_defs (child, visited);
+}
+
 /* Vectorize the instance root.  */
 
 void
@@ -5311,6 +5421,11 @@ vect_schedule_slp (vec_info *vinfo, vec<slp_instance> 
slp_instances)
                          "vectorizing stmts using SLP.\n");
     }
 
+  /* Set the backedge values of vectorized PHIs.  */
+  visited.empty ();
+  FOR_EACH_VEC_ELT (slp_instances, i, instance)
+    vect_fill_vectorized_backedge_defs (SLP_INSTANCE_TREE (instance), visited);
+
   FOR_EACH_VEC_ELT (slp_instances, i, instance)
     {
       slp_tree root = SLP_INSTANCE_TREE (instance);
@@ -5330,10 +5445,14 @@ vect_schedule_slp (vec_info *vinfo, vec<slp_instance> 
slp_instances)
          edge e = loop_latch_edge (gimple_bb (phi)->loop_father);
          gcc_assert (SLP_TREE_VEC_STMTS (phi_node).length ()
                      == SLP_TREE_VEC_STMTS (slp_node).length ());
-         for (unsigned j = 0; j < SLP_TREE_VEC_STMTS (phi_node).length (); ++j)
-           add_phi_arg (as_a <gphi *> (SLP_TREE_VEC_STMTS (phi_node)[j]),
-                        vect_get_slp_vect_def (slp_node, j),
-                        e, gimple_phi_arg_location (phi, e->dest_idx));
+         /* We still need this for SLP reduction chains since the backedges
+            are not found for those.  */
+         if (!SLP_TREE_CHILDREN (phi_node)[e->dest_idx])
+           for (unsigned j = 0;
+                j < SLP_TREE_VEC_STMTS (phi_node).length (); ++j)
+             add_phi_arg (as_a <gphi *> (SLP_TREE_VEC_STMTS (phi_node)[j]),
+                          vect_get_slp_vect_def (slp_node, j),
+                          e, gimple_phi_arg_location (phi, e->dest_idx));
        }
 
       /* Remove scalar call stmts.  Do not do this for basic-block
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 3575f25241f..b10f9f0735b 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -10745,7 +10745,8 @@ vect_analyze_stmt (vec_info *vinfo,
              || vectorizable_condition (vinfo, stmt_info,
                                         NULL, NULL, node, cost_vec)
              || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
-                                         cost_vec));
+                                         cost_vec)
+             || vectorizable_phi (vinfo, stmt_info, NULL, node));
     }
 
   if (!ok)
@@ -10885,6 +10886,11 @@ vect_transform_stmt (vec_info *vinfo,
       gcc_assert (done);
       break;
 
+    case phi_info_type:
+      done = vectorizable_phi (vinfo, stmt_info, &vec_stmt, slp_node);
+      gcc_assert (done);
+      break;
+
     default:
       if (!STMT_VINFO_LIVE_P (stmt_info))
        {
@@ -11277,6 +11283,9 @@ vect_is_simple_use (tree operand, vec_info *vinfo, enum 
vect_def_type *dt,
        case vect_nested_cycle:
          dump_printf (MSG_NOTE, "nested cycle\n");
          break;
+       case vect_backedge_def:
+         dump_printf (MSG_NOTE, "backedge def\n");
+         break;
        case vect_unknown_def_type:
          dump_printf (MSG_NOTE, "unknown\n");
          break;
diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
index 778177a583b..6ddc55fda3b 100644
--- a/gcc/tree-vectorizer.c
+++ b/gcc/tree-vectorizer.c
@@ -684,7 +684,8 @@ vec_info::new_stmt_vec_info (gimple *stmt)
   STMT_VINFO_SLP_VECT_ONLY (res) = false;
   STMT_VINFO_VEC_STMTS (res) = vNULL;
 
-  if (gimple_code (stmt) == GIMPLE_PHI
+  if (is_a <loop_vec_info> (this)
+      && gimple_code (stmt) == GIMPLE_PHI
       && is_loop_header_bb_p (gimple_bb (stmt)))
     STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
   else
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index b56073c4ee3..eb2c6b6e8cc 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -62,6 +62,7 @@ enum vect_def_type {
   vect_reduction_def,
   vect_double_reduction_def,
   vect_nested_cycle,
+  vect_backedge_def,
   vect_unknown_def_type
 };
 
@@ -872,6 +873,7 @@ enum stmt_vec_info_type {
   type_conversion_vec_info_type,
   cycle_phi_info_type,
   lc_phi_info_type,
+  phi_info_type,
   loop_exit_ctrl_vec_info_type
 };
 
@@ -1930,6 +1932,7 @@ extern bool vect_transform_cycle_phi (loop_vec_info, 
stmt_vec_info,
                                      slp_tree, slp_instance);
 extern bool vectorizable_lc_phi (loop_vec_info, stmt_vec_info,
                                 gimple **, slp_tree);
+extern bool vectorizable_phi (vec_info *, stmt_vec_info, gimple **, slp_tree);
 extern bool vect_worthwhile_without_simd_p (vec_info *, tree_code);
 extern int vect_get_known_peeling_cost (loop_vec_info, int, int *,
                                        stmt_vector_for_cost *,
-- 
2.26.2

Reply via email to