The following changes the fallback taken when any of the discovered
SLP instances fails vectorization checking: instead of disabling SLP
outright, it first emulates with SLP what non-SLP vectorization would
do, by forcing single-lane discovery for all instances.

The patch does not remove the final fallback to disable SLP but it
reduces the fallout from failing vectorization when any non-SLP
stmt survives analysis.

Bootstrapped and tested on x86_64-unknown-linux-gnu.

I'm watching CI results but do not really expect to see big improvements,
except when forcing SLP as with the recently posted
"Testing: fail vectorization when not using SLP" patch.

        * tree-vectorizer.h (vect_analyze_slp): Add force_single_lane
        parameter.
        * tree-vect-slp.cc (vect_analyze_slp_instance): Remove
        defaulting of force_single_lane.
        (vect_build_slp_instance): Likewise.  Pass down appropriate
        force_single_lane.
        (vect_analyze_slp): Add force_single_lane parameter and pass
        it down appropriately.
        (vect_slp_analyze_bb_1): Always do multi-lane SLP.
        * tree-vect-loop.cc (vect_analyze_loop_2): Track two SLP
        modes and adjust accordingly.
        (vect_analyze_loop_1): Save the SLP mode when unrolling.
---
 gcc/tree-vect-loop.cc | 25 +++++++++++++------------
 gcc/tree-vect-slp.cc  | 43 +++++++++++++++++++++++++------------------
 gcc/tree-vectorizer.h |  2 +-
 3 files changed, 39 insertions(+), 31 deletions(-)

diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 199d79029e4..8bf231e98ec 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -2718,7 +2718,7 @@ vect_determine_partial_vectors_and_peeling (loop_vec_info 
loop_vinfo)
 static opt_result
 vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal,
                     unsigned *suggested_unroll_factor,
-                    bool& slp_done_for_suggested_uf)
+                    unsigned& slp_done_for_suggested_uf)
 {
   opt_result ok = opt_result::success ();
   int res;
@@ -2787,11 +2787,11 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool 
&fatal,
   /* If the slp decision is false when suggested unroll factor is worked
      out, and we are applying suggested unroll factor, we can simply skip
      all slp related analyses this time.  */
-  bool slp = !applying_suggested_uf || slp_done_for_suggested_uf;
+  unsigned slp = !applying_suggested_uf ? 2 : slp_done_for_suggested_uf;
 
   /* Classify all cross-iteration scalar data-flow cycles.
      Cross-iteration cycles caused by virtual phis are analyzed separately.  */
-  vect_analyze_scalar_cycles (loop_vinfo, slp);
+  vect_analyze_scalar_cycles (loop_vinfo, slp == 2);
 
   vect_pattern_recog (loop_vinfo);
 
@@ -2859,7 +2859,8 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool 
&fatal,
     {
       /* Check the SLP opportunities in the loop, analyze and build
         SLP trees.  */
-      ok = vect_analyze_slp (loop_vinfo, LOOP_VINFO_N_STMTS (loop_vinfo));
+      ok = vect_analyze_slp (loop_vinfo, LOOP_VINFO_N_STMTS (loop_vinfo),
+                            slp == 1);
       if (!ok)
        return ok;
 
@@ -3212,15 +3213,14 @@ again:
   /* Ensure that "ok" is false (with an opt_problem if dumping is enabled).  */
   gcc_assert (!ok);
 
-  /* Try again with SLP forced off but if we didn't do any SLP there is
+  /* Try again with SLP degraded but if we didn't do any SLP there is
      no point in re-trying.  */
   if (!slp)
     return ok;
 
-  /* If the slp decision is true when suggested unroll factor is worked
-     out, and we are applying suggested unroll factor, we don't need to
-     re-try any more.  */
-  if (applying_suggested_uf && slp_done_for_suggested_uf)
+  /* If we are applying suggested unroll factor, we don't need to
+     re-try any more as we want to keep the SLP mode fixed.  */
+  if (applying_suggested_uf)
     return ok;
 
   /* If there are reduction chains re-trying will fail anyway.  */
@@ -3268,8 +3268,9 @@ again:
     dump_printf_loc (MSG_NOTE, vect_location,
                     "re-trying with SLP disabled\n");
 
-  /* Roll back state appropriately.  No SLP this time.  */
-  slp = false;
+  /* Roll back state appropriately.  Degrade SLP this time.  From multi-
+     to single-lane to disabled.  */
+  --slp;
   /* Restore vectorization factor as it were without SLP.  */
   LOOP_VINFO_VECT_FACTOR (loop_vinfo) = saved_vectorization_factor;
   /* Free the SLP instances.  */
@@ -3414,7 +3415,7 @@ vect_analyze_loop_1 (class loop *loop, vec_info_shared 
*shared,
   machine_mode vector_mode = vector_modes[mode_i];
   loop_vinfo->vector_mode = vector_mode;
   unsigned int suggested_unroll_factor = 1;
-  bool slp_done_for_suggested_uf = false;
+  unsigned slp_done_for_suggested_uf = 0;
 
   /* Run the main analysis.  */
   opt_result res = vect_analyze_loop_2 (loop_vinfo, fatal,
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 4fcb9e2fa2b..c8af4d320eb 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -3488,7 +3488,7 @@ vect_analyze_slp_instance (vec_info *vinfo,
                           scalar_stmts_to_slp_tree_map_t *bst_map,
                           stmt_vec_info stmt_info, slp_instance_kind kind,
                           unsigned max_tree_size, unsigned *limit,
-                          bool force_single_lane = false);
+                          bool force_single_lane);
 
 /* Build an interleaving scheme for the store sources RHS_NODES from
    SCALAR_STMTS.  */
@@ -3684,7 +3684,7 @@ vect_build_slp_instance (vec_info *vinfo,
                         scalar_stmts_to_slp_tree_map_t *bst_map,
                         /* ???  We need stmt_info for group splitting.  */
                         stmt_vec_info stmt_info_,
-                        bool force_single_lane = false)
+                        bool force_single_lane)
 {
   /* If there's no budget left bail out early.  */
   if (*limit == 0)
@@ -3891,7 +3891,7 @@ vect_build_slp_instance (vec_info *vinfo,
                                                               group1_size);
              bool res = vect_analyze_slp_instance (vinfo, bst_map, stmt_info,
                                                    kind, max_tree_size,
-                                                   limit);
+                                                   limit, false);
              /* Split the rest at the failure point and possibly
                 re-analyze the remaining matching part if it has
                 at least two lanes.  */
@@ -3904,14 +3904,14 @@ vect_build_slp_instance (vec_info *vinfo,
                  if (i - group1_size > 1)
                    res |= vect_analyze_slp_instance (vinfo, bst_map, rest2,
                                                      kind, max_tree_size,
-                                                     limit);
+                                                     limit, false);
                }
              /* Re-analyze the non-matching tail if it has at least
                 two lanes.  */
              if (i + 1 < group_size)
                res |= vect_analyze_slp_instance (vinfo, bst_map,
                                                  rest, kind, max_tree_size,
-                                                 limit);
+                                                 limit, false);
              return res;
            }
        }
@@ -4544,7 +4544,8 @@ vect_lower_load_permutations (loop_vec_info loop_vinfo,
    trees of packed scalar stmts if SLP is possible.  */
 
 opt_result
-vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
+vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size,
+                 bool force_single_lane)
 {
   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
   unsigned int i;
@@ -4561,7 +4562,8 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
   /* Find SLP sequences starting from groups of grouped stores.  */
   FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
     vect_analyze_slp_instance (vinfo, bst_map, first_element,
-                              slp_inst_kind_store, max_tree_size, &limit);
+                              slp_inst_kind_store, max_tree_size, &limit,
+                              force_single_lane);
 
   /* For loops also start SLP discovery from non-grouped stores.  */
   if (loop_vinfo)
@@ -4581,7 +4583,7 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
            stmts.quick_push (stmt_info);
            vect_build_slp_instance (vinfo, slp_inst_kind_store,
                                     stmts, roots, remain, max_tree_size,
-                                    &limit, bst_map, NULL);
+                                    &limit, bst_map, NULL, force_single_lane);
          }
     }
 
@@ -4598,7 +4600,8 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
                                       bb_vinfo->roots[i].stmts,
                                       bb_vinfo->roots[i].roots,
                                       bb_vinfo->roots[i].remain,
-                                      max_tree_size, &limit, bst_map, NULL))
+                                      max_tree_size, &limit, bst_map, NULL,
+                                      false))
            {
              bb_vinfo->roots[i].stmts = vNULL;
              bb_vinfo->roots[i].roots = vNULL;
@@ -4614,9 +4617,11 @@ vect_analyze_slp (vec_info *vinfo, unsigned 
max_tree_size)
        if (! STMT_VINFO_RELEVANT_P (first_element)
            && ! STMT_VINFO_LIVE_P (first_element))
          ;
-       else if (! vect_analyze_slp_instance (vinfo, bst_map, first_element,
-                                             slp_inst_kind_reduc_chain,
-                                             max_tree_size, &limit))
+       else if (force_single_lane
+                || ! vect_analyze_slp_instance (vinfo, bst_map, first_element,
+                                                slp_inst_kind_reduc_chain,
+                                                max_tree_size, &limit,
+                                                force_single_lane))
          {
            /* Dissolve reduction chain group.  */
            stmt_vec_info vinfo = first_element;
@@ -4656,7 +4661,8 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
                {
                  /* Do not discover SLP reductions combining lane-reducing
                     ops, that will fail later.  */
-                 if (!lane_reducing_stmt_p (STMT_VINFO_STMT (next_info)))
+                 if (!force_single_lane
+                     && !lane_reducing_stmt_p (STMT_VINFO_STMT (next_info)))
                    scalar_stmts.quick_push (next_info);
                  else
                    {
@@ -4670,7 +4676,8 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
                                               slp_inst_kind_reduc_group,
                                               stmts, roots, remain,
                                               max_tree_size, &limit,
-                                              bst_map, NULL);
+                                              bst_map, NULL,
+                                              force_single_lane);
                    }
                }
            }
@@ -4683,7 +4690,7 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
                                           slp_inst_kind_reduc_group,
                                           scalar_stmts, roots, remain,
                                           max_tree_size, &limit, bst_map,
-                                          NULL))
+                                          NULL, force_single_lane))
            {
              if (scalar_stmts.length () <= 1)
                scalar_stmts.release ();
@@ -4699,7 +4706,7 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
                                           slp_inst_kind_reduc_group,
                                           stmts, roots, remain,
                                           max_tree_size, &limit,
-                                          bst_map, NULL);
+                                          bst_map, NULL, force_single_lane);
                }
              saved_stmts.release ();
            }
@@ -4731,7 +4738,7 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
                                         slp_inst_kind_reduc_group,
                                         stmts, roots, remain,
                                         max_tree_size, &limit,
-                                        bst_map, NULL);
+                                        bst_map, NULL, force_single_lane);
              }
          }
     }
@@ -8934,7 +8941,7 @@ vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, 
bool &fatal,
 
   /* Check the SLP opportunities in the basic block, analyze and build SLP
      trees.  */
-  if (!vect_analyze_slp (bb_vinfo, n_stmts))
+  if (!vect_analyze_slp (bb_vinfo, n_stmts, false))
     {
       if (dump_enabled_p ())
        {
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 699ae9e33ba..53105f9292f 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -2538,7 +2538,7 @@ extern bool vect_transform_slp_perm_load (vec_info *, 
slp_tree, const vec<tree>
                                          unsigned * = nullptr, bool = false);
 extern bool vect_slp_analyze_operations (vec_info *);
 extern void vect_schedule_slp (vec_info *, const vec<slp_instance> &);
-extern opt_result vect_analyze_slp (vec_info *, unsigned);
+extern opt_result vect_analyze_slp (vec_info *, unsigned, bool);
 extern bool vect_make_slp_decision (loop_vec_info);
 extern void vect_detect_hybrid_slp (loop_vec_info);
 extern void vect_optimize_slp (vec_info *);
-- 
2.43.0

Reply via email to