The following enables targets to suggest the preferred vector mode
for the epilogue of a vectorized loop.  The patch also enables more
than one vectorized epilogue in case the target suggests a vector
mode for the epilogue of a vector epilogue.

Bootstrapped and tested on x86_64-unknown-linux-gnu.

As you can see, without target cost modeling you cannot get more
than one vectorized epilogue (I've dropped the --param
vect-epilogues-nomask=N approach for now).  In case the target
prefers a specific epilogue mode but that mode turns out not to
vectorize, we continue iterating, searching for an alternative mode.

Any objections?

Thanks,
Richard.

        * tree-vectorizer.h (vector_costs::suggested_epilogue_mode): New.
        (vector_costs::m_suggested_epilogue_mode): Likewise.
        (vector_costs::vector_costs): Initialize m_suggested_epilogue_mode.
        * tree-vect-loop.cc (vect_analyze_loop): Honor the target
        suggested preferred epilogue mode and support vector epilogues
        of vector epilogues if requested.
---
 gcc/tree-vect-loop.cc | 124 ++++++++++++++++++++++++++----------------
 gcc/tree-vectorizer.h |  15 +++++
 2 files changed, 92 insertions(+), 47 deletions(-)

diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 3f2095da449..32d84cb8663 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -3714,72 +3714,102 @@ vect_analyze_loop (class loop *loop, gimple *loop_vectorized_call,
      array may contain length-agnostic and length-specific modes.  Their
      ordering is not guaranteed, so we could end up picking a mode for the main
      loop that is after the epilogue's optimal mode.  */
-  vector_modes[0] = autodetected_vector_mode;
+  if (!unlimited_cost_model (loop)
+      && first_loop_vinfo->vector_costs->suggested_epilogue_mode () != VOIDmode)
+    {
+      vector_modes[0]
+       = first_loop_vinfo->vector_costs->suggested_epilogue_mode ();
+      cached_vf_per_mode[0] = 0;
+    }
+  else
+    vector_modes[0] = autodetected_vector_mode;
   mode_i = 0;
 
   bool supports_partial_vectors =
     partial_vectors_supported_p () && param_vect_partial_vector_usage != 0;
   poly_uint64 first_vinfo_vf = LOOP_VINFO_VECT_FACTOR (first_loop_vinfo);
 
-  while (1)
+  loop_vec_info orig_loop_vinfo = first_loop_vinfo;
+  do
     {
-      /* If the target does not support partial vectors we can shorten the
-        number of modes to analyze for the epilogue as we know we can't pick a
-        mode that would lead to a VF at least as big as the
-        FIRST_VINFO_VF.  */
-      if (!supports_partial_vectors
-         && maybe_ge (cached_vf_per_mode[mode_i], first_vinfo_vf))
+      while (1)
        {
-         mode_i++;
-         if (mode_i == vector_modes.length ())
-           break;
-         continue;
-       }
-
-      if (dump_enabled_p ())
-       dump_printf_loc (MSG_NOTE, vect_location,
-                        "***** Re-trying epilogue analysis with vector "
-                        "mode %s\n", GET_MODE_NAME (vector_modes[mode_i]));
-
-      bool fatal;
-      opt_loop_vec_info loop_vinfo
-       = vect_analyze_loop_1 (loop, shared, &loop_form_info,
-                              first_loop_vinfo,
-                              vector_modes, mode_i,
-                              autodetected_vector_mode, fatal);
-      if (fatal)
-       break;
-
-      if (loop_vinfo)
-       {
-         if (pick_lowest_cost_p
-             && first_loop_vinfo->epilogue_vinfo
-             && vect_joust_loop_vinfos (loop_vinfo,
-                                        first_loop_vinfo->epilogue_vinfo))
+         /* If the target does not support partial vectors we can shorten the
+            number of modes to analyze for the epilogue as we know we can't
+            pick a mode that would lead to a VF at least as big as the
+            FIRST_VINFO_VF.  */
+         if (!supports_partial_vectors
+             && maybe_ge (cached_vf_per_mode[mode_i], first_vinfo_vf))
            {
-             gcc_assert (vect_epilogues);
-             delete first_loop_vinfo->epilogue_vinfo;
-             first_loop_vinfo->epilogue_vinfo = nullptr;
+             mode_i++;
+             if (mode_i == vector_modes.length ())
+               break;
+             continue;
            }
-         if (!first_loop_vinfo->epilogue_vinfo)
-           first_loop_vinfo->epilogue_vinfo = loop_vinfo;
-         else
+
+         if (dump_enabled_p ())
+           dump_printf_loc (MSG_NOTE, vect_location,
+                            "***** Re-trying epilogue analysis with vector "
+                            "mode %s\n", GET_MODE_NAME (vector_modes[mode_i]));
+
+         bool fatal;
+         opt_loop_vec_info loop_vinfo
+           = vect_analyze_loop_1 (loop, shared, &loop_form_info,
+                                  orig_loop_vinfo,
+                                  vector_modes, mode_i,
+                                  autodetected_vector_mode, fatal);
+         if (fatal)
+           break;
+
+         if (loop_vinfo)
            {
-             delete loop_vinfo;
-             loop_vinfo = opt_loop_vec_info::success (NULL);
+             if (pick_lowest_cost_p
+                 && orig_loop_vinfo->epilogue_vinfo
+                 && vect_joust_loop_vinfos (loop_vinfo,
+                                            orig_loop_vinfo->epilogue_vinfo))
+               {
+                 gcc_assert (vect_epilogues);
+                 delete orig_loop_vinfo->epilogue_vinfo;
+                 orig_loop_vinfo->epilogue_vinfo = nullptr;
+               }
+             if (!orig_loop_vinfo->epilogue_vinfo)
+               orig_loop_vinfo->epilogue_vinfo = loop_vinfo;
+             else
+               {
+                 delete loop_vinfo;
+                 loop_vinfo = opt_loop_vec_info::success (NULL);
+               }
+
+             /* For now only allow one epilogue loop, but allow
+                pick_lowest_cost_p to replace it, so commit to the
+                first epilogue if we have no reason to try alternatives.  */
+             if (!pick_lowest_cost_p)
+               break;
            }
 
-         /* For now only allow one epilogue loop, but allow
-            pick_lowest_cost_p to replace it, so commit to the
-            first epilogue if we have no reason to try alternatives.  */
-         if (!pick_lowest_cost_p)
+         if (mode_i == vector_modes.length ())
            break;
        }
 
-      if (mode_i == vector_modes.length ())
+      orig_loop_vinfo = orig_loop_vinfo->epilogue_vinfo;
+      if (!orig_loop_vinfo)
        break;
 
+      /* When we selected a first vectorized epilogue, see if the target
+        suggests to have another one.  */
+      if (!unlimited_cost_model (loop)
+         && (orig_loop_vinfo->vector_costs->suggested_epilogue_mode ()
+             != VOIDmode))
+       {
+         vector_modes[0]
+           = orig_loop_vinfo->vector_costs->suggested_epilogue_mode ();
+         cached_vf_per_mode[0] = 0;
+         mode_i = 0;
+       }
+      else
+       break;
     }
+  while (1);
 
   if (first_loop_vinfo->epilogue_vinfo)
     {
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 88db34b87b6..273e8c644e7 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -1669,6 +1669,7 @@ public:
   unsigned int outside_cost () const;
   unsigned int total_cost () const;
   unsigned int suggested_unroll_factor () const;
+  machine_mode suggested_epilogue_mode () const;
 
 protected:
   unsigned int record_stmt_cost (stmt_vec_info, vect_cost_model_location,
@@ -1691,6 +1692,10 @@ protected:
   /* The suggested unrolling factor determined at finish_cost.  */
   unsigned int m_suggested_unroll_factor;
 
+  /* The suggested mode to be used for a vectorized epilogue or VOIDmode,
+     determined at finish_cost.  */
+  machine_mode m_suggested_epilogue_mode;
+
   /* True if finish_cost has been called.  */
   bool m_finished;
 };
@@ -1704,6 +1709,7 @@ vector_costs::vector_costs (vec_info *vinfo, bool costing_for_scalar)
     m_costing_for_scalar (costing_for_scalar),
     m_costs (),
     m_suggested_unroll_factor(1),
+    m_suggested_epilogue_mode(VOIDmode),
     m_finished (false)
 {
 }
@@ -1761,6 +1767,15 @@ vector_costs::suggested_unroll_factor () const
   return m_suggested_unroll_factor;
 }
 
+/* Return the suggested epilogue mode.  */
+
+inline machine_mode
+vector_costs::suggested_epilogue_mode () const
+{
+  gcc_checking_assert (m_finished);
+  return m_suggested_epilogue_mode;
+}
+
 #define VECT_MAX_COST 1000
 
 /* The maximum number of intermediate steps required in multi-step type
-- 
2.43.0

Reply via email to