> -----Original Message----- > From: Richard Biener <rguent...@suse.de> > Sent: Monday, November 11, 2024 12:17 PM > To: gcc-patches@gcc.gnu.org > Cc: Richard Sandiford <richard.sandif...@arm.com>; Tamar Christina > <tamar.christ...@arm.com> > Subject: [PATCH 1/2] Add suggested_epilogue_mode to vector costs > > The following enables targets to suggest the vector mode to be used > preferably for the epilogue of a vectorized loop. The patch also > enables more than one vectorized epilogue in case the target suggests > a vector mode for the epilogue of a vector epilogue. > > Bootstrapped and tested on x86_64-unknown-linux-gnu. > > As you can see without target cost modeling you cannot get more > than one vectorized epilogue (I've dropped the --param > vect-epilogues-nomask=N approach for now). In case the target > prefers a specific epilogue mode but that turns out to not > vectorize we continue iterating, searching for an alternative mode. > > Any objections?
No objections, but one question: > > Thanks, > Richard. > > * tree-vectorizer.h (vector_costs::suggested_epilogue_mode): New. > (vector_costs::m_suggested_epilogue_mode): Likewise. > (vector_costs::vector_costs): Initialize m_suggested_epilogue_mode. > * tree-vect-loop.cc (vect_analyze_loop): Honor the target > suggested prefered epilogue mode and support vector epilogues > of vector epilogues if requested. > --- > gcc/tree-vect-loop.cc | 124 ++++++++++++++++++++++++++---------------- > gcc/tree-vectorizer.h | 15 +++++ > 2 files changed, 92 insertions(+), 47 deletions(-) > > diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc > index 3f2095da449..32d84cb8663 100644 > --- a/gcc/tree-vect-loop.cc > +++ b/gcc/tree-vect-loop.cc > @@ -3714,72 +3714,102 @@ vect_analyze_loop (class loop *loop, gimple > *loop_vectorized_call, > array may contain length-agnostic and length-specific modes. Their > ordering is not guaranteed, so we could end up picking a mode for the > main > loop that is after the epilogue's optimal mode. */ > - vector_modes[0] = autodetected_vector_mode; > + if (!unlimited_cost_model (loop) > + && first_loop_vinfo->vector_costs->suggested_epilogue_mode () != > VOIDmode) > + { > + vector_modes[0] > + = first_loop_vinfo->vector_costs->suggested_epilogue_mode (); > + cached_vf_per_mode[0] = 0; > + } > + else > + vector_modes[0] = autodetected_vector_mode; > mode_i = 0; > > bool supports_partial_vectors = > partial_vectors_supported_p () && param_vect_partial_vector_usage != 0; > poly_uint64 first_vinfo_vf = LOOP_VINFO_VECT_FACTOR (first_loop_vinfo); > > - while (1) > + loop_vec_info orig_loop_vinfo = first_loop_vinfo; > + do > { > - /* If the target does not support partial vectors we can shorten the > - number of modes to analyze for the epilogue as we know we can't pick a > - mode that would lead to a VF at least as big as the > - FIRST_VINFO_VF. */ > - if (!supports_partial_vectors > - && maybe_ge (cached_vf_per_mode[mode_i], first_vinfo_vf)) > + while (1) > { > - mode_i++; > - if (mode_i == vector_modes.length ()) > - break; > - continue; > - } > - > - if (dump_enabled_p ()) > - dump_printf_loc (MSG_NOTE, vect_location, > - "***** Re-trying epilogue analysis with vector " > - "mode %s\n", GET_MODE_NAME > (vector_modes[mode_i])); > - > - bool fatal; > - opt_loop_vec_info loop_vinfo > - = vect_analyze_loop_1 (loop, shared, &loop_form_info, > - first_loop_vinfo, > - vector_modes, mode_i, > - autodetected_vector_mode, fatal); > - if (fatal) > - break; > - > - if (loop_vinfo) > - { > - if (pick_lowest_cost_p > - && first_loop_vinfo->epilogue_vinfo > - && vect_joust_loop_vinfos (loop_vinfo, > - first_loop_vinfo->epilogue_vinfo)) > + /* If the target does not support partial vectors we can shorten the > + number of modes to analyze for the epilogue as we know we can't > + pick a mode that would lead to a VF at least as big as the > + FIRST_VINFO_VF. */ > + if (!supports_partial_vectors > + && maybe_ge (cached_vf_per_mode[mode_i], first_vinfo_vf)) > { > - gcc_assert (vect_epilogues); > - delete first_loop_vinfo->epilogue_vinfo; > - first_loop_vinfo->epilogue_vinfo = nullptr; > + mode_i++; > + if (mode_i == vector_modes.length ()) > + break; > + continue; > } > - if (!first_loop_vinfo->epilogue_vinfo) > - first_loop_vinfo->epilogue_vinfo = loop_vinfo; > - else > + > + if (dump_enabled_p ()) > + dump_printf_loc (MSG_NOTE, vect_location, > + "***** Re-trying epilogue analysis with vector " > + "mode %s\n", GET_MODE_NAME > (vector_modes[mode_i])); > + > + bool fatal; > + opt_loop_vec_info loop_vinfo > + = vect_analyze_loop_1 (loop, shared, &loop_form_info, > + orig_loop_vinfo, > + vector_modes, mode_i, > + autodetected_vector_mode, fatal); > + if (fatal) > + break; > + > + if (loop_vinfo) > { > - delete loop_vinfo; > - loop_vinfo = opt_loop_vec_info::success (NULL); > + if (pick_lowest_cost_p > + && orig_loop_vinfo->epilogue_vinfo > + && vect_joust_loop_vinfos (loop_vinfo, > + orig_loop_vinfo->epilogue_vinfo)) > + { > + gcc_assert (vect_epilogues); > + delete orig_loop_vinfo->epilogue_vinfo; > + orig_loop_vinfo->epilogue_vinfo = nullptr; > + } > + if (!orig_loop_vinfo->epilogue_vinfo) > + orig_loop_vinfo->epilogue_vinfo = loop_vinfo; > + else > + { > + delete loop_vinfo; > + loop_vinfo = opt_loop_vec_info::success (NULL); > + } > + > + /* For now only allow one epilogue loop, but allow > + pick_lowest_cost_p to replace it, so commit to the > + first epilogue if we have no reason to try alternatives. */ > + if (!pick_lowest_cost_p) > + break; > } > > - /* For now only allow one epilogue loop, but allow > - pick_lowest_cost_p to replace it, so commit to the > - first epilogue if we have no reason to try alternatives. */ > - if (!pick_lowest_cost_p) > + if (mode_i == vector_modes.length ()) > break; > } I'm having a hard time following the exit condition for the inner loop. I'm wondering if it's not an infinite loop when supports_partial_vectors && pick_lowest_cost_p as mode_i won't be moved then. Did I miss something here? Thanks, Tamar > > - if (mode_i == vector_modes.length ()) > + orig_loop_vinfo = orig_loop_vinfo->epilogue_vinfo; > + if (!orig_loop_vinfo) > break; > > + /* When we selected a first vectorized epilogue, see if the target > + suggests to have another one. */ > + if (!unlimited_cost_model (loop) > + && (orig_loop_vinfo->vector_costs->suggested_epilogue_mode () > + != VOIDmode)) > + { > + vector_modes[0] > + = orig_loop_vinfo->vector_costs->suggested_epilogue_mode (); > + cached_vf_per_mode[0] = 0; > + mode_i = 0; > + } > + else > + break; > } > + while (1); > > if (first_loop_vinfo->epilogue_vinfo) > { > diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h > index 88db34b87b6..273e8c644e7 100644 > --- a/gcc/tree-vectorizer.h > +++ b/gcc/tree-vectorizer.h > @@ -1669,6 +1669,7 @@ public: > unsigned int outside_cost () const; > unsigned int total_cost () const; > unsigned int suggested_unroll_factor () const; > + machine_mode suggested_epilogue_mode () const; > > protected: > unsigned int record_stmt_cost (stmt_vec_info, vect_cost_model_location, > @@ -1691,6 +1692,10 @@ protected: > /* The suggested unrolling factor determined at finish_cost. */ > unsigned int m_suggested_unroll_factor; > > + /* The suggested mode to be used for a vectorized epilogue or VOIDmode, > + determined at finish_cost. */ > + machine_mode m_suggested_epilogue_mode; > + > /* True if finish_cost has been called. */ > bool m_finished; > }; > @@ -1704,6 +1709,7 @@ vector_costs::vector_costs (vec_info *vinfo, bool > costing_for_scalar) > m_costing_for_scalar (costing_for_scalar), > m_costs (), > m_suggested_unroll_factor(1), > + m_suggested_epilogue_mode(VOIDmode), > m_finished (false) > { > } > @@ -1761,6 +1767,15 @@ vector_costs::suggested_unroll_factor () const > return m_suggested_unroll_factor; > } > > +/* Return the suggested epilogue mode. */ > + > +inline machine_mode > +vector_costs::suggested_epilogue_mode () const > +{ > + gcc_checking_assert (m_finished); > + return m_suggested_epilogue_mode; > +} > + > #define VECT_MAX_COST 1000 > > /* The maximum number of intermediate steps required in multi-step type > -- > 2.43.0