On Wed, 13 Nov 2024, Tamar Christina wrote: > > -----Original Message----- > > From: Richard Biener <rguent...@suse.de> > > Sent: Monday, November 11, 2024 12:17 PM > > To: gcc-patches@gcc.gnu.org > > Cc: Richard Sandiford <richard.sandif...@arm.com>; Tamar Christina > > <tamar.christ...@arm.com> > > Subject: [PATCH 1/2] Add suggested_epilogue_mode to vector costs > > > > The following enables targets to suggest the vector mode to be used > > preferably for the epilogue of a vectorized loop. The patch also > > enables more than one vectorized epilogue in case the target suggests > > a vector mode for the epilogue of a vector epilogue. > > > > Bootstrapped and tested on x86_64-unknown-linux-gnu. > > > > As you can see without target cost modeling you cannot get more > > than one vectorized epilogue (I've dropped the --param > > vect-epilogues-nomask=N approach for now). In case the target > > prefers a specific epilogue mode but that turns out to not > > vectorize we continue iterating, searching for an alternative mode. > > > > Any objections? > > No objections, but one question: > > > > > Thanks, > > Richard. > > > > * tree-vectorizer.h (vector_costs::suggested_epilogue_mode): New. > > (vector_costs::m_suggested_epilogue_mode): Likewise. > > (vector_costs::vector_costs): Initialize m_suggested_epilogue_mode. > > * tree-vect-loop.cc (vect_analyze_loop): Honor the target > > suggested prefered epilogue mode and support vector epilogues > > of vector epilogues if requested. > > --- > > gcc/tree-vect-loop.cc | 124 ++++++++++++++++++++++++++---------------- > > gcc/tree-vectorizer.h | 15 +++++ > > 2 files changed, 92 insertions(+), 47 deletions(-) > > > > diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc > > index 3f2095da449..32d84cb8663 100644 > > --- a/gcc/tree-vect-loop.cc > > +++ b/gcc/tree-vect-loop.cc > > @@ -3714,72 +3714,102 @@ vect_analyze_loop (class loop *loop, gimple > > *loop_vectorized_call, > > array may contain length-agnostic and length-specific modes. Their > > ordering is not guaranteed, so we could end up picking a mode for the > > main > > loop that is after the epilogue's optimal mode. */ > > - vector_modes[0] = autodetected_vector_mode; > > + if (!unlimited_cost_model (loop) > > + && first_loop_vinfo->vector_costs->suggested_epilogue_mode () != > > VOIDmode) > > + { > > + vector_modes[0] > > + = first_loop_vinfo->vector_costs->suggested_epilogue_mode (); > > + cached_vf_per_mode[0] = 0; > > + } > > + else > > + vector_modes[0] = autodetected_vector_mode; > > mode_i = 0; > > > > bool supports_partial_vectors = > > partial_vectors_supported_p () && param_vect_partial_vector_usage != 0; > > poly_uint64 first_vinfo_vf = LOOP_VINFO_VECT_FACTOR (first_loop_vinfo); > > > > - while (1) > > + loop_vec_info orig_loop_vinfo = first_loop_vinfo; > > + do > > { > > - /* If the target does not support partial vectors we can shorten the > > - number of modes to analyze for the epilogue as we know we can't pick a > > - mode that would lead to a VF at least as big as the > > - FIRST_VINFO_VF. */ > > - if (!supports_partial_vectors > > - && maybe_ge (cached_vf_per_mode[mode_i], first_vinfo_vf)) > > + while (1) > > { > > - mode_i++; > > - if (mode_i == vector_modes.length ()) > > - break; > > - continue; > > - } > > - > > - if (dump_enabled_p ()) > > - dump_printf_loc (MSG_NOTE, vect_location, > > - "***** Re-trying epilogue analysis with vector " > > - "mode %s\n", GET_MODE_NAME > > (vector_modes[mode_i])); > > - > > - bool fatal; > > - opt_loop_vec_info loop_vinfo > > - = vect_analyze_loop_1 (loop, shared, &loop_form_info, > > - first_loop_vinfo, > > - vector_modes, mode_i, > > - autodetected_vector_mode, fatal); > > - if (fatal) > > - break; > > - > > - if (loop_vinfo) > > - { > > - if (pick_lowest_cost_p > > - && first_loop_vinfo->epilogue_vinfo > > - && vect_joust_loop_vinfos (loop_vinfo, > > - first_loop_vinfo->epilogue_vinfo)) > > + /* If the target does not support partial vectors we can shorten the > > + number of modes to analyze for the epilogue as we know we can't > > + pick a mode that would lead to a VF at least as big as the > > + FIRST_VINFO_VF. */ > > + if (!supports_partial_vectors > > + && maybe_ge (cached_vf_per_mode[mode_i], first_vinfo_vf)) > > { > > - gcc_assert (vect_epilogues); > > - delete first_loop_vinfo->epilogue_vinfo; > > - first_loop_vinfo->epilogue_vinfo = nullptr; > > + mode_i++; > > + if (mode_i == vector_modes.length ()) > > + break; > > + continue; > > } > > - if (!first_loop_vinfo->epilogue_vinfo) > > - first_loop_vinfo->epilogue_vinfo = loop_vinfo; > > - else > > + > > + if (dump_enabled_p ()) > > + dump_printf_loc (MSG_NOTE, vect_location, > > + "***** Re-trying epilogue analysis with vector " > > + "mode %s\n", GET_MODE_NAME > > (vector_modes[mode_i])); > > + > > + bool fatal; > > + opt_loop_vec_info loop_vinfo > > + = vect_analyze_loop_1 (loop, shared, &loop_form_info, > > + orig_loop_vinfo, > > + vector_modes, mode_i, > > + autodetected_vector_mode, fatal); > > + if (fatal) > > + break; > > + > > + if (loop_vinfo) > > { > > - delete loop_vinfo; > > - loop_vinfo = opt_loop_vec_info::success (NULL); > > + if (pick_lowest_cost_p > > + && orig_loop_vinfo->epilogue_vinfo > > + && vect_joust_loop_vinfos (loop_vinfo, > > + orig_loop_vinfo->epilogue_vinfo)) > > + { > > + gcc_assert (vect_epilogues); > > + delete orig_loop_vinfo->epilogue_vinfo; > > + orig_loop_vinfo->epilogue_vinfo = nullptr; > > + } > > + if (!orig_loop_vinfo->epilogue_vinfo) > > + orig_loop_vinfo->epilogue_vinfo = loop_vinfo; > > + else > > + { > > + delete loop_vinfo; > > + loop_vinfo = opt_loop_vec_info::success (NULL); > > + } > > + > > + /* For now only allow one epilogue loop, but allow > > + pick_lowest_cost_p to replace it, so commit to the > > + first epilogue if we have no reason to try alternatives. */ > > + if (!pick_lowest_cost_p) > > + break; > > } > > > > - /* For now only allow one epilogue loop, but allow > > - pick_lowest_cost_p to replace it, so commit to the > > - first epilogue if we have no reason to try alternatives. */ > > - if (!pick_lowest_cost_p) > > + if (mode_i == vector_modes.length ()) > > break; > > } > > I'm having a hard time following the exit condition for the inner loop. > I'm wondering if it's not an infinite loop when supports_partial_vectors && > pick_lowest_cost_p > as mode_i won't be moved then. > > Did I miss something here?
If the target continues to ask for more epilogues and we succeed in analyzing more epilogues then yes. But I think we refuse to generate an epilogue for a loop using partial vectors and if not using partial vectors you'd get smaller and smaller modes (also enfoced by analysis). So in practice even with the target asking I think we should be fine. Richard. > Thanks, > Tamar > > > > > - if (mode_i == vector_modes.length ()) > > + orig_loop_vinfo = orig_loop_vinfo->epilogue_vinfo; > > + if (!orig_loop_vinfo) > > break; > > > > + /* When we selected a first vectorized epilogue, see if the target > > + suggests to have another one. */ > > + if (!unlimited_cost_model (loop) > > + && (orig_loop_vinfo->vector_costs->suggested_epilogue_mode () > > + != VOIDmode)) > > + { > > + vector_modes[0] > > + = orig_loop_vinfo->vector_costs->suggested_epilogue_mode (); > > + cached_vf_per_mode[0] = 0; > > + mode_i = 0; > > + } > > + else > > + break; > > } > > + while (1); > > > > if (first_loop_vinfo->epilogue_vinfo) > > { > > diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h > > index 88db34b87b6..273e8c644e7 100644 > > --- a/gcc/tree-vectorizer.h > > +++ b/gcc/tree-vectorizer.h > > @@ -1669,6 +1669,7 @@ public: > > unsigned int outside_cost () const; > > unsigned int total_cost () const; > > unsigned int suggested_unroll_factor () const; > > + machine_mode suggested_epilogue_mode () const; > > > > protected: > > unsigned int record_stmt_cost (stmt_vec_info, vect_cost_model_location, > > @@ -1691,6 +1692,10 @@ protected: > > /* The suggested unrolling factor determined at finish_cost. */ > > unsigned int m_suggested_unroll_factor; > > > > + /* The suggested mode to be used for a vectorized epilogue or VOIDmode, > > + determined at finish_cost. */ > > + machine_mode m_suggested_epilogue_mode; > > + > > /* True if finish_cost has been called. */ > > bool m_finished; > > }; > > @@ -1704,6 +1709,7 @@ vector_costs::vector_costs (vec_info *vinfo, bool > > costing_for_scalar) > > m_costing_for_scalar (costing_for_scalar), > > m_costs (), > > m_suggested_unroll_factor(1), > > + m_suggested_epilogue_mode(VOIDmode), > > m_finished (false) > > { > > } > > @@ -1761,6 +1767,15 @@ vector_costs::suggested_unroll_factor () const > > return m_suggested_unroll_factor; > > } > > > > +/* Return the suggested epilogue mode. */ > > + > > +inline machine_mode > > +vector_costs::suggested_epilogue_mode () const > > +{ > > + gcc_checking_assert (m_finished); > > + return m_suggested_epilogue_mode; > > +} > > + > > #define VECT_MAX_COST 1000 > > > > /* The maximum number of intermediate steps required in multi-step type > > -- > > 2.43.0 > > -- Richard Biener <rguent...@suse.de> SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg, Germany; GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)