On Wed, 13 Nov 2024, Tamar Christina wrote:

> > -----Original Message-----
> > From: Richard Biener <rguent...@suse.de>
> > Sent: Monday, November 11, 2024 12:17 PM
> > To: gcc-patches@gcc.gnu.org
> > Cc: Richard Sandiford <richard.sandif...@arm.com>; Tamar Christina
> > <tamar.christ...@arm.com>
> > Subject: [PATCH 1/2] Add suggested_epilogue_mode to vector costs
> > 
> > The following enables targets to suggest the vector mode to be used
> > preferably for the epilogue of a vectorized loop.  The patch also
> > enables more than one vectorized epilogue in case the target suggests
> > a vector mode for the epilogue of a vector epilogue.
> > 
> > Bootstrapped and tested on x86_64-unknown-linux-gnu.
> > 
> > As you can see without target cost modeling you cannot get more
> > than one vectorized epilogue (I've dropped the --param
> > vect-epilogues-nomask=N approach for now).  In case the target
> > prefers a specific epilogue mode but that turns out to not
> > vectorize we continue iterating, searching for an alternative mode.
> > 
> > Any objections?
> 
> No objections, but one question:
> 
> > 
> > Thanks,
> > Richard.
> > 
> >     * tree-vectorizer.h (vector_costs::suggested_epilogue_mode): New.
> >     (vector_costs::m_suggested_epilogue_mode): Likewise.
> >     (vector_costs::vector_costs): Initialize m_suggested_epilogue_mode.
> >     * tree-vect-loop.cc (vect_analyze_loop): Honor the target
> >     suggested prefered epilogue mode and support vector epilogues
> >     of vector epilogues if requested.
> > ---
> >  gcc/tree-vect-loop.cc | 124 ++++++++++++++++++++++++++----------------
> >  gcc/tree-vectorizer.h |  15 +++++
> >  2 files changed, 92 insertions(+), 47 deletions(-)
> > 
> > diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> > index 3f2095da449..32d84cb8663 100644
> > --- a/gcc/tree-vect-loop.cc
> > +++ b/gcc/tree-vect-loop.cc
> > @@ -3714,72 +3714,102 @@ vect_analyze_loop (class loop *loop, gimple
> > *loop_vectorized_call,
> >       array may contain length-agnostic and length-specific modes.  Their
> >       ordering is not guaranteed, so we could end up picking a mode for the 
> > main
> >       loop that is after the epilogue's optimal mode.  */
> > -  vector_modes[0] = autodetected_vector_mode;
> > +  if (!unlimited_cost_model (loop)
> > +      && first_loop_vinfo->vector_costs->suggested_epilogue_mode () !=
> > VOIDmode)
> > +    {
> > +      vector_modes[0]
> > +   = first_loop_vinfo->vector_costs->suggested_epilogue_mode ();
> > +      cached_vf_per_mode[0] = 0;
> > +    }
> > +  else
> > +    vector_modes[0] = autodetected_vector_mode;
> >    mode_i = 0;
> > 
> >    bool supports_partial_vectors =
> >      partial_vectors_supported_p () && param_vect_partial_vector_usage != 0;
> >    poly_uint64 first_vinfo_vf = LOOP_VINFO_VECT_FACTOR (first_loop_vinfo);
> > 
> > -  while (1)
> > +  loop_vec_info orig_loop_vinfo = first_loop_vinfo;
> > +  do
> >      {
> > -      /* If the target does not support partial vectors we can shorten the
> > -    number of modes to analyze for the epilogue as we know we can't pick a
> > -    mode that would lead to a VF at least as big as the
> > -    FIRST_VINFO_VF.  */
> > -      if (!supports_partial_vectors
> > -     && maybe_ge (cached_vf_per_mode[mode_i], first_vinfo_vf))
> > +      while (1)
> >     {
> > -     mode_i++;
> > -     if (mode_i == vector_modes.length ())
> > -       break;
> > -     continue;
> > -   }
> > -
> > -      if (dump_enabled_p ())
> > -   dump_printf_loc (MSG_NOTE, vect_location,
> > -                    "***** Re-trying epilogue analysis with vector "
> > -                    "mode %s\n", GET_MODE_NAME
> > (vector_modes[mode_i]));
> > -
> > -      bool fatal;
> > -      opt_loop_vec_info loop_vinfo
> > -   = vect_analyze_loop_1 (loop, shared, &loop_form_info,
> > -                          first_loop_vinfo,
> > -                          vector_modes, mode_i,
> > -                          autodetected_vector_mode, fatal);
> > -      if (fatal)
> > -   break;
> > -
> > -      if (loop_vinfo)
> > -   {
> > -     if (pick_lowest_cost_p
> > -         && first_loop_vinfo->epilogue_vinfo
> > -         && vect_joust_loop_vinfos (loop_vinfo,
> > -                                    first_loop_vinfo->epilogue_vinfo))
> > +     /* If the target does not support partial vectors we can shorten the
> > +        number of modes to analyze for the epilogue as we know we can't
> > +        pick a mode that would lead to a VF at least as big as the
> > +        FIRST_VINFO_VF.  */
> > +     if (!supports_partial_vectors
> > +         && maybe_ge (cached_vf_per_mode[mode_i], first_vinfo_vf))
> >         {
> > -         gcc_assert (vect_epilogues);
> > -         delete first_loop_vinfo->epilogue_vinfo;
> > -         first_loop_vinfo->epilogue_vinfo = nullptr;
> > +         mode_i++;
> > +         if (mode_i == vector_modes.length ())
> > +           break;
> > +         continue;
> >         }
> > -     if (!first_loop_vinfo->epilogue_vinfo)
> > -       first_loop_vinfo->epilogue_vinfo = loop_vinfo;
> > -     else
> > +
> > +     if (dump_enabled_p ())
> > +       dump_printf_loc (MSG_NOTE, vect_location,
> > +                        "***** Re-trying epilogue analysis with vector "
> > +                        "mode %s\n", GET_MODE_NAME
> > (vector_modes[mode_i]));
> > +
> > +     bool fatal;
> > +     opt_loop_vec_info loop_vinfo
> > +       = vect_analyze_loop_1 (loop, shared, &loop_form_info,
> > +                              orig_loop_vinfo,
> > +                              vector_modes, mode_i,
> > +                              autodetected_vector_mode, fatal);
> > +     if (fatal)
> > +       break;
> > +
> > +     if (loop_vinfo)
> >         {
> > -         delete loop_vinfo;
> > -         loop_vinfo = opt_loop_vec_info::success (NULL);
> > +         if (pick_lowest_cost_p
> > +             && orig_loop_vinfo->epilogue_vinfo
> > +             && vect_joust_loop_vinfos (loop_vinfo,
> > +                                        orig_loop_vinfo->epilogue_vinfo))
> > +           {
> > +             gcc_assert (vect_epilogues);
> > +             delete orig_loop_vinfo->epilogue_vinfo;
> > +             orig_loop_vinfo->epilogue_vinfo = nullptr;
> > +           }
> > +         if (!orig_loop_vinfo->epilogue_vinfo)
> > +           orig_loop_vinfo->epilogue_vinfo = loop_vinfo;
> > +         else
> > +           {
> > +             delete loop_vinfo;
> > +             loop_vinfo = opt_loop_vec_info::success (NULL);
> > +           }
> > +
> > +         /* For now only allow one epilogue loop, but allow
> > +            pick_lowest_cost_p to replace it, so commit to the
> > +            first epilogue if we have no reason to try alternatives.  */
> > +         if (!pick_lowest_cost_p)
> > +           break;
> >         }
> > 
> > -     /* For now only allow one epilogue loop, but allow
> > -        pick_lowest_cost_p to replace it, so commit to the
> > -        first epilogue if we have no reason to try alternatives.  */
> > -     if (!pick_lowest_cost_p)
> > +     if (mode_i == vector_modes.length ())
> >         break;
> >     }
> 
> I'm having a hard time following the exit condition for the inner loop.
> I'm wondering if it's not an infinite loop when supports_partial_vectors && 
> pick_lowest_cost_p
> as mode_i won't be moved then.
> 
> Did I miss something here?

If the target continues to ask for more epilogues and we succeed in
analyzing more epilogues then yes.  But I think we refuse to generate
an epilogue for a loop using partial vectors and if not using partial
vectors you'd get smaller and smaller modes (also enfoced by analysis).

So in practice even with the target asking I think we should be fine.

Richard.

> Thanks,
> Tamar
> 
> > 
> > -      if (mode_i == vector_modes.length ())
> > +      orig_loop_vinfo = orig_loop_vinfo->epilogue_vinfo;
> > +      if (!orig_loop_vinfo)
> >     break;
> > 
> > +      /* When we selected a first vectorized epilogue, see if the target
> > +    suggests to have another one.  */
> > +      if (!unlimited_cost_model (loop)
> > +     && (orig_loop_vinfo->vector_costs->suggested_epilogue_mode ()
> > +         != VOIDmode))
> > +   {
> > +     vector_modes[0]
> > +       = orig_loop_vinfo->vector_costs->suggested_epilogue_mode ();
> > +     cached_vf_per_mode[0] = 0;
> > +     mode_i = 0;
> > +   }
> > +      else
> > +   break;
> >      }
> > +  while (1);
> > 
> >    if (first_loop_vinfo->epilogue_vinfo)
> >      {
> > diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> > index 88db34b87b6..273e8c644e7 100644
> > --- a/gcc/tree-vectorizer.h
> > +++ b/gcc/tree-vectorizer.h
> > @@ -1669,6 +1669,7 @@ public:
> >    unsigned int outside_cost () const;
> >    unsigned int total_cost () const;
> >    unsigned int suggested_unroll_factor () const;
> > +  machine_mode suggested_epilogue_mode () const;
> > 
> >  protected:
> >    unsigned int record_stmt_cost (stmt_vec_info, vect_cost_model_location,
> > @@ -1691,6 +1692,10 @@ protected:
> >    /* The suggested unrolling factor determined at finish_cost.  */
> >    unsigned int m_suggested_unroll_factor;
> > 
> > +  /* The suggested mode to be used for a vectorized epilogue or VOIDmode,
> > +     determined at finish_cost.  */
> > +  machine_mode m_suggested_epilogue_mode;
> > +
> >    /* True if finish_cost has been called.  */
> >    bool m_finished;
> >  };
> > @@ -1704,6 +1709,7 @@ vector_costs::vector_costs (vec_info *vinfo, bool
> > costing_for_scalar)
> >      m_costing_for_scalar (costing_for_scalar),
> >      m_costs (),
> >      m_suggested_unroll_factor(1),
> > +    m_suggested_epilogue_mode(VOIDmode),
> >      m_finished (false)
> >  {
> >  }
> > @@ -1761,6 +1767,15 @@ vector_costs::suggested_unroll_factor () const
> >    return m_suggested_unroll_factor;
> >  }
> > 
> > +/* Return the suggested epilogue mode.  */
> > +
> > +inline machine_mode
> > +vector_costs::suggested_epilogue_mode () const
> > +{
> > +  gcc_checking_assert (m_finished);
> > +  return m_suggested_epilogue_mode;
> > +}
> > +
> >  #define VECT_MAX_COST 1000
> > 
> >  /* The maximum number of intermediate steps required in multi-step type
> > --
> > 2.43.0
> 
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)

Reply via email to