> -----Original Message-----
> From: Richard Biener <rguent...@suse.de>
> Sent: Monday, November 11, 2024 12:17 PM
> To: gcc-patches@gcc.gnu.org
> Cc: Richard Sandiford <richard.sandif...@arm.com>; Tamar Christina
> <tamar.christ...@arm.com>
> Subject: [PATCH 1/2] Add suggested_epilogue_mode to vector costs
> 
> The following enables targets to suggest the vector mode to be used
> preferably for the epilogue of a vectorized loop.  The patch also
> enables more than one vectorized epilogue in case the target suggests
> a vector mode for the epilogue of a vector epilogue.
> 
> Bootstrapped and tested on x86_64-unknown-linux-gnu.
> 
> As you can see without target cost modeling you cannot get more
> than one vectorized epilogue (I've dropped the --param
> vect-epilogues-nomask=N approach for now).  In case the target
> prefers a specific epilogue mode but that turns out to not
> vectorize we continue iterating, searching for an alternative mode.
> 
> Any objections?

No objections, but one question:

> 
> Thanks,
> Richard.
> 
>       * tree-vectorizer.h (vector_costs::suggested_epilogue_mode): New.
>       (vector_costs::m_suggested_epilogue_mode): Likewise.
>       (vector_costs::vector_costs): Initialize m_suggested_epilogue_mode.
>       * tree-vect-loop.cc (vect_analyze_loop): Honor the target
>       suggested prefered epilogue mode and support vector epilogues
>       of vector epilogues if requested.
> ---
>  gcc/tree-vect-loop.cc | 124 ++++++++++++++++++++++++++----------------
>  gcc/tree-vectorizer.h |  15 +++++
>  2 files changed, 92 insertions(+), 47 deletions(-)
> 
> diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> index 3f2095da449..32d84cb8663 100644
> --- a/gcc/tree-vect-loop.cc
> +++ b/gcc/tree-vect-loop.cc
> @@ -3714,72 +3714,102 @@ vect_analyze_loop (class loop *loop, gimple
> *loop_vectorized_call,
>       array may contain length-agnostic and length-specific modes.  Their
>       ordering is not guaranteed, so we could end up picking a mode for the 
> main
>       loop that is after the epilogue's optimal mode.  */
> -  vector_modes[0] = autodetected_vector_mode;
> +  if (!unlimited_cost_model (loop)
> +      && first_loop_vinfo->vector_costs->suggested_epilogue_mode () !=
> VOIDmode)
> +    {
> +      vector_modes[0]
> +     = first_loop_vinfo->vector_costs->suggested_epilogue_mode ();
> +      cached_vf_per_mode[0] = 0;
> +    }
> +  else
> +    vector_modes[0] = autodetected_vector_mode;
>    mode_i = 0;
> 
>    bool supports_partial_vectors =
>      partial_vectors_supported_p () && param_vect_partial_vector_usage != 0;
>    poly_uint64 first_vinfo_vf = LOOP_VINFO_VECT_FACTOR (first_loop_vinfo);
> 
> -  while (1)
> +  loop_vec_info orig_loop_vinfo = first_loop_vinfo;
> +  do
>      {
> -      /* If the target does not support partial vectors we can shorten the
> -      number of modes to analyze for the epilogue as we know we can't pick a
> -      mode that would lead to a VF at least as big as the
> -      FIRST_VINFO_VF.  */
> -      if (!supports_partial_vectors
> -       && maybe_ge (cached_vf_per_mode[mode_i], first_vinfo_vf))
> +      while (1)
>       {
> -       mode_i++;
> -       if (mode_i == vector_modes.length ())
> -         break;
> -       continue;
> -     }
> -
> -      if (dump_enabled_p ())
> -     dump_printf_loc (MSG_NOTE, vect_location,
> -                      "***** Re-trying epilogue analysis with vector "
> -                      "mode %s\n", GET_MODE_NAME
> (vector_modes[mode_i]));
> -
> -      bool fatal;
> -      opt_loop_vec_info loop_vinfo
> -     = vect_analyze_loop_1 (loop, shared, &loop_form_info,
> -                            first_loop_vinfo,
> -                            vector_modes, mode_i,
> -                            autodetected_vector_mode, fatal);
> -      if (fatal)
> -     break;
> -
> -      if (loop_vinfo)
> -     {
> -       if (pick_lowest_cost_p
> -           && first_loop_vinfo->epilogue_vinfo
> -           && vect_joust_loop_vinfos (loop_vinfo,
> -                                      first_loop_vinfo->epilogue_vinfo))
> +       /* If the target does not support partial vectors we can shorten the
> +          number of modes to analyze for the epilogue as we know we can't
> +          pick a mode that would lead to a VF at least as big as the
> +          FIRST_VINFO_VF.  */
> +       if (!supports_partial_vectors
> +           && maybe_ge (cached_vf_per_mode[mode_i], first_vinfo_vf))
>           {
> -           gcc_assert (vect_epilogues);
> -           delete first_loop_vinfo->epilogue_vinfo;
> -           first_loop_vinfo->epilogue_vinfo = nullptr;
> +           mode_i++;
> +           if (mode_i == vector_modes.length ())
> +             break;
> +           continue;
>           }
> -       if (!first_loop_vinfo->epilogue_vinfo)
> -         first_loop_vinfo->epilogue_vinfo = loop_vinfo;
> -       else
> +
> +       if (dump_enabled_p ())
> +         dump_printf_loc (MSG_NOTE, vect_location,
> +                          "***** Re-trying epilogue analysis with vector "
> +                          "mode %s\n", GET_MODE_NAME
> (vector_modes[mode_i]));
> +
> +       bool fatal;
> +       opt_loop_vec_info loop_vinfo
> +         = vect_analyze_loop_1 (loop, shared, &loop_form_info,
> +                                orig_loop_vinfo,
> +                                vector_modes, mode_i,
> +                                autodetected_vector_mode, fatal);
> +       if (fatal)
> +         break;
> +
> +       if (loop_vinfo)
>           {
> -           delete loop_vinfo;
> -           loop_vinfo = opt_loop_vec_info::success (NULL);
> +           if (pick_lowest_cost_p
> +               && orig_loop_vinfo->epilogue_vinfo
> +               && vect_joust_loop_vinfos (loop_vinfo,
> +                                          orig_loop_vinfo->epilogue_vinfo))
> +             {
> +               gcc_assert (vect_epilogues);
> +               delete orig_loop_vinfo->epilogue_vinfo;
> +               orig_loop_vinfo->epilogue_vinfo = nullptr;
> +             }
> +           if (!orig_loop_vinfo->epilogue_vinfo)
> +             orig_loop_vinfo->epilogue_vinfo = loop_vinfo;
> +           else
> +             {
> +               delete loop_vinfo;
> +               loop_vinfo = opt_loop_vec_info::success (NULL);
> +             }
> +
> +           /* For now only allow one epilogue loop, but allow
> +              pick_lowest_cost_p to replace it, so commit to the
> +              first epilogue if we have no reason to try alternatives.  */
> +           if (!pick_lowest_cost_p)
> +             break;
>           }
> 
> -       /* For now only allow one epilogue loop, but allow
> -          pick_lowest_cost_p to replace it, so commit to the
> -          first epilogue if we have no reason to try alternatives.  */
> -       if (!pick_lowest_cost_p)
> +       if (mode_i == vector_modes.length ())
>           break;
>       }

I'm having a hard time following the exit condition for the inner loop.
I'm wondering if it's not an infinite loop when supports_partial_vectors && 
pick_lowest_cost_p
as mode_i won't be moved then.

Did I miss something here?

Thanks,
Tamar

> 
> -      if (mode_i == vector_modes.length ())
> +      orig_loop_vinfo = orig_loop_vinfo->epilogue_vinfo;
> +      if (!orig_loop_vinfo)
>       break;
> 
> +      /* When we selected a first vectorized epilogue, see if the target
> +      suggests to have another one.  */
> +      if (!unlimited_cost_model (loop)
> +       && (orig_loop_vinfo->vector_costs->suggested_epilogue_mode ()
> +           != VOIDmode))
> +     {
> +       vector_modes[0]
> +         = orig_loop_vinfo->vector_costs->suggested_epilogue_mode ();
> +       cached_vf_per_mode[0] = 0;
> +       mode_i = 0;
> +     }
> +      else
> +     break;
>      }
> +  while (1);
> 
>    if (first_loop_vinfo->epilogue_vinfo)
>      {
> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> index 88db34b87b6..273e8c644e7 100644
> --- a/gcc/tree-vectorizer.h
> +++ b/gcc/tree-vectorizer.h
> @@ -1669,6 +1669,7 @@ public:
>    unsigned int outside_cost () const;
>    unsigned int total_cost () const;
>    unsigned int suggested_unroll_factor () const;
> +  machine_mode suggested_epilogue_mode () const;
> 
>  protected:
>    unsigned int record_stmt_cost (stmt_vec_info, vect_cost_model_location,
> @@ -1691,6 +1692,10 @@ protected:
>    /* The suggested unrolling factor determined at finish_cost.  */
>    unsigned int m_suggested_unroll_factor;
> 
> +  /* The suggested mode to be used for a vectorized epilogue or VOIDmode,
> +     determined at finish_cost.  */
> +  machine_mode m_suggested_epilogue_mode;
> +
>    /* True if finish_cost has been called.  */
>    bool m_finished;
>  };
> @@ -1704,6 +1709,7 @@ vector_costs::vector_costs (vec_info *vinfo, bool
> costing_for_scalar)
>      m_costing_for_scalar (costing_for_scalar),
>      m_costs (),
>      m_suggested_unroll_factor(1),
> +    m_suggested_epilogue_mode(VOIDmode),
>      m_finished (false)
>  {
>  }
> @@ -1761,6 +1767,15 @@ vector_costs::suggested_unroll_factor () const
>    return m_suggested_unroll_factor;
>  }
> 
> +/* Return the suggested epilogue mode.  */
> +
> +inline machine_mode
> +vector_costs::suggested_epilogue_mode () const
> +{
> +  gcc_checking_assert (m_finished);
> +  return m_suggested_epilogue_mode;
> +}
> +
>  #define VECT_MAX_COST 1000
> 
>  /* The maximum number of intermediate steps required in multi-step type
> --
> 2.43.0

Reply via email to