On Fri, Dec 1, 2023 at 3:39 AM liuhongt <hongtao....@intel.com> wrote:
>
> > Hmm, I would suggest you put reg_needed into the class and accumulate
> > over all vec_construct, with your patch you pessimize a single v32qi
> > over two separate v16qi for example.  Also currently the whole block is
> > gated with INTEGRAL_TYPE_P but register pressure would be also
> > a concern for floating point vectors.  finish_cost would then apply an
> > adjustment.
>
> Changed.
>
> > 'target_avail_regs' is for GENERAL_REGS, does that include APX regs?
> > I don't see anything similar for FP regs, but I guess the target should know
> > or maybe there's a #regs in regclass query already.
> Haven't see any, use below setting.
>
> unsigned target_avail_sse = TARGET_64BIT ? (TARGET_AVX512F ? 32 : 16) : 8;
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> No big impact on SPEC2017.
> Observe 1 big improvement from other benchmark by avoiding vectorization with
> vec_construct v32qi which caused lots of spills.
>
> Ok for trunk?

LGTM, let's see what x86 maintainers think.

Richard.

> For vec_contruct, the components must be live at the same time if
> they're not loaded from memory, when the number of those components
> exceeds available registers, spill happens. Try to account that with a
> rough estimation.
> ??? Ideally, we should have an overall estimation of register pressure
> if we know the live range of all variables.
>
> gcc/ChangeLog:
>
>         * config/i386/i386.cc (ix86_vector_costs::add_stmt_cost):
>         Count sse_reg/gpr_regs for components not loaded from memory.
>         (ix86_vector_costs:ix86_vector_costs): New constructor.
>         (ix86_vector_costs::m_num_gpr_needed[3]): New private memeber.
>         (ix86_vector_costs::m_num_sse_needed[3]): Ditto.
>         (ix86_vector_costs::finish_cost): Estimate overall register
>         pressure cost.
>         (ix86_vector_costs::ix86_vect_estimate_reg_pressure): New
>         function.
> ---
>  gcc/config/i386/i386.cc | 54 ++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 50 insertions(+), 4 deletions(-)
>
> diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> index 9390f525b99..dcaea6c2096 100644
> --- a/gcc/config/i386/i386.cc
> +++ b/gcc/config/i386/i386.cc
> @@ -24562,15 +24562,34 @@ ix86_noce_conversion_profitable_p (rtx_insn *seq, 
> struct noce_if_info *if_info)
>  /* x86-specific vector costs.  */
>  class ix86_vector_costs : public vector_costs
>  {
> -  using vector_costs::vector_costs;
> +public:
> +  ix86_vector_costs (vec_info *, bool);
>
>    unsigned int add_stmt_cost (int count, vect_cost_for_stmt kind,
>                               stmt_vec_info stmt_info, slp_tree node,
>                               tree vectype, int misalign,
>                               vect_cost_model_location where) override;
>    void finish_cost (const vector_costs *) override;
> +
> +private:
> +
> +  /* Estimate register pressure of the vectorized code.  */
> +  void ix86_vect_estimate_reg_pressure ();
> +  /* Number of GENERAL_REGS/SSE_REGS used in the vectorizer, it's used for
> +     estimation of register pressure.
> +     ??? Currently it's only used by vec_construct/scalar_to_vec
> +     where we know it's not loaded from memory.  */
> +  unsigned m_num_gpr_needed[3];
> +  unsigned m_num_sse_needed[3];
>  };
>
> +ix86_vector_costs::ix86_vector_costs (vec_info* vinfo, bool 
> costing_for_scalar)
> +  : vector_costs (vinfo, costing_for_scalar),
> +    m_num_gpr_needed (),
> +    m_num_sse_needed ()
> +{
> +}
> +
>  /* Implement targetm.vectorize.create_costs.  */
>
>  static vector_costs *
> @@ -24748,8 +24767,7 @@ ix86_vector_costs::add_stmt_cost (int count, 
> vect_cost_for_stmt kind,
>      }
>    else if ((kind == vec_construct || kind == scalar_to_vec)
>            && node
> -          && SLP_TREE_DEF_TYPE (node) == vect_external_def
> -          && INTEGRAL_TYPE_P (TREE_TYPE (vectype)))
> +          && SLP_TREE_DEF_TYPE (node) == vect_external_def)
>      {
>        stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
>        unsigned i;
> @@ -24785,7 +24803,15 @@ ix86_vector_costs::add_stmt_cost (int count, 
> vect_cost_for_stmt kind,
>                   && (gimple_assign_rhs_code (def) != BIT_FIELD_REF
>                       || !VECTOR_TYPE_P (TREE_TYPE
>                                 (TREE_OPERAND (gimple_assign_rhs1 (def), 
> 0))))))
> -           stmt_cost += ix86_cost->sse_to_integer;
> +           {
> +             if (fp)
> +               m_num_sse_needed[where]++;
> +             else
> +               {
> +                 m_num_gpr_needed[where]++;
> +                 stmt_cost += ix86_cost->sse_to_integer;
> +               }
> +           }
>         }
>        FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
>         if (TREE_CODE (op) == SSA_NAME)
> @@ -24821,6 +24847,24 @@ ix86_vector_costs::add_stmt_cost (int count, 
> vect_cost_for_stmt kind,
>    return retval;
>  }
>
> +void
> +ix86_vector_costs::ix86_vect_estimate_reg_pressure ()
> +{
> +  unsigned gpr_spill_cost = COSTS_N_INSNS (ix86_cost->int_store [2]) / 2;
> +  unsigned sse_spill_cost = COSTS_N_INSNS (ix86_cost->sse_store[0]) / 2;
> +
> +  /* Any better way to have target available fp registers, currently use 
> SSE_REGS.  */
> +  unsigned target_avail_sse = TARGET_64BIT ? (TARGET_AVX512F ? 32 : 16) : 8;
> +  for (unsigned i = 0; i != 3; i++)
> +    {
> +      if (m_num_gpr_needed[i] > target_avail_regs)
> +       m_costs[i] += gpr_spill_cost * (m_num_gpr_needed[i] - 
> target_avail_regs);
> +      /* Only measure sse registers pressure.  */
> +      if (TARGET_SSE && (m_num_sse_needed[i] > target_avail_sse))
> +       m_costs[i] += sse_spill_cost * (m_num_sse_needed[i] - 
> target_avail_sse);
> +    }
> +}
> +
>  void
>  ix86_vector_costs::finish_cost (const vector_costs *scalar_costs)
>  {
> @@ -24843,6 +24887,8 @@ ix86_vector_costs::finish_cost (const vector_costs 
> *scalar_costs)
>         m_costs[vect_body] = INT_MAX;
>      }
>
> +  ix86_vect_estimate_reg_pressure ();
> +
>    vector_costs::finish_cost (scalar_costs);
>  }
>
> --
> 2.31.1
>

Reply via email to