> -----Original Message-----
> From: Christopher Bazley <[email protected]>
> Sent: 19 December 2025 15:09
> To: [email protected]
> Cc: [email protected]; Tamar Christina
> <[email protected]>
> Subject: [PATCH v8 03/10] Update constant creation for BB SLP with
> predicated tails
> 
> Created a new function, gimple_build_vector_from_elems,
> for use when creating vectorized definitions for basic block
> vectorization in vect_create_constant_vectors.
> 
> The existing gimple_build_vector function cannot be used
> for SVE vector types because it relies on the type
> associated with the tree_vector_builder having a constant
> number of subparts. Even if that limitation were lifted, the
> possibility of tree_vector_builder patterns being used is
> inappropriate.
> 
> The new function takes a vector type and vec of tree nodes
> giving the element values to put into the built vector, instead of an
> instance of tree_vector_builder. If the number of values is zero then
> a zero constant is built. If all values are constant then a vector
> constant is built. Otherwise, a new constructor node is created.
> 
> gcc/ChangeLog:
> 
>       * gimple-fold.cc (gimple_build_vector_from_elems): Define a
>       new function to build a vector from a list of elements that need
>       not be complete.
>       * gimple-fold.h (gimple_build_vector_from_elems): Declare a new
>       function and a simpler overloaded version with fewer parameters.
>       * tree-vect-slp.cc (vect_create_constant_vectors):
>       For basic block SLP vectorization, pad each constant to the
>       minimum vector length (which is the same as the actual vector
>       length, except in the case of variable-length vector types).
>       Use gimple_build_vector_from_elems instead of
>       duplicate_and_interleave to create non-uniform constant
>       vectors for BB SLP vectorization.
> 
> ---
>  gcc/gimple-fold.cc   | 54
> ++++++++++++++++++++++++++++++++++++++++++++
>  gcc/gimple-fold.h    | 14 ++++++++++++
>  gcc/tree-vect-slp.cc | 42 +++++++++++++++++++++++++++-------
>  3 files changed, 102 insertions(+), 8 deletions(-)
> 
> diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
> index 55423189191..45baf891c40 100644
> --- a/gcc/gimple-fold.cc
> +++ b/gcc/gimple-fold.cc
> @@ -11365,6 +11365,60 @@ gimple_build_vector (gimple_stmt_iterator
> *gsi,
>    return builder->build ();
>  }
> 
> +/* Build a vector of type VECTYPE from a partial list of ELTS, handling the 
> case
> +   in which some elements are non-constant, and padding the tail with zeros
> +   if the list of values may be shorter than the number of subparts implied 
> by
> +   VECTYPE (including when the vector type is variable-length).
> +
> +   Return a gimple value for the result, inserting any new instructions
> +   to GSI honoring BEFORE and UPDATE.  */
> +
> +tree
> +gimple_build_vector_from_elems (gimple_stmt_iterator *gsi, bool before,
> +                             gsi_iterator_update update, location_t loc,
> +                             tree vectype, const vec<tree> &elts)
> +{
> +  unsigned int encoded_nelts = elts.length ();
> +  gimple_seq seq = NULL;
> +  gcc_assert (TREE_CODE (vectype) == VECTOR_TYPE);
> +  unsigned int lower_bound
> +    = constant_lower_bound (TYPE_VECTOR_SUBPARTS (vectype));
> +  gcc_assert (encoded_nelts <= lower_bound);
> +
> +  if (encoded_nelts == 0)
> +    return build_zero_cst (vectype);
> +
> +  /* Prepare a vector of constructor elements and find out whether all
> +     of the element values are constant.  */
> +  vec<constructor_elt, va_gc> *v;
> +  vec_alloc (v, encoded_nelts);
> +  bool is_constant = true;
> +
> +  for (unsigned int i = 0; i < encoded_nelts; ++i)
> +    {
> +      if (!CONSTANT_CLASS_P (elts[i]))
> +     is_constant = false;
> +
> +      CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[i]);
> +    }
> +
> +  /* If all element values are constant then we can return a new VECTOR_CST
> +     node.  Any elements for which no value is supplied will be zero.  */
> +  if (is_constant)
> +    return build_vector_from_ctor (vectype, v);
> +
> +  tree res;
> +  if (gimple_in_ssa_p (cfun))
> +    res = make_ssa_name (vectype);
> +  else
> +    res = create_tmp_reg (vectype);
> +  gimple *stmt = gimple_build_assign (res, build_constructor (vectype, v));
> +  gimple_set_location (stmt, loc);
> +  gimple_seq_add_stmt_without_update (&seq, stmt);
> +  gimple_build_insert_seq (gsi, before, update, seq);
> +  return res;
> +}
> +
>  /* Emit gimple statements into &stmts that take a value given in OLD_SIZE
>     and generate a value guaranteed to be rounded upwards to ALIGN.
> 
> diff --git a/gcc/gimple-fold.h b/gcc/gimple-fold.h
> index 7244941722d..358c269aacc 100644
> --- a/gcc/gimple-fold.h
> +++ b/gcc/gimple-fold.h
> @@ -243,6 +243,20 @@ gimple_build_vector (gimple_seq *seq,
> tree_vector_builder *builder)
>                             UNKNOWN_LOCATION, builder);
>  }
> 
> +extern tree gimple_build_vector_from_elems (gimple_stmt_iterator *, bool,
> +                                         enum gsi_iterator_update,
> +                                         location_t, tree vectype,
> +                                         const vec<tree> &);
> +
> +inline tree
> +gimple_build_vector_from_elems (gimple_seq *seq, tree vectype,
> +                             const vec<tree> &elts)
> +{
> +  gimple_stmt_iterator gsi = gsi_last (*seq);
> +  return gimple_build_vector_from_elems (&gsi, false,
> GSI_CONTINUE_LINKING,
> +                                      UNKNOWN_LOCATION, vectype,
> elts);
> +}
> +
>  extern tree gimple_build_round_up (gimple_stmt_iterator *, bool,
>                                  enum gsi_iterator_update,
>                                  location_t, tree, tree,
> diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
> index d541077c812..dae6b9cdaaf 100644
> --- a/gcc/tree-vect-slp.cc
> +++ b/gcc/tree-vect-slp.cc
> @@ -10796,7 +10796,7 @@ vect_create_constant_vectors (vec_info *vinfo,
> slp_tree op_node)
>    unsigned j, number_of_places_left_in_vector;
>    tree vector_type;
>    tree vop;
> -  int group_size = op_node->ops.length ();
> +  unsigned int group_size = op_node->ops.length ();
>    unsigned int vec_num, i;
>    unsigned number_of_copies = 1;
>    bool constant_p;
> @@ -10826,12 +10826,27 @@ vect_create_constant_vectors (vec_info
> *vinfo, slp_tree op_node)
>       (s1, s2, ..., s8).  We will create two vectors {s1, s2, s3, s4} and
>       {s5, s6, s7, s8}.  */
> 
> -  /* When using duplicate_and_interleave, we just need one element for
> -     each scalar statement.  */
> -  if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits))
> -    nunits = group_size;
> +  unsigned int elt_count = group_size;
> +  if (is_a<bb_vec_info> (vinfo))
> +    {
> +      /* We don't use duplicate_and_interleave for basic block vectorization.
> +      We know that either the group size is exactly divisible by the vector
> +      length or it fits within a single vector, so all we need to do for
> +      VLA is to pad the constant to the minimum vector length.  */
> +      nunits = constant_lower_bound (TYPE_VECTOR_SUBPARTS
> (vector_type));
> +      gcc_checking_assert (multiple_p (group_size, nunits)
> +                        || known_le (group_size, nunits));
> +      elt_count = MAX (nunits, group_size);

I've been trying to read up on the discussion, but there's a lot of it, so
apologies if this has already been covered.  This isn't a blocker, but I
wanted to point out that even for BB vectorization duplicate and interleave
can be useful.  With e.g. {1, a, 1, a, 1, a} we can avoid the slow literal
pool by interleaving {1, 1, ...} and {a, a, ...}, both of which are cheap to
make.

But this is not compatible with the requirement that the other lanes be 0.
I'd have gone with the other lanes being undefined, as any use of them needs
to be predicated anyway.  This makes it easier to create new vectors.  With
BB SLP we don't have iterations to amortize the cost of invariants.

So not a blocker, but something we might want to consider for GCC 17.

Thanks,
Tamar

> +    }
> +  else
> +    {
> +      /* When using duplicate_and_interleave, we just need one element for
> +      each scalar statement.  */
> +      if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits))
> +     nunits = group_size;
> +    }
> 
> -  number_of_copies = nunits * number_of_vectors / group_size;
> +  number_of_copies = nunits * number_of_vectors / elt_count;
> 
>    number_of_places_left_in_vector = nunits;
>    constant_p = true;
> @@ -10841,9 +10856,15 @@ vect_create_constant_vectors (vec_info *vinfo,
> slp_tree op_node)
>    stmt_vec_info insert_after = NULL;
>    for (j = 0; j < number_of_copies; j++)
>      {
> -      tree op;
> -      for (i = group_size - 1; op_node->ops.iterate (i, &op); i--)
> +      for (i = elt_count; i-- > 0;)
>          {
> +       tree op;
> +       if (i < group_size)
> +         op = op_node->ops[i];
> +       else
> +         /* Pad with zeros.  */
> +         op = build_zero_cst (TREE_TYPE (vector_type));
> +
>            /* Create 'vect_ = {op0,op1,...,opn}'.  */
>         tree orig_op = op;
>         if (number_of_places_left_in_vector == nunits)
> @@ -10929,6 +10950,11 @@ vect_create_constant_vectors (vec_info *vinfo,
> slp_tree op_node)
>                      ? multiple_p (type_nunits, nunits)
>                      : known_eq (type_nunits, nunits))
>               vec_cst = gimple_build_vector (&ctor_seq, &elts);
> +           else if (is_a<bb_vec_info> (vinfo))
> +             {
> +               vec_cst = gimple_build_vector_from_elems (&ctor_seq,
> +                                                         elts.type (), elts);
> +             }
>             else
>               {
>                 if (permute_results.is_empty ())
> --
> 2.43.0

Reply via email to