On Tue, 28 Oct 2025, Christopher Bazley wrote:
> vect_create_constant_vectors is updated to pad with zeros
> between the end of a group and the end of a vector of the type
> chosen for the SLP node, when used for BB SLP. This function
> calls gimple_build_vector, which also has to be updated for
> SVE vector types (by using the lower bound as the number of
> elements, e.g., 16 for VNx16QI).
> ---
> gcc/gimple-fold.cc | 2 +-
> gcc/tree-vect-slp.cc | 43 +++++++++++++++++++++++++++++++++++--------
> 2 files changed, 36 insertions(+), 9 deletions(-)
>
> diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
> index edcc04adc08..e5fe0ea12a7 100644
> --- a/gcc/gimple-fold.cc
> +++ b/gcc/gimple-fold.cc
> @@ -11275,7 +11275,7 @@ gimple_build_vector (gimple_stmt_iterator *gsi,
> {
> gimple_seq seq = NULL;
> tree type = builder->type ();
> - unsigned int nelts = TYPE_VECTOR_SUBPARTS (type).to_constant ();
> + unsigned int nelts = constant_lower_bound (TYPE_VECTOR_SUBPARTS (type));
I don't think this is desirable in a generic helper.  How does
the 'builder' class deal with the all-constant case?  It seems the
handling of constant vs. non-constant vector lengths will now differ
semantically (instead of ICEing in the non-constant case, as before).
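To spell out the semantic difference with a standalone sketch (poly_model
below is my own toy type, not the real poly_uint64 API): VNx16QI has
16 + 16x subparts, so to_constant () asserted before, while
constant_lower_bound () silently returns 16.

/* Toy model of the constant vs. non-constant vector length distinction.  */
#include <cassert>
#include <cstdio>

struct poly_model
{
  unsigned coeff0;  /* constant part, e.g. 16 */
  unsigned coeff1;  /* runtime multiple: 16 for VNx16QI, 0 for V16QI */

  bool is_constant () const { return coeff1 == 0; }
  unsigned to_constant () const { assert (is_constant ()); return coeff0; }
  unsigned constant_lower_bound () const { return coeff0; }
};

int
main ()
{
  poly_model v16qi = { 16, 0 };     /* fixed-length vector */
  poly_model vnx16qi = { 16, 16 };  /* scalable (SVE-style) vector */

  printf ("V16QI nelts = %u\n", v16qi.to_constant ());
  /* vnx16qi.to_constant () would assert here, i.e. the previous ICE.  */
  printf ("VNx16QI lower bound = %u\n", vnx16qi.constant_lower_bound ());
  return 0;
}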
> vec<constructor_elt, va_gc> *v;
> vec_alloc (v, nelts);
> for (i = 0; i < nelts; ++i)
> diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
> index 8b66751a8a9..b0c4c05a447 100644
> --- a/gcc/tree-vect-slp.cc
> +++ b/gcc/tree-vect-slp.cc
> @@ -10628,7 +10628,7 @@ vect_create_constant_vectors (vec_info *vinfo, slp_tree op_node)
> unsigned j, number_of_places_left_in_vector;
> tree vector_type;
> tree vop;
> - int group_size = op_node->ops.length ();
> + unsigned int group_size = op_node->ops.length ();
> unsigned int vec_num, i;
> unsigned number_of_copies = 1;
> bool constant_p;
> @@ -10639,6 +10639,11 @@ vect_create_constant_vectors (vec_info *vinfo, slp_tree op_node)
> vector_type = SLP_TREE_VECTYPE (op_node);
>
> unsigned int number_of_vectors = vect_get_num_copies (vinfo, op_node);
> + if (dump_enabled_p ())
> + dump_printf_loc (
> + MSG_NOTE, vect_location,
> + "Allocating %u constant and loop invariant defs in node %p\n",
> + number_of_vectors, (void *) op_node);
Please avoid sprinkling new debug stuff everywhere (you can keep
that separate in your tree).
> SLP_TREE_VEC_DEFS (op_node).create (number_of_vectors);
> auto_vec<tree> voprnds (number_of_vectors);
>
> @@ -10658,12 +10663,24 @@ vect_create_constant_vectors (vec_info *vinfo, slp_tree op_node)
> (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
> {s5, s6, s7, s8}. */
>
> - /* When using duplicate_and_interleave, we just need one element for
> - each scalar statement. */
> - if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits))
> - nunits = group_size;
> + unsigned int elt_count = group_size;
> + if (is_a<bb_vec_info> (vinfo))
> + {
> + /* We don't use duplicate_and_interleave for basic block vectorization.
> + We know that the group size fits within a single vector, so all we need
> + to do for VLA is to pad the constant to the minimum vector length. */
It would be much easier if the backend let us use the fixed (minimum)
size SVE vectors here, no?  That is what we are effectively enforcing
anyway, and those modes exist?  (they do for RVV, at least)
> + nunits = constant_lower_bound (TYPE_VECTOR_SUBPARTS (vector_type));
> + elt_count = MAX (nunits, group_size);
> + }
> + else
> + {
> + /* When using duplicate_and_interleave, we just need one element for
> + each scalar statement. */
> + if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits))
> + nunits = group_size;
> + }
>
> - number_of_copies = nunits * number_of_vectors / group_size;
> + number_of_copies = nunits * number_of_vectors / elt_count;
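Just to double-check the arithmetic with some assumed numbers: for a
BB SLP group of 4 constants, VNx16QI (lower bound 16) and
number_of_vectors == 1 this gives elt_count = MAX (16, 4) = 16 and
number_of_copies = 16 * 1 / 16 = 1, i.e. one padded constant per
vector def.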
>
> number_of_places_left_in_vector = nunits;
> constant_p = true;
> @@ -10673,9 +10690,15 @@ vect_create_constant_vectors (vec_info *vinfo, slp_tree op_node)
> stmt_vec_info insert_after = NULL;
> for (j = 0; j < number_of_copies; j++)
> {
> - tree op;
> - for (i = group_size - 1; op_node->ops.iterate (i, &op); i--)
> + for (i = elt_count; i-- > 0;)
> {
> + tree op;
> + if (i < group_size)
> + op = op_node->ops[i];
> + else
> + /* Pad with zeros. */
> + op = build_zero_cst (TREE_TYPE (vector_type));
> +
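For my own reference, a standalone model of the padded fill (the numbers,
a 4-element group and a 16-lane minimum vector, are assumed; zero is the
padding value, as in the patch):

/* Fill elt_count lanes from a group of group_size scalars, padding
   the tail with zeros, mirroring the loop above.  */
#include <cstdio>
#include <vector>

int
main ()
{
  std::vector<int> ops = { 11, 22, 33, 44 };  /* the SLP group */
  unsigned group_size = ops.size ();
  unsigned elt_count = 16;                    /* padded vector length */

  std::vector<int> lanes (elt_count);
  for (unsigned i = elt_count; i-- > 0;)
    lanes[i] = i < group_size ? ops[i] : 0;   /* pad with zeros */

  for (unsigned i = 0; i < elt_count; ++i)
    printf ("%d ", lanes[i]);
  printf ("\n");  /* 11 22 33 44 0 0 ... 0 */
  return 0;
}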
> /* Create 'vect_ = {op0,op1,...,opn}'. */
> tree orig_op = op;
> if (number_of_places_left_in_vector == nunits)
> @@ -10761,6 +10784,10 @@ vect_create_constant_vectors (vec_info *vinfo, slp_tree op_node)
> ? multiple_p (type_nunits, nunits)
> : known_eq (type_nunits, nunits))
> vec_cst = gimple_build_vector (&ctor_seq, &elts);
> + else if (is_a<bb_vec_info> (vinfo))
> + {
> + vec_cst = gimple_build_vector (&ctor_seq, &elts);
> + }
> else
> {
> if (permute_results.is_empty ())
>
--
Richard Biener <[email protected]>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)