Update vect_create_constant_vectors so that, when it is used for
basic-block (BB) SLP, it pads with zeros from the end of the group
to the end of a vector of the type chosen for the SLP node.  That
function calls gimple_build_vector, which also has to be updated to
handle SVE vector types, by using the constant lower bound of the
number of elements (e.g., 16 for VNx16QI).
---
 gcc/gimple-fold.cc   |  2 +-
 gcc/tree-vect-slp.cc | 43 +++++++++++++++++++++++++++++++++++--------
 2 files changed, 36 insertions(+), 9 deletions(-)

diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
index edcc04adc08..e5fe0ea12a7 100644
--- a/gcc/gimple-fold.cc
+++ b/gcc/gimple-fold.cc
@@ -11275,7 +11275,7 @@ gimple_build_vector (gimple_stmt_iterator *gsi,
       {
        gimple_seq seq = NULL;
        tree type = builder->type ();
-       unsigned int nelts = TYPE_VECTOR_SUBPARTS (type).to_constant ();
+       unsigned int nelts = constant_lower_bound (TYPE_VECTOR_SUBPARTS (type));
        vec<constructor_elt, va_gc> *v;
        vec_alloc (v, nelts);
        for (i = 0; i < nelts; ++i)
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 8b66751a8a9..b0c4c05a447 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -10628,7 +10628,7 @@ vect_create_constant_vectors (vec_info *vinfo, slp_tree op_node)
   unsigned j, number_of_places_left_in_vector;
   tree vector_type;
   tree vop;
-  int group_size = op_node->ops.length ();
+  unsigned int group_size = op_node->ops.length ();
   unsigned int vec_num, i;
   unsigned number_of_copies = 1;
   bool constant_p;
@@ -10639,6 +10639,11 @@ vect_create_constant_vectors (vec_info *vinfo, slp_tree op_node)
   vector_type = SLP_TREE_VECTYPE (op_node);
 
   unsigned int number_of_vectors = vect_get_num_copies (vinfo, op_node);
+  if (dump_enabled_p ())
+    dump_printf_loc (
+      MSG_NOTE, vect_location,
+      "Allocating %u constant and loop invariant defs in node %p\n",
+      number_of_vectors, (void *) op_node);
   SLP_TREE_VEC_DEFS (op_node).create (number_of_vectors);
   auto_vec<tree> voprnds (number_of_vectors);
 
@@ -10658,12 +10663,24 @@ vect_create_constant_vectors (vec_info *vinfo, slp_tree op_node)
      (s1, s2, ..., s8).  We will create two vectors {s1, s2, s3, s4} and
      {s5, s6, s7, s8}.  */
 
-  /* When using duplicate_and_interleave, we just need one element for
-     each scalar statement.  */
-  if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits))
-    nunits = group_size;
+  unsigned int elt_count = group_size;
+  if (is_a<bb_vec_info> (vinfo))
+    {
+      /* We don't use duplicate_and_interleave for basic block vectorization.
+       We know that the group size fits within a single vector, so all we need
+       to do for VLA is to pad the constant to the minimum vector length.  */
+      nunits = constant_lower_bound (TYPE_VECTOR_SUBPARTS (vector_type));
+      elt_count = MAX (nunits, group_size);
+    }
+  else
+    {
+      /* When using duplicate_and_interleave, we just need one element for
+        each scalar statement.  */
+      if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits))
+       nunits = group_size;
+    }
 
-  number_of_copies = nunits * number_of_vectors / group_size;
+  number_of_copies = nunits * number_of_vectors / elt_count;
 
   number_of_places_left_in_vector = nunits;
   constant_p = true;
@@ -10673,9 +10690,15 @@ vect_create_constant_vectors (vec_info *vinfo, slp_tree op_node)
   stmt_vec_info insert_after = NULL;
   for (j = 0; j < number_of_copies; j++)
     {
-      tree op;
-      for (i = group_size - 1; op_node->ops.iterate (i, &op); i--)
+      for (i = elt_count; i-- > 0;)
         {
+         tree op;
+         if (i < group_size)
+           op = op_node->ops[i];
+         else
+           /* Pad with zeros.  */
+           op = build_zero_cst (TREE_TYPE (vector_type));
+
           /* Create 'vect_ = {op0,op1,...,opn}'.  */
          tree orig_op = op;
          if (number_of_places_left_in_vector == nunits)
@@ -10761,6 +10784,10 @@ vect_create_constant_vectors (vec_info *vinfo, slp_tree op_node)
                       ? multiple_p (type_nunits, nunits)
                       : known_eq (type_nunits, nunits))
                vec_cst = gimple_build_vector (&ctor_seq, &elts);
+             else if (is_a<bb_vec_info> (vinfo))
+               {
+                 vec_cst = gimple_build_vector (&ctor_seq, &elts);
+               }
              else
                {
                  if (permute_results.is_empty ())
-- 
2.43.0

Reply via email to