To decide whether to create a new SLP instance for BB SLP,
vect_analyze_slp_instance will need the minimum number of vector
elements (nunits) in the SLP tree, which must not be less than the
group size (otherwise "unrolling" is required). All uses of
max_nunits are therefore replaced with a new struct, slp_tree_nunits,
that encapsulates both the minimum and the maximum.
---
gcc/tree-vect-slp.cc | 172 ++++++++++++++++++++++--------------------
gcc/tree-vectorizer.h | 47 +++++++++++-
2 files changed, 137 insertions(+), 82 deletions(-)
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index e02b3379bb4..3d78f91c93a 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -130,7 +130,7 @@ _slp_tree::_slp_tree ()
this->cycle_info.reduc_idx = -1;
SLP_TREE_REF_COUNT (this) = 1;
this->failed = NULL;
- this->max_nunits = 1;
+ this->nunits = {UINT64_MAX, 1};
this->lanes = 0;
SLP_TREE_TYPE (this) = undef_vec_info_type;
this->data = NULL;
@@ -1050,14 +1050,14 @@ compatible_calls_p (gcall *call1, gcall *call2, bool
allow_two_operators)
/* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
caller's attempt to find the vector type in STMT_INFO with the narrowest
element type. Return true if VECTYPE is nonnull and if it is valid
- for STMT_INFO. When returning true, update MAX_NUNITS to reflect the
- number of units in VECTYPE. GROUP_SIZE and MAX_NUNITS are as for
+ for STMT_INFO. When returning true, update NUNITS to reflect the
+ number of units in VECTYPE. GROUP_SIZE and NUNITS are as for
vect_build_slp_tree. */
static bool
-vect_record_max_nunits (vec_info *vinfo, stmt_vec_info stmt_info,
- unsigned int group_size,
- tree vectype, poly_uint64 *max_nunits)
+vect_record_nunits (vec_info *vinfo, stmt_vec_info stmt_info,
+ unsigned int group_size, tree vectype,
+ slp_tree_nunits *nunits)
{
if (!vectype)
{
@@ -1070,7 +1070,7 @@ vect_record_max_nunits (vec_info *vinfo, stmt_vec_info
stmt_info,
}
/* If populating the vector type requires unrolling then fail
- before adjusting *max_nunits for basic-block vectorization. */
+ before adjusting *nunits for basic-block vectorization. */
if (is_a <bb_vec_info> (vinfo)
&& !multiple_p (group_size, TYPE_VECTOR_SUBPARTS (vectype)))
{
@@ -1083,7 +1083,7 @@ vect_record_max_nunits (vec_info *vinfo, stmt_vec_info
stmt_info,
}
/* In case of multiple types we need to detect the smallest type. */
- vect_update_max_nunits (max_nunits, vectype);
+ vect_update_nunits (nunits, vectype);
return true;
}
@@ -1104,7 +1104,7 @@ vect_record_max_nunits (vec_info *vinfo, stmt_vec_info
stmt_info,
static bool
vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
vec<stmt_vec_info> stmts, unsigned int group_size,
- poly_uint64 *max_nunits, bool *matches,
+ slp_tree_nunits *nunits, bool *matches,
bool *two_operators, tree *node_vectype)
{
unsigned int i;
@@ -1144,8 +1144,8 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char
*swap,
as if nunits was not an issue. This allows splitting of groups
to happen. */
if (nunits_vectype
- && !vect_record_max_nunits (vinfo, first_stmt_info, group_size,
- nunits_vectype, max_nunits))
+ && !vect_record_nunits (vinfo, first_stmt_info, group_size,
+ nunits_vectype, nunits))
{
gcc_assert (is_a <bb_vec_info> (vinfo));
maybe_soft_fail = true;
@@ -1809,14 +1809,14 @@ vect_slp_linearize_chain (vec_info *vinfo,
static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
vec<stmt_vec_info> stmts, unsigned int group_size,
- poly_uint64 *max_nunits,
+ slp_tree_nunits *nunits,
bool *matches, unsigned *limit, unsigned *tree_size,
scalar_stmts_to_slp_tree_map_t *bst_map);
static slp_tree
vect_build_slp_tree (vec_info *vinfo,
vec<stmt_vec_info> stmts, unsigned int group_size,
- poly_uint64 *max_nunits,
+ slp_tree_nunits *nunits,
bool *matches, unsigned *limit, unsigned *tree_size,
scalar_stmts_to_slp_tree_map_t *bst_map)
{
@@ -1829,7 +1829,7 @@ vect_build_slp_tree (vec_info *vinfo,
if (!(*leader)->failed)
{
SLP_TREE_REF_COUNT (*leader)++;
- vect_update_max_nunits (max_nunits, (*leader)->max_nunits);
+ vect_update_nunits (nunits, (*leader)->nunits);
stmts.release ();
return *leader;
}
@@ -1863,9 +1863,9 @@ vect_build_slp_tree (vec_info *vinfo,
dump_printf_loc (MSG_NOTE, vect_location,
"starting SLP discovery for node %p\n", (void *) res);
- poly_uint64 this_max_nunits = 1;
+ slp_tree_nunits this_nunits{};
slp_tree res_ = vect_build_slp_tree_2 (vinfo, res, stmts, group_size,
- &this_max_nunits,
+ &this_nunits,
matches, limit, tree_size, bst_map);
if (!res_)
{
@@ -1894,8 +1894,8 @@ vect_build_slp_tree (vec_info *vinfo,
"SLP discovery for node %p succeeded\n",
(void *) res);
gcc_assert (res_ == res);
- res->max_nunits = this_max_nunits;
- vect_update_max_nunits (max_nunits, this_max_nunits);
+ res->nunits = this_nunits;
+ vect_update_nunits (nunits, this_nunits);
/* Keep a reference for the bst_map use. */
SLP_TREE_REF_COUNT (res)++;
}
@@ -1953,12 +1953,12 @@ vect_slp_build_two_operator_nodes (slp_tree perm, tree
vectype,
static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
vec<stmt_vec_info> stmts, unsigned int group_size,
- poly_uint64 *max_nunits,
+ slp_tree_nunits *nunits,
bool *matches, unsigned *limit, unsigned *tree_size,
scalar_stmts_to_slp_tree_map_t *bst_map)
{
unsigned nops, i, this_tree_size = 0;
- poly_uint64 this_max_nunits = *max_nunits;
+ slp_tree_nunits this_nunits = *nunits;
matches[0] = false;
@@ -1984,8 +1984,7 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
tree scalar_type = TREE_TYPE (PHI_RESULT (stmt));
tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
group_size);
- if (!vect_record_max_nunits (vinfo, stmt_info, group_size, vectype,
- max_nunits))
+ if (!vect_record_nunits (vinfo, stmt_info, group_size, vectype, nunits))
return NULL;
vect_def_type def_type = STMT_VINFO_DEF_TYPE (stmt_info);
@@ -2038,7 +2037,7 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
unsigned char *swap = XALLOCAVEC (unsigned char, group_size);
tree vectype = NULL_TREE;
if (!vect_build_slp_tree_1 (vinfo, swap, stmts, group_size,
- &this_max_nunits, matches, &two_operators,
+ &this_nunits, matches, &two_operators,
&vectype))
return NULL;
@@ -2050,7 +2049,7 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
gcc_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
else
{
- *max_nunits = this_max_nunits;
+ *nunits = this_nunits;
(*tree_size)++;
node = vect_create_new_slp_node (node, stmts, 0);
SLP_TREE_VECTYPE (node) = vectype;
@@ -2132,7 +2131,7 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
bool *matches2 = XALLOCAVEC (bool, dr_group_size);
slp_tree unperm_load
= vect_build_slp_tree (vinfo, stmts2, dr_group_size,
- &this_max_nunits, matches2, limit,
+ &this_nunits, matches2, limit,
&this_tree_size, bst_map);
/* When we are able to do the full masked load emit that
followed by 'node' being the desired final permutation. */
@@ -2435,7 +2434,7 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
else
op_stmts.quick_push (NULL);
child = vect_build_slp_tree (vinfo, op_stmts,
- group_size, &this_max_nunits,
+ group_size, &this_nunits,
matches, limit,
&this_tree_size, bst_map);
/* ??? We're likely getting too many fatal mismatches
@@ -2591,7 +2590,7 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
children[i] = child;
}
*tree_size += this_tree_size + 1;
- *max_nunits = this_max_nunits;
+ *nunits = this_nunits;
while (!chains.is_empty ())
chains.pop ().release ();
return node;
@@ -2870,7 +2869,7 @@ out:
def_stmts2.create (1);
def_stmts2.quick_push (oprnd_info->def_stmts[0]);
child = vect_build_slp_tree (vinfo, def_stmts2, 1,
- &this_max_nunits,
+ &this_nunits,
matches, limit,
&this_tree_size, bst_map);
if (child)
@@ -2888,7 +2887,7 @@ out:
.quick_push (std::make_pair (0u, 0u));
}
SLP_TREE_CHILDREN (pnode).quick_push (child);
- pnode->max_nunits = child->max_nunits;
+ pnode->nunits = child->nunits;
children.safe_push (pnode);
oprnd_info->def_stmts = vNULL;
continue;
@@ -2898,7 +2897,7 @@ out:
}
if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
- group_size, &this_max_nunits,
+ group_size, &this_nunits,
matches, limit,
&this_tree_size, bst_map)) != NULL)
{
@@ -2987,7 +2986,7 @@ out:
/* And try again with scratch 'matches' ... */
bool *tem = XALLOCAVEC (bool, group_size);
if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
- group_size, &this_max_nunits,
+ group_size, &this_nunits,
tem, limit,
&this_tree_size, bst_map)) != NULL)
{
@@ -3093,7 +3092,7 @@ fail:
}
*tree_size += this_tree_size + 1;
- *max_nunits = this_max_nunits;
+ *nunits = this_nunits;
if (two_operators)
{
@@ -3239,16 +3238,15 @@ vect_print_slp_tree (dump_flags_t dump_kind,
dump_location_t loc,
dump_metadata_t metadata (dump_kind, loc.get_impl_location ());
dump_user_location_t user_loc = loc.get_user_location ();
- dump_printf_loc (metadata, user_loc,
- "node%s %p (max_nunits=" HOST_WIDE_INT_PRINT_UNSIGNED
- ", refcnt=%u)",
- SLP_TREE_DEF_TYPE (node) == vect_external_def
- ? " (external)"
- : (SLP_TREE_DEF_TYPE (node) == vect_constant_def
- ? " (constant)"
- : ""), (void *) node,
- estimated_poly_value (node->max_nunits),
- SLP_TREE_REF_COUNT (node));
+ dump_printf_loc (
+ metadata, user_loc,
+ "node%s %p (nunits.min=" HOST_WIDE_INT_PRINT_UNSIGNED
+ ", nunits.max=" HOST_WIDE_INT_PRINT_UNSIGNED ", refcnt=%u)",
+ SLP_TREE_DEF_TYPE (node) == vect_external_def
+ ? " (external)"
+ : (SLP_TREE_DEF_TYPE (node) == vect_constant_def ? " (constant)" : ""),
+ (void *) node, estimated_poly_value (node->nunits.min),
+ estimated_poly_value (node->nunits.max), SLP_TREE_REF_COUNT (node));
if (SLP_TREE_VECTYPE (node))
dump_printf (metadata, " %T", SLP_TREE_VECTYPE (node));
dump_printf (metadata, "%s",
@@ -3615,9 +3613,9 @@ vect_split_slp_store_group (stmt_vec_info first_vinfo,
unsigned group1_size)
statements and a vector of NUNITS elements. */
static poly_uint64
-calculate_unrolling_factor (poly_uint64 nunits, unsigned int group_size)
+calculate_unrolling_factor (slp_tree_nunits nunits, unsigned int group_size)
{
- return exact_div (common_multiple (nunits, group_size), group_size);
+ return exact_div (common_multiple (nunits.max, group_size), group_size);
}
/* Helper that checks to see if a node is a load node. */
@@ -3679,9 +3677,9 @@ optimize_load_redistribution_1
(scalar_stmts_to_slp_tree_map_t *bst_map,
(void *) root);
bool *matches = XALLOCAVEC (bool, group_size);
- poly_uint64 max_nunits = 1;
+ slp_tree_nunits nunits{};
unsigned tree_size = 0, limit = 1;
- node = vect_build_slp_tree (vinfo, stmts, group_size, &max_nunits,
+ node = vect_build_slp_tree (vinfo, stmts, group_size, &nunits,
matches, &limit, &tree_size, bst_map);
if (!node)
stmts.release ();
@@ -3864,14 +3862,14 @@ vect_analyze_slp_instance (vec_info *vinfo,
static slp_tree
vect_build_slp_store_interleaving (vec<slp_tree> &rhs_nodes,
vec<stmt_vec_info> &scalar_stmts,
- poly_uint64 max_nunits)
+ slp_tree_nunits nunits)
{
unsigned int group_size = scalar_stmts.length ();
slp_tree node = vect_create_new_slp_node (scalar_stmts,
SLP_TREE_CHILDREN
(rhs_nodes[0]).length ());
SLP_TREE_VECTYPE (node) = SLP_TREE_VECTYPE (rhs_nodes[0]);
- node->max_nunits = max_nunits;
+ node->nunits = nunits;
for (unsigned l = 0;
l < SLP_TREE_CHILDREN (rhs_nodes[0]).length (); ++l)
{
@@ -3881,7 +3879,7 @@ vect_build_slp_store_interleaving (vec<slp_tree>
&rhs_nodes,
SLP_TREE_CHILDREN (node).quick_push (perm);
SLP_TREE_LANE_PERMUTATION (perm).create (group_size);
SLP_TREE_VECTYPE (perm) = SLP_TREE_VECTYPE (node);
- perm->max_nunits = max_nunits;
+ perm->nunits = nunits;
SLP_TREE_LANES (perm) = group_size;
/* ??? We should set this NULL but that's not expected. */
SLP_TREE_REPRESENTATIVE (perm)
@@ -3937,7 +3935,7 @@ vect_build_slp_store_interleaving (vec<slp_tree>
&rhs_nodes,
SLP_TREE_LANES (permab) = n;
SLP_TREE_LANE_PERMUTATION (permab).create (n);
SLP_TREE_VECTYPE (permab) = SLP_TREE_VECTYPE (perm);
- permab->max_nunits = max_nunits;
+ permab->nunits = nunits;
/* ??? Should be NULL but that's not expected. */
SLP_TREE_REPRESENTATIVE (permab) = SLP_TREE_REPRESENTATIVE (perm);
SLP_TREE_CHILDREN (permab).quick_push (a);
@@ -4008,7 +4006,7 @@ vect_build_slp_store_interleaving (vec<slp_tree>
&rhs_nodes,
SLP_TREE_LANES (permab) = n;
SLP_TREE_LANE_PERMUTATION (permab).create (n);
SLP_TREE_VECTYPE (permab) = SLP_TREE_VECTYPE (perm);
- permab->max_nunits = max_nunits;
+ permab->nunits = nunits;
/* ??? Should be NULL but that's not expected. */
SLP_TREE_REPRESENTATIVE (permab) = SLP_TREE_REPRESENTATIVE (perm);
SLP_TREE_CHILDREN (permab).quick_push (a);
@@ -4088,7 +4086,7 @@ vect_build_slp_instance (vec_info *vinfo,
/* Build the tree for the SLP instance. */
unsigned int group_size = scalar_stmts.length ();
bool *matches = XALLOCAVEC (bool, group_size);
- poly_uint64 max_nunits = 1;
+ slp_tree_nunits nunits{};
unsigned tree_size = 0;
slp_tree node = NULL;
@@ -4099,19 +4097,19 @@ vect_build_slp_instance (vec_info *vinfo,
}
else
node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
- &max_nunits, matches, limit,
+ &nunits, matches, limit,
&tree_size, bst_map);
if (node != NULL)
{
/* Calculate the unrolling factor based on the smallest type. */
poly_uint64 unrolling_factor
- = calculate_unrolling_factor (max_nunits, group_size);
+ = calculate_unrolling_factor (nunits, group_size);
if (maybe_ne (unrolling_factor, 1U)
&& is_a <bb_vec_info> (vinfo))
{
unsigned HOST_WIDE_INT const_max_nunits;
- if (!max_nunits.is_constant (&const_max_nunits)
+ if (!nunits.max.is_constant (&const_max_nunits)
|| const_max_nunits > group_size)
{
if (dump_enabled_p ())
@@ -4345,7 +4343,7 @@ vect_analyze_slp_reduc_chain (loop_vec_info vinfo,
unsigned int group_size = scalar_stmts.length ();
bool *matches = XALLOCAVEC (bool, group_size);
- poly_uint64 max_nunits = 1;
+ slp_tree_nunits max_nunits{};
unsigned tree_size = 0;
slp_tree node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
&max_nunits, matches, limit,
@@ -4488,7 +4486,7 @@ vect_analyze_slp_reduc_chain (loop_vec_info vinfo,
/* Build the tree for the SLP instance. */
unsigned int group_size = scalar_stmts.length ();
bool *matches = XALLOCAVEC (bool, group_size);
- poly_uint64 max_nunits = 1;
+ slp_tree_nunits nunits{};
unsigned tree_size = 0;
/* ??? We need this only for SLP discovery. */
@@ -4496,7 +4494,7 @@ vect_analyze_slp_reduc_chain (loop_vec_info vinfo,
REDUC_GROUP_FIRST_ELEMENT (scalar_stmts[i]) = scalar_stmts[0];
slp_tree node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
- &max_nunits, matches, limit,
+ &nunits, matches, limit,
&tree_size, bst_map);
for (unsigned i = 0; i < scalar_stmts.length (); ++i)
@@ -4638,11 +4636,11 @@ vect_analyze_slp_reduction (loop_vec_info vinfo,
/* Build the tree for the SLP instance. */
unsigned int group_size = scalar_stmts.length ();
bool *matches = XALLOCAVEC (bool, group_size);
- poly_uint64 max_nunits = 1;
+ slp_tree_nunits nunits{};
unsigned tree_size = 0;
slp_tree node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
- &max_nunits, matches, limit,
+ &nunits, matches, limit,
&tree_size, bst_map);
if (node != NULL)
{
@@ -4741,7 +4739,7 @@ vect_analyze_slp_instance (vec_info *vinfo,
/* Build the tree for the SLP instance. */
unsigned int group_size = scalar_stmts.length ();
bool *matches = XALLOCAVEC (bool, group_size);
- poly_uint64 max_nunits = 1;
+ slp_tree_nunits nunits{};
unsigned tree_size = 0;
unsigned i;
@@ -4753,26 +4751,40 @@ vect_analyze_slp_instance (vec_info *vinfo,
}
else
node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
- &max_nunits, matches, limit,
+ &nunits, matches, limit,
&tree_size, bst_map);
if (node != NULL)
{
/* Calculate the unrolling factor based on the smallest type. */
poly_uint64 unrolling_factor
- = calculate_unrolling_factor (max_nunits, group_size);
+ = calculate_unrolling_factor (nunits, group_size);
if (maybe_ne (unrolling_factor, 1U)
- && is_a <bb_vec_info> (vinfo))
+ && is_a<bb_vec_info> (vinfo)
+ && !known_ge (nunits.min, group_size))
{
unsigned HOST_WIDE_INT const_max_nunits;
- if (!max_nunits.is_constant (&const_max_nunits)
+ if (!nunits.max.is_constant (&const_max_nunits)
|| const_max_nunits > group_size)
{
if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "Build SLP failed: store group "
- "size not a multiple of the vector size "
- "in basic block SLP\n");
+ {
+ if (nunits.max.is_constant (&const_max_nunits))
+ dump_printf_loc (
+ MSG_MISSED_OPTIMIZATION, vect_location,
+ "Build SLP failed: store group "
+ "size %u not a multiple of the vector size "
+ "%wu in basic block SLP\n",
+ group_size, const_max_nunits);
+ else
+ dump_printf_loc (
+ MSG_MISSED_OPTIMIZATION, vect_location,
+ "Build SLP failed: store group "
+ "size %u not a multiple of the vector size "
+ "in basic block SLP\n",
+ group_size);
+ }
+
vect_free_slp_tree (node);
return false;
}
@@ -4929,7 +4941,7 @@ vect_analyze_slp_instance (vec_info *vinfo,
/* Analyze the stored values and pinch them together with
a permute node so we can preserve the whole store group. */
auto_vec<slp_tree> rhs_nodes;
- poly_uint64 max_nunits = 1;
+ slp_tree_nunits nunits{};
unsigned int rhs_common_nlanes = 0;
unsigned int start = 0, end = i;
@@ -4940,14 +4952,14 @@ vect_analyze_slp_instance (vec_info *vinfo,
substmts.create (end - start);
for (unsigned j = start; j < end; ++j)
substmts.quick_push (scalar_stmts[j]);
- max_nunits = 1;
+ nunits = {UINT64_MAX, 1};
node = vect_build_slp_tree (vinfo, substmts, end - start,
- &max_nunits,
+ &nunits,
matches, limit, &tree_size, bst_map);
if (node)
{
rhs_nodes.safe_push (node);
- vect_update_max_nunits (&max_nunits, node->max_nunits);
+ vect_update_nunits (&nunits, node->nunits);
if (start == 0)
rhs_common_nlanes = SLP_TREE_LANES (node);
else if (rhs_common_nlanes != SLP_TREE_LANES (node))
@@ -5011,7 +5023,7 @@ vect_analyze_slp_instance (vec_info *vinfo,
SLP_TREE_CHILDREN
(rhs_nodes[0]).length ());
SLP_TREE_VECTYPE (node) = SLP_TREE_VECTYPE (rhs_nodes[0]);
- node->max_nunits = max_nunits;
+ node->nunits = nunits;
node->ldst_lanes = true;
SLP_TREE_CHILDREN (node)
.reserve_exact (SLP_TREE_CHILDREN (rhs_nodes[0]).length ()
@@ -5029,7 +5041,7 @@ vect_analyze_slp_instance (vec_info *vinfo,
}
else
node = vect_build_slp_store_interleaving (rhs_nodes, scalar_stmts,
- max_nunits);
+ nunits);
while (!rhs_nodes.is_empty ())
vect_free_slp_tree (rhs_nodes.pop ());
@@ -5294,13 +5306,13 @@ vect_lower_load_permutations (loop_vec_info loop_vinfo,
}
for (unsigned i = 0; i < DR_GROUP_GAP (first); ++i)
stmts.quick_push (NULL);
- poly_uint64 max_nunits = 1;
+ slp_tree_nunits nunits{};
bool *matches = XALLOCAVEC (bool, group_lanes);
unsigned limit = 1;
unsigned tree_size = 0;
slp_tree l0 = vect_build_slp_tree (loop_vinfo, stmts,
group_lanes,
- &max_nunits, matches, &limit,
+ &nunits, matches, &limit,
&tree_size, bst_map);
gcc_assert (!SLP_TREE_LOAD_PERMUTATION (l0).exists ());
@@ -8151,7 +8163,7 @@ vect_update_slp_vf_for_node (slp_tree node, poly_uint64
&vf,
/* We do not visit SLP nodes for constants or externals - those neither
have a vector type set yet (vectorizable_* does this) nor do they
- have max_nunits set. Instead we rely on internal nodes max_nunit
+ have nunits set. Instead we rely on internal nodes' nunits
to cover constant/external operands.
Note that when we stop using fixed size vectors externs and constants
shouldn't influence the (minimum) vectorization factor, instead
@@ -8159,7 +8171,7 @@ vect_update_slp_vf_for_node (slp_tree node, poly_uint64
&vf,
assign vector types to constants and externals and cause iteration
to a higher vectorization factor when required. */
poly_uint64 node_vf
- = calculate_unrolling_factor (node->max_nunits, SLP_TREE_LANES (node));
+ = calculate_unrolling_factor (node->nunits, SLP_TREE_LANES (node));
vf = force_common_multiple (vf, node_vf);
/* For permute nodes that are fed from externs or constants we have to
@@ -8169,7 +8181,7 @@ vect_update_slp_vf_for_node (slp_tree node, poly_uint64
&vf,
if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
{
poly_uint64 child_vf
- = calculate_unrolling_factor (node->max_nunits,
+ = calculate_unrolling_factor (node->nunits,
SLP_TREE_LANES (child));
vf = force_common_multiple (vf, child_vf);
}
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 359c994139b..ecfdb7d88ef 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -254,6 +254,18 @@ typedef auto_vec<std::pair<unsigned, unsigned>, 16>
auto_lane_permutation_t;
typedef vec<unsigned> load_permutation_t;
typedef auto_vec<unsigned, 16> auto_load_permutation_t;
+struct slp_tree_nunits
+{
+ slp_tree_nunits () = default;
+
+ /* The minimum number of vector elements for the subtree rooted at this
+ node. Starts at UINT64_MAX, meaning nothing has been recorded yet. */
+ poly_uint64 min = UINT64_MAX;
+ /* The maximum number of vector elements for the subtree rooted at this
+ node. Starts at 1 before anything has been recorded. */
+ poly_uint64 max = 1;
+};
+
struct vect_data {
virtual ~vect_data () = default;
};
@@ -336,9 +348,9 @@ struct _slp_tree {
/* Reference count in the SLP graph. */
unsigned int refcnt;
- /* The maximum number of vector elements for the subtree rooted
+ /* The minimum and maximum number of vector elements for the subtree rooted
at this node. */
- poly_uint64 max_nunits;
+ slp_tree_nunits nunits;
/* The DEF type of this node. */
enum vect_def_type def_type;
/* The number of scalar lanes produced by this node. */
@@ -2326,6 +2338,37 @@ vect_update_max_nunits (poly_uint64 *max_nunits, tree
vectype)
vect_update_max_nunits (max_nunits, TYPE_VECTOR_SUBPARTS (vectype));
}
+/* Update minimum and maximum unit count *NUNITS so that it accounts for
+ NEW_NUNITS. *NUNITS can be {UINT64_MAX, 1} if we haven't yet recorded
+ anything. If NEW_NUNITS is {UINT64_MAX, 1} this function has no effect. */
+
+inline void
+vect_update_nunits (slp_tree_nunits *nunits, slp_tree_nunits new_nunits)
+{
+ vect_update_max_nunits (&nunits->max, new_nunits.max);
+
+ /* Also track the smallest unit count, which tells us whether every
+ individual choice of vector type requires no "unrolling". UINT64_MAX
+ means "nothing recorded". All unit counts have the form
+ vec_info::vector_size * X for some rational X, so they are ordered. */
+ if (!known_eq (new_nunits.min, UINT64_MAX))
+ nunits->min = known_eq (nunits->min, UINT64_MAX)
+ ? new_nunits.min
+ : ordered_min (nunits->min, new_nunits.min);
+}
+
+/* Update minimum and maximum unit count *NUNITS so that it accounts for
+ the number of units in vector type VECTYPE. *NUNITS can be
+ {UINT64_MAX, 1} if we haven't yet recorded any vector types. */
+
+inline void
+vect_update_nunits (slp_tree_nunits *nunits, tree vectype)
+{
+ slp_tree_nunits new_nunits
+ = {TYPE_VECTOR_SUBPARTS (vectype), TYPE_VECTOR_SUBPARTS (vectype)};
+ vect_update_nunits (nunits, new_nunits);
+}
+
/* Return the vectorization factor that should be used for costing
purposes while vectorizing the loop described by LOOP_VINFO.
Pick a reasonable estimate if the vectorization factor isn't
--
2.43.0