To decide whether to create a new SLP instance for BB SLP,
vect_analyze_slp_instance needs to know the minimum number of lanes
in the SLP tree, which must not be less than the group size
(otherwise "unrolling" is required). All uses of max_nunits
are therefore replaced with a new struct, slp_tree_nunits, that
records both the minimum and the maximum.
---
gcc/tree-vect-slp.cc | 157 +++++++++++++++++++++---------------------
gcc/tree-vectorizer.h | 47 ++++++++++++-
2 files changed, 124 insertions(+), 80 deletions(-)
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index aa6c3e2e041..5e6d09a099f 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -130,7 +130,7 @@ _slp_tree::_slp_tree ()
this->cycle_info.reduc_idx = -1;
SLP_TREE_REF_COUNT (this) = 1;
this->failed = NULL;
- this->max_nunits = 1;
+ this->nunits = {UINT64_MAX, 1};
this->lanes = 0;
SLP_TREE_TYPE (this) = undef_vec_info_type;
this->data = NULL;
@@ -1051,14 +1051,14 @@ compatible_calls_p (gcall *call1, gcall *call2, bool
allow_two_operators)
/* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
caller's attempt to find the vector type in STMT_INFO with the narrowest
element type. Return true if VECTYPE is nonnull and if it is valid
- for STMT_INFO. When returning true, update MAX_NUNITS to reflect the
- number of units in VECTYPE. GROUP_SIZE and MAX_NUNITS are as for
+ for STMT_INFO. When returning true, update NUNITS to reflect the
+ number of units in VECTYPE. GROUP_SIZE and NUNITS are as for
vect_build_slp_tree. */
static bool
-vect_record_max_nunits (vec_info *vinfo, stmt_vec_info stmt_info,
- unsigned int group_size,
- tree vectype, poly_uint64 *max_nunits)
+vect_record_nunits (vec_info *vinfo, stmt_vec_info stmt_info,
+ unsigned int group_size, tree vectype,
+ slp_tree_nunits *nunits)
{
if (!vectype)
{
@@ -1071,7 +1071,7 @@ vect_record_max_nunits (vec_info *vinfo, stmt_vec_info
stmt_info,
}
/* If populating the vector type requires unrolling then fail
- before adjusting *max_nunits for basic-block vectorization. */
+ before adjusting *nunits for basic-block vectorization. */
if (is_a <bb_vec_info> (vinfo)
&& !multiple_p (group_size, TYPE_VECTOR_SUBPARTS (vectype)))
{
@@ -1084,7 +1084,7 @@ vect_record_max_nunits (vec_info *vinfo, stmt_vec_info
stmt_info,
}
/* In case of multiple types we need to detect the smallest type. */
- vect_update_max_nunits (max_nunits, vectype);
+ vect_update_nunits (nunits, vectype);
return true;
}
@@ -1105,7 +1105,7 @@ vect_record_max_nunits (vec_info *vinfo, stmt_vec_info
stmt_info,
static bool
vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
vec<stmt_vec_info> stmts, unsigned int group_size,
- poly_uint64 *max_nunits, bool *matches,
+ slp_tree_nunits *nunits, bool *matches,
bool *two_operators, tree *node_vectype)
{
unsigned int i;
@@ -1145,8 +1145,8 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char
*swap,
as if nunits was not an issue. This allows splitting of groups
to happen. */
if (nunits_vectype
- && !vect_record_max_nunits (vinfo, first_stmt_info, group_size,
- nunits_vectype, max_nunits))
+ && !vect_record_nunits (vinfo, first_stmt_info, group_size,
+ nunits_vectype, nunits))
{
gcc_assert (is_a <bb_vec_info> (vinfo));
maybe_soft_fail = true;
@@ -1824,14 +1824,14 @@ vect_slp_linearize_chain (vec_info *vinfo,
static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
vec<stmt_vec_info> stmts, unsigned int group_size,
- poly_uint64 *max_nunits,
+ slp_tree_nunits *nunits,
bool *matches, unsigned *limit, unsigned *tree_size,
scalar_stmts_to_slp_tree_map_t *bst_map);
static slp_tree
vect_build_slp_tree (vec_info *vinfo,
vec<stmt_vec_info> stmts, unsigned int group_size,
- poly_uint64 *max_nunits,
+ slp_tree_nunits *nunits,
bool *matches, unsigned *limit, unsigned *tree_size,
scalar_stmts_to_slp_tree_map_t *bst_map)
{
@@ -1844,7 +1844,7 @@ vect_build_slp_tree (vec_info *vinfo,
if (!(*leader)->failed)
{
SLP_TREE_REF_COUNT (*leader)++;
- vect_update_max_nunits (max_nunits, (*leader)->max_nunits);
+ vect_update_nunits (nunits, (*leader)->nunits);
stmts.release ();
return *leader;
}
@@ -1878,9 +1878,9 @@ vect_build_slp_tree (vec_info *vinfo,
dump_printf_loc (MSG_NOTE, vect_location,
"starting SLP discovery for node %p\n", (void *) res);
- poly_uint64 this_max_nunits = 1;
+ slp_tree_nunits this_nunits{};
slp_tree res_ = vect_build_slp_tree_2 (vinfo, res, stmts, group_size,
- &this_max_nunits,
+ &this_nunits,
matches, limit, tree_size, bst_map);
if (!res_)
{
@@ -1909,8 +1909,8 @@ vect_build_slp_tree (vec_info *vinfo,
"SLP discovery for node %p succeeded\n",
(void *) res);
gcc_assert (res_ == res);
- res->max_nunits = this_max_nunits;
- vect_update_max_nunits (max_nunits, this_max_nunits);
+ res->nunits = this_nunits;
+ vect_update_nunits (nunits, this_nunits);
/* Keep a reference for the bst_map use. */
SLP_TREE_REF_COUNT (res)++;
}
@@ -1968,12 +1968,12 @@ vect_slp_build_two_operator_nodes (slp_tree perm, tree
vectype,
static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
vec<stmt_vec_info> stmts, unsigned int group_size,
- poly_uint64 *max_nunits,
+ slp_tree_nunits *nunits,
bool *matches, unsigned *limit, unsigned *tree_size,
scalar_stmts_to_slp_tree_map_t *bst_map)
{
unsigned nops, i, this_tree_size = 0;
- poly_uint64 this_max_nunits = *max_nunits;
+ slp_tree_nunits this_nunits = *nunits;
matches[0] = false;
@@ -1999,8 +1999,7 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
tree scalar_type = TREE_TYPE (PHI_RESULT (stmt));
tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
group_size);
- if (!vect_record_max_nunits (vinfo, stmt_info, group_size, vectype,
- max_nunits))
+ if (!vect_record_nunits (vinfo, stmt_info, group_size, vectype, nunits))
return NULL;
vect_def_type def_type = STMT_VINFO_DEF_TYPE (stmt_info);
@@ -2053,7 +2052,7 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
unsigned char *swap = XALLOCAVEC (unsigned char, group_size);
tree vectype = NULL_TREE;
if (!vect_build_slp_tree_1 (vinfo, swap, stmts, group_size,
- &this_max_nunits, matches, &two_operators,
+ &this_nunits, matches, &two_operators,
&vectype))
return NULL;
@@ -2065,7 +2064,7 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
gcc_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
else
{
- *max_nunits = this_max_nunits;
+ *nunits = this_nunits;
(*tree_size)++;
node = vect_create_new_slp_node (node, stmts, 0);
SLP_TREE_VECTYPE (node) = vectype;
@@ -2147,7 +2146,7 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
bool *matches2 = XALLOCAVEC (bool, dr_group_size);
slp_tree unperm_load
= vect_build_slp_tree (vinfo, stmts2, dr_group_size,
- &this_max_nunits, matches2, limit,
+ &this_nunits, matches2, limit,
&this_tree_size, bst_map);
/* When we are able to do the full masked load emit that
followed by 'node' being the desired final permutation. */
@@ -2450,7 +2449,7 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
else
op_stmts.quick_push (NULL);
child = vect_build_slp_tree (vinfo, op_stmts,
- group_size, &this_max_nunits,
+ group_size, &this_nunits,
matches, limit,
&this_tree_size, bst_map);
/* ??? We're likely getting too many fatal mismatches
@@ -2606,7 +2605,7 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
children[i] = child;
}
*tree_size += this_tree_size + 1;
- *max_nunits = this_max_nunits;
+ *nunits = this_nunits;
while (!chains.is_empty ())
chains.pop ().release ();
return node;
@@ -2885,7 +2884,7 @@ out:
def_stmts2.create (1);
def_stmts2.quick_push (oprnd_info->def_stmts[0]);
child = vect_build_slp_tree (vinfo, def_stmts2, 1,
- &this_max_nunits,
+ &this_nunits,
matches, limit,
&this_tree_size, bst_map);
if (child)
@@ -2903,7 +2902,7 @@ out:
.quick_push (std::make_pair (0u, 0u));
}
SLP_TREE_CHILDREN (pnode).quick_push (child);
- pnode->max_nunits = child->max_nunits;
+ pnode->nunits = child->nunits;
children.safe_push (pnode);
oprnd_info->def_stmts = vNULL;
continue;
@@ -2913,7 +2912,7 @@ out:
}
if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
- group_size, &this_max_nunits,
+ group_size, &this_nunits,
matches, limit,
&this_tree_size, bst_map)) != NULL)
{
@@ -3002,7 +3001,7 @@ out:
/* And try again with scratch 'matches' ... */
bool *tem = XALLOCAVEC (bool, group_size);
if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
- group_size, &this_max_nunits,
+ group_size, &this_nunits,
tem, limit,
&this_tree_size, bst_map)) != NULL)
{
@@ -3108,7 +3107,7 @@ fail:
}
*tree_size += this_tree_size + 1;
- *max_nunits = this_max_nunits;
+ *nunits = this_nunits;
if (two_operators)
{
@@ -3254,16 +3253,15 @@ vect_print_slp_tree (dump_flags_t dump_kind,
dump_location_t loc,
dump_metadata_t metadata (dump_kind, loc.get_impl_location ());
dump_user_location_t user_loc = loc.get_user_location ();
- dump_printf_loc (metadata, user_loc,
- "node%s %p (max_nunits=" HOST_WIDE_INT_PRINT_UNSIGNED
- ", refcnt=%u)",
- SLP_TREE_DEF_TYPE (node) == vect_external_def
- ? " (external)"
- : (SLP_TREE_DEF_TYPE (node) == vect_constant_def
- ? " (constant)"
- : ""), (void *) node,
- estimated_poly_value (node->max_nunits),
- SLP_TREE_REF_COUNT (node));
+ dump_printf_loc (
+ metadata, user_loc,
+ "node%s %p (nunits.min=" HOST_WIDE_INT_PRINT_UNSIGNED
+ ", nunits.max=" HOST_WIDE_INT_PRINT_UNSIGNED ", refcnt=%u)",
+ SLP_TREE_DEF_TYPE (node) == vect_external_def
+ ? " (external)"
+ : (SLP_TREE_DEF_TYPE (node) == vect_constant_def ? " (constant)" : ""),
+ (void *) node, estimated_poly_value (node->nunits.min),
+ estimated_poly_value (node->nunits.max), SLP_TREE_REF_COUNT (node));
if (SLP_TREE_VECTYPE (node))
dump_printf (metadata, " %T", SLP_TREE_VECTYPE (node));
dump_printf (metadata, "%s",
@@ -3630,9 +3628,9 @@ vect_split_slp_store_group (stmt_vec_info first_vinfo,
unsigned group1_size)
statements and a vector of NUNITS elements. */
static poly_uint64
-calculate_unrolling_factor (poly_uint64 nunits, unsigned int group_size)
+calculate_unrolling_factor (slp_tree_nunits nunits, unsigned int group_size)
{
- return exact_div (common_multiple (nunits, group_size), group_size);
+ return exact_div (common_multiple (nunits.max, group_size), group_size);
}
/* Helper that checks to see if a node is a load node. */
@@ -3694,9 +3692,9 @@ optimize_load_redistribution_1
(scalar_stmts_to_slp_tree_map_t *bst_map,
(void *) root);
bool *matches = XALLOCAVEC (bool, group_size);
- poly_uint64 max_nunits = 1;
+ slp_tree_nunits nunits{};
unsigned tree_size = 0, limit = 1;
- node = vect_build_slp_tree (vinfo, stmts, group_size, &max_nunits,
+ node = vect_build_slp_tree (vinfo, stmts, group_size, &nunits,
matches, &limit, &tree_size, bst_map);
if (!node)
stmts.release ();
@@ -3879,14 +3877,14 @@ vect_analyze_slp_instance (vec_info *vinfo,
static slp_tree
vect_build_slp_store_interleaving (vec<slp_tree> &rhs_nodes,
vec<stmt_vec_info> &scalar_stmts,
- poly_uint64 max_nunits)
+ slp_tree_nunits nunits)
{
unsigned int group_size = scalar_stmts.length ();
slp_tree node = vect_create_new_slp_node (scalar_stmts,
SLP_TREE_CHILDREN
(rhs_nodes[0]).length ());
SLP_TREE_VECTYPE (node) = SLP_TREE_VECTYPE (rhs_nodes[0]);
- node->max_nunits = max_nunits;
+ node->nunits = nunits;
for (unsigned l = 0;
l < SLP_TREE_CHILDREN (rhs_nodes[0]).length (); ++l)
{
@@ -3896,7 +3894,7 @@ vect_build_slp_store_interleaving (vec<slp_tree>
&rhs_nodes,
SLP_TREE_CHILDREN (node).quick_push (perm);
SLP_TREE_LANE_PERMUTATION (perm).create (group_size);
SLP_TREE_VECTYPE (perm) = SLP_TREE_VECTYPE (node);
- perm->max_nunits = max_nunits;
+ perm->nunits = nunits;
SLP_TREE_LANES (perm) = group_size;
/* ??? We should set this NULL but that's not expected. */
SLP_TREE_REPRESENTATIVE (perm)
@@ -3952,7 +3950,7 @@ vect_build_slp_store_interleaving (vec<slp_tree>
&rhs_nodes,
SLP_TREE_LANES (permab) = n;
SLP_TREE_LANE_PERMUTATION (permab).create (n);
SLP_TREE_VECTYPE (permab) = SLP_TREE_VECTYPE (perm);
- permab->max_nunits = max_nunits;
+ permab->nunits = nunits;
/* ??? Should be NULL but that's not expected. */
SLP_TREE_REPRESENTATIVE (permab) = SLP_TREE_REPRESENTATIVE (perm);
SLP_TREE_CHILDREN (permab).quick_push (a);
@@ -4023,7 +4021,7 @@ vect_build_slp_store_interleaving (vec<slp_tree>
&rhs_nodes,
SLP_TREE_LANES (permab) = n;
SLP_TREE_LANE_PERMUTATION (permab).create (n);
SLP_TREE_VECTYPE (permab) = SLP_TREE_VECTYPE (perm);
- permab->max_nunits = max_nunits;
+ permab->nunits = nunits;
/* ??? Should be NULL but that's not expected. */
SLP_TREE_REPRESENTATIVE (permab) = SLP_TREE_REPRESENTATIVE (perm);
SLP_TREE_CHILDREN (permab).quick_push (a);
@@ -4108,7 +4106,7 @@ vect_build_slp_instance (vec_info *vinfo,
/* Build the tree for the SLP instance. */
unsigned int group_size = scalar_stmts.length ();
bool *matches = XALLOCAVEC (bool, group_size);
- poly_uint64 max_nunits = 1;
+ slp_tree_nunits nunits{};
unsigned tree_size = 0;
slp_tree node = NULL;
@@ -4119,19 +4117,19 @@ vect_build_slp_instance (vec_info *vinfo,
}
else
node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
- &max_nunits, matches, limit,
+ &nunits, matches, limit,
&tree_size, bst_map);
if (node != NULL)
{
/* Calculate the unrolling factor based on the smallest type. */
poly_uint64 unrolling_factor
- = calculate_unrolling_factor (max_nunits, group_size);
+ = calculate_unrolling_factor (nunits, group_size);
if (maybe_ne (unrolling_factor, 1U)
&& is_a <bb_vec_info> (vinfo))
{
unsigned HOST_WIDE_INT const_max_nunits;
- if (!max_nunits.is_constant (&const_max_nunits)
+ if (!nunits.max.is_constant (&const_max_nunits)
|| const_max_nunits > group_size)
{
if (dump_enabled_p ())
@@ -4369,7 +4367,7 @@ vect_analyze_slp_reduc_chain (loop_vec_info vinfo,
unsigned int group_size = scalar_stmts.length ();
bool *matches = XALLOCAVEC (bool, group_size);
- poly_uint64 max_nunits = 1;
+ slp_tree_nunits max_nunits{};
unsigned tree_size = 0;
slp_tree node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
&max_nunits, matches, limit,
@@ -4512,7 +4510,7 @@ vect_analyze_slp_reduc_chain (loop_vec_info vinfo,
/* Build the tree for the SLP instance. */
unsigned int group_size = scalar_stmts.length ();
bool *matches = XALLOCAVEC (bool, group_size);
- poly_uint64 max_nunits = 1;
+ slp_tree_nunits nunits{};
unsigned tree_size = 0;
/* ??? We need this only for SLP discovery. */
@@ -4520,7 +4518,7 @@ vect_analyze_slp_reduc_chain (loop_vec_info vinfo,
REDUC_GROUP_FIRST_ELEMENT (scalar_stmts[i]) = scalar_stmts[0];
slp_tree node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
- &max_nunits, matches, limit,
+ &nunits, matches, limit,
&tree_size, bst_map);
for (unsigned i = 0; i < scalar_stmts.length (); ++i)
@@ -4662,11 +4660,11 @@ vect_analyze_slp_reduction (loop_vec_info vinfo,
/* Build the tree for the SLP instance. */
unsigned int group_size = scalar_stmts.length ();
bool *matches = XALLOCAVEC (bool, group_size);
- poly_uint64 max_nunits = 1;
+ slp_tree_nunits nunits{};
unsigned tree_size = 0;
slp_tree node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
- &max_nunits, matches, limit,
+ &nunits, matches, limit,
&tree_size, bst_map);
if (node != NULL)
{
@@ -4765,7 +4763,7 @@ vect_analyze_slp_instance (vec_info *vinfo,
/* Build the tree for the SLP instance. */
unsigned int group_size = scalar_stmts.length ();
bool *matches = XALLOCAVEC (bool, group_size);
- poly_uint64 max_nunits = 1;
+ slp_tree_nunits nunits{};
unsigned tree_size = 0;
unsigned i;
@@ -4777,26 +4775,29 @@ vect_analyze_slp_instance (vec_info *vinfo,
}
else
node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
- &max_nunits, matches, limit,
+ &nunits, matches, limit,
&tree_size, bst_map);
if (node != NULL)
{
/* Calculate the unrolling factor based on the smallest type. */
poly_uint64 unrolling_factor
- = calculate_unrolling_factor (max_nunits, group_size);
+ = calculate_unrolling_factor (nunits, group_size);
if (maybe_ne (unrolling_factor, 1U)
- && is_a <bb_vec_info> (vinfo))
+ && is_a <bb_vec_info> (vinfo)
+ && !known_ge (nunits.min, group_size))
{
unsigned HOST_WIDE_INT const_max_nunits;
- if (!max_nunits.is_constant (&const_max_nunits)
+ if (!nunits.max.is_constant (&const_max_nunits)
|| const_max_nunits > group_size)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"Build SLP failed: store group "
- "size not a multiple of the vector size "
- "in basic block SLP\n");
+ "size %u not a multiple of the vector "
+ "size " HOST_WIDE_INT_PRINT_UNSIGNED
+ " in basic block SLP\n",
+ group_size, estimated_poly_value (nunits.max));
vect_free_slp_tree (node);
return false;
}
@@ -4953,7 +4954,7 @@ vect_analyze_slp_instance (vec_info *vinfo,
/* Analyze the stored values and pinch them together with
a permute node so we can preserve the whole store group. */
auto_vec<slp_tree> rhs_nodes;
- poly_uint64 max_nunits = 1;
+ slp_tree_nunits nunits{};
unsigned int rhs_common_nlanes = 0;
unsigned int start = 0, end = i;
@@ -4964,14 +4965,14 @@ vect_analyze_slp_instance (vec_info *vinfo,
substmts.create (end - start);
for (unsigned j = start; j < end; ++j)
substmts.quick_push (scalar_stmts[j]);
- max_nunits = 1;
+ nunits = {UINT64_MAX, 1};
node = vect_build_slp_tree (vinfo, substmts, end - start,
- &max_nunits,
+ &nunits,
matches, limit, &tree_size, bst_map);
if (node)
{
rhs_nodes.safe_push (node);
- vect_update_max_nunits (&max_nunits, node->max_nunits);
+ vect_update_nunits (&nunits, node->nunits);
if (start == 0)
rhs_common_nlanes = SLP_TREE_LANES (node);
else if (rhs_common_nlanes != SLP_TREE_LANES (node))
@@ -5035,7 +5036,7 @@ vect_analyze_slp_instance (vec_info *vinfo,
SLP_TREE_CHILDREN
(rhs_nodes[0]).length ());
SLP_TREE_VECTYPE (node) = SLP_TREE_VECTYPE (rhs_nodes[0]);
- node->max_nunits = max_nunits;
+ node->nunits = nunits;
node->ldst_lanes = true;
SLP_TREE_CHILDREN (node)
.reserve_exact (SLP_TREE_CHILDREN (rhs_nodes[0]).length ()
@@ -5053,7 +5054,7 @@ vect_analyze_slp_instance (vec_info *vinfo,
}
else
node = vect_build_slp_store_interleaving (rhs_nodes, scalar_stmts,
- max_nunits);
+ nunits);
while (!rhs_nodes.is_empty ())
vect_free_slp_tree (rhs_nodes.pop ());
@@ -5318,13 +5319,13 @@ vect_lower_load_permutations (loop_vec_info loop_vinfo,
}
for (unsigned i = 0; i < DR_GROUP_GAP (first); ++i)
stmts.quick_push (NULL);
- poly_uint64 max_nunits = 1;
+ slp_tree_nunits nunits{};
bool *matches = XALLOCAVEC (bool, group_lanes);
unsigned limit = 1;
unsigned tree_size = 0;
slp_tree l0 = vect_build_slp_tree (loop_vinfo, stmts,
group_lanes,
- &max_nunits, matches, &limit,
+ &nunits, matches, &limit,
&tree_size, bst_map);
gcc_assert (!SLP_TREE_LOAD_PERMUTATION (l0).exists ());
@@ -8259,7 +8260,7 @@ vect_update_slp_vf_for_node (slp_tree node, poly_uint64
&vf,
/* We do not visit SLP nodes for constants or externals - those neither
have a vector type set yet (vectorizable_* does this) nor do they
- have max_nunits set. Instead we rely on internal nodes max_nunit
+ have nunits set. Instead we rely on internal nodes' nunits
to cover constant/external operands.
Note that when we stop using fixed size vectors externs and constants
shouldn't influence the (minimum) vectorization factor, instead
@@ -8267,7 +8268,7 @@ vect_update_slp_vf_for_node (slp_tree node, poly_uint64
&vf,
assign vector types to constants and externals and cause iteration
to a higher vectorization factor when required. */
poly_uint64 node_vf
- = calculate_unrolling_factor (node->max_nunits, SLP_TREE_LANES (node));
+ = calculate_unrolling_factor (node->nunits, SLP_TREE_LANES (node));
vf = force_common_multiple (vf, node_vf);
/* For permute nodes that are fed from externs or constants we have to
@@ -8277,7 +8278,7 @@ vect_update_slp_vf_for_node (slp_tree node, poly_uint64
&vf,
if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
{
poly_uint64 child_vf
- = calculate_unrolling_factor (node->max_nunits,
+ = calculate_unrolling_factor (node->nunits,
SLP_TREE_LANES (child));
vf = force_common_multiple (vf, child_vf);
}
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 359c994139b..ecfdb7d88ef 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -254,6 +254,18 @@ typedef auto_vec<std::pair<unsigned, unsigned>, 16>
auto_lane_permutation_t;
typedef vec<unsigned> load_permutation_t;
typedef auto_vec<unsigned, 16> auto_load_permutation_t;
+struct slp_tree_nunits
+{
+ slp_tree_nunits () = default;
+
+ /* The minimum number of vector elements for the subtree rooted
+ at this node, or UINT64_MAX if no vector type was recorded yet. */
+ poly_uint64 min = UINT64_MAX;
+ /* The maximum number of vector elements for the subtree rooted
+ at this node. */
+ poly_uint64 max = 1;
+};
+
struct vect_data {
virtual ~vect_data () = default;
};
@@ -336,9 +348,9 @@ struct _slp_tree {
/* Reference count in the SLP graph. */
unsigned int refcnt;
- /* The maximum number of vector elements for the subtree rooted
+ /* The minimum and maximum number of vector elements for the subtree rooted
at this node. */
- poly_uint64 max_nunits;
+ slp_tree_nunits nunits;
/* The DEF type of this node. */
enum vect_def_type def_type;
/* The number of scalar lanes produced by this node. */
@@ -2326,6 +2338,37 @@ vect_update_max_nunits (poly_uint64 *max_nunits, tree
vectype)
vect_update_max_nunits (max_nunits, TYPE_VECTOR_SUBPARTS (vectype));
}
+/* Update minimum and maximum unit count *NUNITS so that it accounts for
+ NEW_NUNITS. *NUNITS can be {UINT64_MAX, 1} if nothing has been recorded
+ yet. If NEW_NUNITS is {UINT64_MAX, 1} then this function has no effect. */
+
+inline void
+vect_update_nunits (slp_tree_nunits *nunits, slp_tree_nunits new_nunits)
+{
+ vect_update_max_nunits (&nunits->max, new_nunits.max);
+
+ /* We also want to know whether each individual choice of vector type
+ requires no "unrolling", which requires the minimum number of units.
+ All unit counts have the form vec_info::vector_size * X for some
+ rational X, therefore we know the values are ordered. */
+ if (!known_eq (new_nunits.min, UINT64_MAX))
+ nunits->min = known_eq (nunits->min, UINT64_MAX)
+ ? new_nunits.min
+ : ordered_min (nunits->min, new_nunits.min);
+}
+
+/* Update minimum and maximum unit count *NUNITS so that it accounts for
+ the number of units in vector type VECTYPE. *NUNITS can be
+ {UINT64_MAX, 1} if we haven't yet recorded any vector types. */
+
+inline void
+vect_update_nunits (slp_tree_nunits *nunits, tree vectype)
+{
+ slp_tree_nunits new_nunits
+ = {TYPE_VECTOR_SUBPARTS (vectype), TYPE_VECTOR_SUBPARTS (vectype)};
+ vect_update_nunits (nunits, new_nunits);
+}
+
/* Return the vectorization factor that should be used for costing
purposes while vectorizing the loop described by LOOP_VINFO.
Pick a reasonable estimate if the vectorization factor isn't
--
2.43.0