Bootstrap and regression testing are running on x86_64-unknown-linux-gnu.
Richard. This adds an explicit count of scalar lanes to the SLP node, so that code no longer has to dispatch between the stmts and ops vectors and, eventually, does not need those vectors at all. 2020-05-27 Richard Biener <rguent...@suse.de> * tree-vectorizer.h (_slp_tree::lanes): New. (SLP_TREE_LANES): Likewise. --- gcc/tree-vect-loop.c | 13 ++++++------- gcc/tree-vect-slp.c | 14 ++++++++------ gcc/tree-vect-stmts.c | 8 ++------ gcc/tree-vectorizer.h | 3 +++ 4 files changed, 19 insertions(+), 19 deletions(-) diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index ad26663595c..e3fbf9fe28a 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -4516,7 +4516,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, tree induction_index = NULL_TREE; if (slp_node) - group_size = SLP_TREE_SCALAR_STMTS (slp_node).length (); + group_size = SLP_TREE_LANES (slp_node); if (nested_in_vect_loop_p (loop, stmt_info)) { @@ -6594,7 +6594,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo, which each SLP statement has its own initial value and in which that value needs to be repeated for every instance of the statement within the initial vector. */ - unsigned int group_size = SLP_TREE_SCALAR_STMTS (slp_node).length (); + unsigned int group_size = SLP_TREE_LANES (slp_node); if (!neutral_op && !can_duplicate_and_interleave_p (loop_vinfo, group_size, TREE_TYPE (vectype_out))) @@ -7110,9 +7110,8 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo, if (slp_node) { /* The size vect_schedule_slp_instance computes is off for us. */ - vec_num = vect_get_num_vectors - (LOOP_VINFO_VECT_FACTOR (loop_vinfo) - * SLP_TREE_SCALAR_STMTS (slp_node).length (), vectype_in); + vec_num = vect_get_num_vectors (LOOP_VINFO_VECT_FACTOR (loop_vinfo) + * SLP_TREE_LANES (slp_node), vectype_in); ncopies = 1; } else @@ -7558,7 +7557,7 @@ vectorizable_induction (loop_vec_info loop_vinfo, new_vec, step_vectype, NULL); /* Now generate the IVs. 
*/ - unsigned group_size = SLP_TREE_SCALAR_STMTS (slp_node).length (); + unsigned group_size = SLP_TREE_LANES (slp_node); unsigned nvects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); unsigned elts = const_nunits * nvects; /* Compute the number of distinct IVs we need. First reduce @@ -7999,7 +7998,7 @@ vectorizable_live_operation (loop_vec_info loop_vinfo, { gcc_assert (slp_index >= 0); - int num_scalar = SLP_TREE_SCALAR_STMTS (slp_node).length (); + int num_scalar = SLP_TREE_LANES (slp_node); int num_vec = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); /* Get the last occurrence of the scalar index from the concatenation of diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index f8b12f0dae9..65c49f5e143 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -64,6 +64,7 @@ _slp_tree::_slp_tree () SLP_TREE_REPRESENTATIVE (this) = NULL; this->refcnt = 1; this->max_nunits = 1; + this->lanes = 0; } /* Tear down a SLP node. */ @@ -134,6 +135,7 @@ vect_create_new_slp_node (vec<stmt_vec_info> scalar_stmts, unsigned nops) SLP_TREE_CHILDREN (node).create (nops); SLP_TREE_DEF_TYPE (node) = vect_internal_def; SLP_TREE_REPRESENTATIVE (node) = scalar_stmts[0]; + SLP_TREE_LANES (node) = scalar_stmts.length (); unsigned i; stmt_vec_info stmt_info; @@ -151,6 +153,7 @@ vect_create_new_slp_node (vec<tree> ops) slp_tree node = new _slp_tree; SLP_TREE_SCALAR_OPS (node) = ops; SLP_TREE_DEF_TYPE (node) = vect_external_def; + SLP_TREE_LANES (node) = ops.length (); return node; } @@ -1670,6 +1673,7 @@ slp_copy_subtree (slp_tree node, hash_map<slp_tree, slp_tree> &map) SLP_TREE_DEF_TYPE (copy) = SLP_TREE_DEF_TYPE (node); SLP_TREE_VECTYPE (copy) = SLP_TREE_VECTYPE (node); SLP_TREE_REPRESENTATIVE (copy) = SLP_TREE_REPRESENTATIVE (node); + SLP_TREE_LANES (copy) = SLP_TREE_LANES (node); copy->max_nunits = node->max_nunits; copy->refcnt = 0; if (SLP_TREE_SCALAR_STMTS (node).exists ()) @@ -2377,8 +2381,7 @@ vect_optimize_slp (vec_info *vinfo) a gap either because the group is larger than the 
SLP group-size or because there is a gap between the groups. */ && (known_eq (LOOP_VINFO_VECT_FACTOR (as_a <loop_vec_info> (vinfo)), 1U) - || ((SLP_TREE_SCALAR_STMTS (node).length () - == DR_GROUP_SIZE (first_stmt_info)) + || ((SLP_TREE_LANES (node) == DR_GROUP_SIZE (first_stmt_info)) && DR_GROUP_GAP (first_stmt_info) == 0))) { SLP_TREE_LOAD_PERMUTATION (node).release (); @@ -2612,7 +2615,7 @@ vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node, vf = loop_vinfo->vectorization_factor; else vf = 1; - unsigned int group_size = SLP_TREE_SCALAR_STMTS (node).length (); + unsigned int group_size = SLP_TREE_LANES (node); tree vectype = STMT_VINFO_VECTYPE (stmt_info); SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vect_get_num_vectors (vf * group_size, vectype); @@ -2645,7 +2648,7 @@ vect_slp_convert_to_external (vec_info *vinfo, slp_tree node, /* Don't remove and free the child nodes here, since they could be referenced by other structures. The analysis and scheduling phases (need to) ignore child nodes of anything that isn't vect_internal_def. */ - unsigned int group_size = SLP_TREE_SCALAR_STMTS (node).length (); + unsigned int group_size = SLP_TREE_LANES (node); SLP_TREE_DEF_TYPE (node) = vect_external_def; SLP_TREE_SCALAR_OPS (node).safe_grow (group_size); FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info) @@ -2955,8 +2958,7 @@ vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo) FOR_EACH_VEC_ELT (slp_instances, i, instance) { auto_vec<bool, 20> life; - life.safe_grow_cleared - (SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (instance)).length ()); + life.safe_grow_cleared (SLP_TREE_LANES (SLP_INSTANCE_TREE (instance))); vect_bb_slp_scalar_cost (bb_vinfo, SLP_INSTANCE_TREE (instance), &life, &scalar_costs, visited); diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index 4cca06ee96e..4df1d90d5f3 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -9225,7 +9225,7 @@ vectorizable_load (vec_info *vinfo, unpermuted sequence. 
In other cases we need to load the whole group, not only the number of vector stmts the permutation result fits in. */ - unsigned scalar_lanes = SLP_TREE_SCALAR_STMTS (slp_node).length (); + unsigned scalar_lanes = SLP_TREE_LANES (slp_node); if (slp_perm && (group_size != scalar_lanes || !multiple_p (nunits, group_size))) @@ -11603,11 +11603,7 @@ get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node) { unsigned int group_size = 0; if (node) - { - group_size = SLP_TREE_SCALAR_OPS (node).length (); - if (group_size == 0) - group_size = SLP_TREE_SCALAR_STMTS (node).length (); - } + group_size = SLP_TREE_LANES (node); return get_vectype_for_scalar_type (vinfo, scalar_type, group_size); } diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 5a5648b3784..1396da74e9d 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -155,6 +155,8 @@ struct _slp_tree { bool two_operators; /* The DEF type of this node. */ enum vect_def_type def_type; + /* The number of scalar lanes produced by this node. */ + unsigned int lanes; }; @@ -197,6 +199,7 @@ public: #define SLP_TREE_DEF_TYPE(S) (S)->def_type #define SLP_TREE_VECTYPE(S) (S)->vectype #define SLP_TREE_REPRESENTATIVE(S) (S)->representative +#define SLP_TREE_LANES(S) (S)->lanes /* Key for map that records association between scalar conditions and corresponding loop mask, and -- 2.26.1