The following exploits that SLP_TREE_TYPE now denotes full scalar stmt
coverage, which simplifies computing STMT_VINFO_LIVE_P for code
generation of live lanes.
* tree-vect-slp.cc (vec_slp_has_scalar_use): Remove.
(vect_bb_slp_mark_live_stmts): Simplify.
---
gcc/tree-vect-slp.cc | 206 +++++++++----------------------------------
1 file changed, 43 insertions(+), 163 deletions(-)
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 15086394d92..f6c03ea8761 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -8948,102 +8948,6 @@ vect_slp_analyze_node_operations (vec_info *vinfo,
slp_tree node,
return res;
}
-/* Given a definition DEF, analyze if it will have any live scalar use after
- performing SLP vectorization whose information is represented by BB_VINFO,
- and record result into hash map SCALAR_USE_MAP as cache for later fast
- check. If recursion DEPTH exceeds a limit, stop analysis and make a
- conservative assumption. Return 0 if no scalar use, 1 if there is, -1
- means recursion is limited. */
-
-static int
-vec_slp_has_scalar_use (bb_vec_info bb_vinfo, tree def,
- hash_map<tree, int> &scalar_use_map,
- int depth = 0)
-{
- const int depth_limit = 3;
- imm_use_iterator use_iter;
- gimple *use_stmt;
-
- if (int *res = scalar_use_map.get (def))
- return *res;
-
- int scalar_use = 1;
-
- FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
- {
- if (is_gimple_debug (use_stmt))
- continue;
-
- stmt_vec_info use_stmt_info = bb_vinfo->lookup_stmt (use_stmt);
-
- if (!use_stmt_info)
- break;
-
- if (PURE_SLP_STMT (vect_stmt_to_vectorize (use_stmt_info)))
- continue;
-
- /* Do not step forward when encounter PHI statement, since it may
- involve cyclic reference and cause infinite recursive invocation. */
- if (gimple_code (use_stmt) == GIMPLE_PHI)
- break;
-
- /* When pattern recognition is involved, a statement whose definition is
- consumed in some pattern, may not be included in the final replacement
- pattern statements, so would be skipped when building SLP graph.
-
- * Original
- char a_c = *(char *) a;
- char b_c = *(char *) b;
- unsigned short a_s = (unsigned short) a_c;
- int a_i = (int) a_s;
- int b_i = (int) b_c;
- int r_i = a_i - b_i;
-
- * After pattern replacement
- a_s = (unsigned short) a_c;
- a_i = (int) a_s;
-
- patt_b_s = (unsigned short) b_c; // b_i = (int) b_c
- patt_b_i = (int) patt_b_s; // b_i = (int) b_c
-
- patt_r_s = widen_minus(a_c, b_c); // r_i = a_i - b_i
- patt_r_i = (int) patt_r_s; // r_i = a_i - b_i
-
- The definitions of a_i(original statement) and b_i(pattern statement)
- are related to, but actually not part of widen_minus pattern.
- Vectorizing the pattern does not cause these definition statements to
- be marked as PURE_SLP. For this case, we need to recursively check
- whether their uses are all absorbed into vectorized code. But there
- is an exception that some use may participate in an vectorized
- operation via an external SLP node containing that use as an element.
- The parameter "scalar_use_map" tags such kind of SSA as having scalar
- use in advance. */
- tree lhs = gimple_get_lhs (use_stmt);
-
- if (!lhs || TREE_CODE (lhs) != SSA_NAME)
- break;
-
- if (depth_limit && depth >= depth_limit)
- return -1;
-
- if ((scalar_use = vec_slp_has_scalar_use (bb_vinfo, lhs, scalar_use_map,
- depth + 1)))
- break;
- }
-
- if (end_imm_use_stmt_p (&use_iter))
- scalar_use = 0;
-
- /* If recursion is limited, do not cache result for non-root defs. */
- if (!depth || scalar_use >= 0)
- {
- bool added = scalar_use_map.put (def, scalar_use);
- gcc_assert (!added);
- }
-
- return scalar_use;
-}
-
/* Mark lanes of NODE that are live outside of the basic-block vectorized
region and that can be vectorized using vectorizable_live_operation
with STMT_VINFO_LIVE_P. Not handled live operations will cause the
@@ -9053,7 +8957,6 @@ static void
vect_bb_slp_mark_live_stmts (bb_vec_info bb_vinfo, slp_tree node,
slp_instance instance,
stmt_vector_for_cost *cost_vec,
- hash_map<tree, int> &scalar_use_map,
hash_set<stmt_vec_info> &svisited,
hash_set<slp_tree> &visited)
{
@@ -9078,22 +8981,6 @@ vect_bb_slp_mark_live_stmts (bb_vec_info bb_vinfo,
slp_tree node,
def_operand_p def_p;
FOR_EACH_PHI_OR_STMT_DEF (def_p, orig_stmt, op_iter, SSA_OP_DEF)
{
- if (vec_slp_has_scalar_use (bb_vinfo, DEF_FROM_PTR (def_p),
- scalar_use_map))
- {
- STMT_VINFO_LIVE_P (stmt_info) = true;
- if (vectorizable_live_operation (bb_vinfo, stmt_info, node,
- instance, i, false, cost_vec))
- /* ??? So we know we can vectorize the live stmt from one SLP
- node. If we cannot do so from all or none consistently
- we'd have to record which SLP node (and lane) we want to
- use for the live operation. So make sure we can
- code-generate from all nodes. */
- mark_visited = false;
- else
- STMT_VINFO_LIVE_P (stmt_info) = false;
- }
-
/* We have to verify whether we can insert the lane extract
before all uses. The following is a conservative approximation.
We cannot put this into vectorizable_live_operation because
@@ -9116,30 +9003,55 @@ vect_bb_slp_mark_live_stmts (bb_vec_info bb_vinfo,
slp_tree node,
gimple *use_stmt;
stmt_vec_info use_stmt_info;
- if (STMT_VINFO_LIVE_P (stmt_info))
- FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
- if (!is_gimple_debug (use_stmt)
- && (!(use_stmt_info = bb_vinfo->lookup_stmt (use_stmt))
- || !PURE_SLP_STMT (use_stmt_info))
- && !vect_stmt_dominates_stmt_p (last_stmt->stmt, use_stmt))
+ bool live_p = false;
+ bool can_insert = true;
+ FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
+ if (!is_gimple_debug (use_stmt)
+ && (!(use_stmt_info = bb_vinfo->lookup_stmt (use_stmt))
+ || !PURE_SLP_STMT (use_stmt_info)))
+ {
+ live_p = true;
+ if (!vect_stmt_dominates_stmt_p (last_stmt->stmt, use_stmt))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "Cannot determine insertion place for "
+ "lane extract\n");
+ can_insert = false;
+ break;
+ }
+ }
+ if (live_p && can_insert)
+ {
+ STMT_VINFO_LIVE_P (stmt_info) = true;
+ if (vectorizable_live_operation (bb_vinfo, stmt_info, node,
+ instance, i, false, cost_vec))
{
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "Cannot determine insertion place for "
- "lane extract\n");
- STMT_VINFO_LIVE_P (stmt_info) = false;
- mark_visited = true;
+ /* ??? So we know we can vectorize the live stmt from one SLP
+ node. If we cannot do so from all or none consistently
+ we'd have to record which SLP node (and lane) we want to
+ use for the live operation. So make sure we can
+ code-generate from all nodes. */
+ /* ??? We are costing the extract possibly multiple times,
+ but code-generation also works this way, leaving uses
+ that are not valid for one extraction to be handled
+ by another. */
+ mark_visited = false;
}
+ }
}
if (mark_visited)
- svisited.add (stmt_info);
+ {
+ STMT_VINFO_LIVE_P (stmt_info) = false;
+ svisited.add (stmt_info);
+ }
}
slp_tree child;
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
if (child && SLP_TREE_DEF_TYPE (child) == vect_internal_def)
vect_bb_slp_mark_live_stmts (bb_vinfo, child, instance, cost_vec,
- scalar_use_map, svisited, visited);
+ svisited, visited);
}
/* Traverse all slp instances of BB_VINFO, and mark lanes of every node that
@@ -9152,48 +9064,16 @@ vect_bb_slp_mark_live_stmts (bb_vec_info bb_vinfo)
if (bb_vinfo->slp_instances.is_empty ())
return;
- hash_set<stmt_vec_info> svisited;
hash_set<slp_tree> visited;
- hash_map<tree, int> scalar_use_map;
- auto_vec<slp_tree> worklist;
-
- for (slp_instance instance : bb_vinfo->slp_instances)
- {
- if (SLP_INSTANCE_KIND (instance) == slp_inst_kind_bb_reduc)
- for (tree op : SLP_INSTANCE_REMAIN_DEFS (instance))
- if (TREE_CODE (op) == SSA_NAME)
- scalar_use_map.put (op, 1);
- if (!visited.add (SLP_INSTANCE_TREE (instance)))
- worklist.safe_push (SLP_INSTANCE_TREE (instance));
- }
-
- do
- {
- slp_tree node = worklist.pop ();
-
- if (SLP_TREE_DEF_TYPE (node) == vect_external_def)
- {
- for (tree op : SLP_TREE_SCALAR_OPS (node))
- if (TREE_CODE (op) == SSA_NAME)
- scalar_use_map.put (op, 1);
- }
- else
- {
- for (slp_tree child : SLP_TREE_CHILDREN (node))
- if (child && !visited.add (child))
- worklist.safe_push (child);
- }
- }
- while (!worklist.is_empty ());
-
- visited.empty ();
-
+ hash_set<stmt_vec_info> svisited;
for (slp_instance instance : bb_vinfo->slp_instances)
{
+ if (!SLP_INSTANCE_ROOT_STMTS (instance).is_empty ())
+ STMT_VINFO_LIVE_P (SLP_INSTANCE_ROOT_STMTS (instance)[0]) = true;
vect_location = instance->location ();
vect_bb_slp_mark_live_stmts (bb_vinfo, SLP_INSTANCE_TREE (instance),
instance, &instance->cost_vec,
- scalar_use_map, svisited, visited);
+ svisited, visited);
}
}
--
2.51.0