This adds a forward propagation phase to the permute optimization machinery which allows us to handle "any" permute for all kinds of nodes. To match previous behavior cost-wise we still do not allow non-external/constant nodes to be duplicated for multiple permutes and this is ensured during propagation itself.
Bootstrap & regtest in progress on x86_64-unknown-linux-gnu. 2021-06-29 Richard Biener <rguent...@suse.de> * tree-vect-slp.c (vect_optimize_slp): Forward propagate to "any" permute nodes and relax "any" permute proapgation during iterative backward propagation. --- gcc/tree-vect-slp.c | 81 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 63 insertions(+), 18 deletions(-) diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index 524bfaa1c7f..9155af499b3 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -3729,16 +3729,11 @@ vect_optimize_slp (vec_info *vinfo) perm = vertices[idx].perm_out; else { - perm = -1; - bool all_constant = true; + perm = vertices[idx].get_perm_in (); for (graph_edge *succ = slpg->vertices[idx].succ; succ; succ = succ->succ_next) { int succ_idx = succ->dest; - slp_tree succ_node = vertices[succ_idx].node; - if (SLP_TREE_DEF_TYPE (succ_node) != vect_external_def - && SLP_TREE_DEF_TYPE (succ_node) != vect_constant_def) - all_constant = false; int succ_perm = vertices[succ_idx].perm_out; /* Handle unvisited (and constant) nodes optimistically. */ /* ??? But for constants once we want to handle @@ -3750,25 +3745,34 @@ vect_optimize_slp (vec_info *vinfo) continue; if (perm == -1) perm = succ_perm; - else if (succ_perm == 0) + else if (succ_perm == 0 + || !vect_slp_perms_eq (perms, perm, succ_perm)) { perm = 0; break; } - else if (!vect_slp_perms_eq (perms, perm, succ_perm)) + } + + /* If this is a node we do not want to eventually unshare + but it can be permuted at will, verify all users have + the same permutations registered and otherwise drop to + zero. */ + if (perm == -1 + && SLP_TREE_DEF_TYPE (node) != vect_external_def + && SLP_TREE_DEF_TYPE (node) != vect_constant_def) + { + int preds_perm = -1; + for (graph_edge *pred = slpg->vertices[idx].pred; + pred; pred = pred->pred_next) { - perm = 0; - break; + int pred_perm = vertices[pred->src].get_perm_in (); + if (preds_perm == -1) + preds_perm = pred_perm; + else if (!vect_slp_perms_eq (perms, + pred_perm, preds_perm)) + perm = 0; } } - /* We still lack a forward propagation of materializations - and thus only allow "any" permutes on constant or external - nodes which we handle during materialization by looking - at SLP children. So avoid having internal "any" permutes - for now, see gcc.dg/vect/bb-slp-71.c for a testcase that - breaks when removing this restriction. */ - if (perm == -1 && all_constant) - perm = 0; if (!vect_slp_perms_eq (perms, perm, vertices[idx].get_perm_in ())) @@ -3836,6 +3840,47 @@ vect_optimize_slp (vec_info *vinfo) } } while (changed); + statistics_counter_event (cfun, "SLP optimize perm iterations", iteration); + + /* Compute pre-order. */ + auto_vec<int> heads; + heads.reserve (vinfo->slp_instances.length ()); + for (slp_instance inst : vinfo->slp_instances) + heads.quick_push (SLP_INSTANCE_TREE (inst)->vertex); + auto_vec<int> po; + graphds_dfs (slpg, &heads[0], heads.length (), &po, true, NULL, NULL); + + /* Propagate materialized permutes to "any" permute nodes. For heads + ending up as "any" (reductions with just invariants), set them to + no permute. */ + for (int idx : heads) + if (vertices[idx].perm_out == -1) + vertices[idx].perm_out = 0; + for (i = po.length (); i > 0; --i) + { + int idx = po[i-1]; + int perm_in = vertices[idx].get_perm_in (); + slp_tree node = vertices[idx].node; + if (SLP_TREE_DEF_TYPE (node) == vect_external_def + || SLP_TREE_DEF_TYPE (node) == vect_constant_def) + continue; + gcc_assert (perm_in != -1); + for (graph_edge *succ = slpg->vertices[idx].succ; + succ; succ = succ->succ_next) + { + slp_tree succ_node = vertices[succ->dest].node; + if (SLP_TREE_DEF_TYPE (succ_node) == vect_external_def + || SLP_TREE_DEF_TYPE (succ_node) == vect_constant_def) + continue; + if (vertices[succ->dest].perm_out == -1) + vertices[succ->dest].perm_out = perm_in; + else + /* Propagation should have ensured that all preds have the same + permutation. */ + gcc_assert (vect_slp_perms_eq (perms, perm_in, + vertices[succ->dest].perm_out)); + } + } /* Materialize. */ for (i = 0; i < vertices.length (); ++i) -- 2.26.2