This avoids altering possibly shared SLP subtrees when trying to get rid
of permutations in SLP reductions, by copying the SLP subtree before
re-arranging the stmts in it.
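To see why the copy is needed at all: the same SLP child node can be
reachable from more than one parent, so permuting its stmts in place
mutates every other user of that subtree as well.  Below is a minimal,
self-contained illustration of that hazard -- the toy_node type and the
permute_in_place helper are hypothetical stand-ins for exposition only,
not GCC's _slp_tree:

#include <cassert>
#include <memory>
#include <vector>

struct toy_node
{
  std::vector<int> stmts;	/* stand-in for SLP_TREE_SCALAR_STMTS  */
  std::vector<std::shared_ptr<toy_node> > children;
};

/* Permute the stmts of NODE in place, as the pre-patch code
   effectively did.  */
static void
permute_in_place (toy_node &node, const std::vector<unsigned> &perm)
{
  std::vector<int> tmp (node.stmts.size ());
  for (unsigned i = 0; i < perm.size (); ++i)
    tmp[i] = node.stmts[perm[i]];
  node.stmts = tmp;
}

int
main ()
{
  /* One load node shared by two parents.  */
  std::shared_ptr<toy_node> load = std::make_shared<toy_node> ();
  load->stmts = {0, 1};
  toy_node parent_a, parent_b;
  parent_a.children.push_back (load);
  parent_b.children.push_back (load);

  /* Copy before mutating, as the patch does: unshare parent_a's view.  */
  parent_a.children[0] = std::make_shared<toy_node> (*load);
  permute_in_place (*parent_a.children[0], {1, 0});

  /* The shared original seen by parent_b is untouched.  Permuting *load
     in place instead would silently have reordered parent_b's stmts too,
     which is the wrong-code bug of PR93868.  */
  assert (parent_b.children[0]->stmts[0] == 0
	  && parent_b.children[0]->stmts[1] == 1);
  return 0;
}

The patch below implements the same idea for real, with slp_copy_subtree
doing the unsharing.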
It's a little bit awkward, and some refactoring I've done for GCC 11
would make this less so, but now is not the time to push that, thus the
following.  Copying is also easier than verifying there's no sharing
involved.  (A stand-alone sketch of the memoized copying scheme follows
after the patch.)

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

Richard.

2020-02-24  Richard Biener  <rguent...@suse.de>

	PR tree-optimization/93868
	* tree-vect-slp.c (slp_copy_subtree): New function.
	(vect_attempt_slp_rearrange_stmts): Copy the SLP tree before
	re-arranging stmts in it.

	* gcc.dg/torture/pr93868.c: New testcase.
---
 gcc/testsuite/gcc.dg/torture/pr93868.c | 31 +++++++++++++++++++++
 gcc/tree-vect-slp.c                    | 50 ++++++++++++++++++++++++++++++++++
 2 files changed, 81 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/torture/pr93868.c

diff --git a/gcc/testsuite/gcc.dg/torture/pr93868.c b/gcc/testsuite/gcc.dg/torture/pr93868.c
new file mode 100644
index 00000000000..850eba1dd49
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr93868.c
@@ -0,0 +1,31 @@
+/* { dg-do run } */
+/* { dg-additional-options "-ftree-vectorize" } */
+
+unsigned a[1024];
+unsigned b[1024];
+
+void __attribute__((noipa))
+foo (unsigned *q, unsigned *r)
+{
+  unsigned sum1 = 0, sum2 = 0;
+  for (int i = 0; i < 512; ++i)
+    {
+      sum1 += a[2*i];
+      sum2 += a[2*i+1];
+      b[2*i] = a[2*i+1];
+      b[2*i+1] = a[2*i];
+    }
+  *q = sum1;
+  *r = sum2;
+}
+
+int main()
+{
+  unsigned sum1, sum2;
+  a[0] = 0;
+  a[1] = 1;
+  foo (&sum1, &sum2);
+  if (b[0] != 1 || b[1] != 0)
+    __builtin_abort ();
+  return 0;
+}
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index c61c7e197d8..c1fa0e6b6a4 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -1749,6 +1749,44 @@ vect_mark_slp_stmts_relevant (slp_tree node)
   vect_mark_slp_stmts_relevant (node, visited);
 }
 
+/* Copy the SLP subtree rooted at NODE.  */
+
+static slp_tree
+slp_copy_subtree (slp_tree node, hash_map<slp_tree, slp_tree> &map)
+{
+  unsigned i;
+
+  bool existed_p;
+  slp_tree &copy = map.get_or_insert (node, &existed_p);
+  if (existed_p)
+    return copy;
+
+  copy = XNEW (_slp_tree);
+  memcpy (copy, node, sizeof (_slp_tree));
+  if (SLP_TREE_SCALAR_STMTS (node).exists ())
+    {
+      SLP_TREE_SCALAR_STMTS (copy) = SLP_TREE_SCALAR_STMTS (node).copy ();
+      stmt_vec_info stmt_info;
+      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
+	STMT_VINFO_NUM_SLP_USES (stmt_info)++;
+    }
+  if (SLP_TREE_SCALAR_OPS (node).exists ())
+    SLP_TREE_SCALAR_OPS (copy) = SLP_TREE_SCALAR_OPS (node).copy ();
+  if (SLP_TREE_LOAD_PERMUTATION (node).exists ())
+    SLP_TREE_LOAD_PERMUTATION (copy) = SLP_TREE_LOAD_PERMUTATION (node).copy ();
+  if (SLP_TREE_CHILDREN (node).exists ())
+    SLP_TREE_CHILDREN (copy) = SLP_TREE_CHILDREN (node).copy ();
+  gcc_assert (!SLP_TREE_VEC_STMTS (node).exists ());
+  copy->refcnt = 0;
+
+  slp_tree child;
+  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (copy), i, child)
+    {
+      SLP_TREE_CHILDREN (copy)[i] = slp_copy_subtree (child, map);
+      SLP_TREE_CHILDREN (copy)[i]->refcnt++;
+    }
+  return copy;
+}
 
 /* Rearrange the statements of NODE according to PERMUTATION.  */
 
@@ -1841,6 +1879,18 @@ vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
      statements in the nodes is not important unless they are memory
      accesses, we can rearrange the statements in all the nodes
      according to the order of the loads.  */
+
+  /* We have to unshare the SLP tree we modify.  */
+  hash_map<slp_tree, slp_tree> map;
+  slp_tree unshared = slp_copy_subtree (SLP_INSTANCE_TREE (slp_instn), map);
+  vect_free_slp_tree (SLP_INSTANCE_TREE (slp_instn), false);
+  unshared->refcnt++;
+  SLP_INSTANCE_TREE (slp_instn) = unshared;
+  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
+    SLP_INSTANCE_LOADS (slp_instn)[i] = *map.get (node);
+  node = SLP_INSTANCE_LOADS (slp_instn)[0];
+
+  /* Do the actual re-arrangement.  */
   hash_set<slp_tree> visited;
   vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
			     node->load_permutation, visited);
-- 
2.16.4
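For reference, the copying scheme in slp_copy_subtree above is a memoized
DAG copy: the hash_map records the copy of each visited node before its
children are processed, so a node reached through two parents is copied
exactly once, sharing within the copied tree is preserved, and refcounts
are rebuilt from the new parent edges.  A rough stand-alone sketch of
that pattern -- node_t is a hypothetical stand-in for _slp_tree, with
plain new in place of GCC's allocators:

#include <unordered_map>
#include <vector>

struct node_t
{
  std::vector<node_t *> children;
  unsigned refcnt = 0;
};

/* Memoized copy of the DAG rooted at NODE; MAP plays the role of the
   hash_map in slp_copy_subtree.  */
static node_t *
copy_dag (node_t *node, std::unordered_map<node_t *, node_t *> &map)
{
  /* A node seen before returns its existing copy, preserving any
     sharing present in the original.  */
  auto it = map.find (node);
  if (it != map.end ())
    return it->second;

  node_t *copy = new node_t (*node);
  copy->refcnt = 0;		/* rebuilt from the new parent edges below  */
  map[node] = copy;

  for (unsigned i = 0; i < copy->children.size (); ++i)
    {
      copy->children[i] = copy_dag (node->children[i], map);
      copy->children[i]->refcnt++;
    }
  return copy;
}

int
main ()
{
  /* Diamond: root has two children that share one grandchild.  */
  node_t shared, left, right, root;
  left.children = {&shared};
  right.children = {&shared};
  root.children = {&left, &right};

  std::unordered_map<node_t *, node_t *> map;
  node_t *copy = copy_dag (&root, &map);

  /* Sharing is preserved in the copy and the refcount rebuilt.  */
  bool ok = (copy->children[0]->children[0] == copy->children[1]->children[0]
	     && copy->children[0]->children[0]->refcnt == 2);
  return ok ? 0 : 1;
}

The same map is also what lets the caller remap SLP_INSTANCE_LOADS to the
copied nodes afterwards, as the second hunk of the patch does with
map.get (node).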