The following fixes the eliding of the permutation of a BB reduction of an existing vector, which breaks materialization of live lanes because we fail to permute the SLP_TREE_SCALAR_STMTS vector.
Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed. PR tree-optimization/113896 * tree-vect-slp.cc (vect_optimize_slp): Permute SLP_TREE_SCALAR_STMTS when eliding a permutation in a VEC_PERM node we need to preserve because it wraps an extern vector. * g++.dg/torture/pr113896.C: New testcase. --- gcc/testsuite/g++.dg/torture/pr113896.C | 35 +++++++++++++++++++++++++ gcc/tree-vect-slp.cc | 9 +++++++ 2 files changed, 44 insertions(+) create mode 100644 gcc/testsuite/g++.dg/torture/pr113896.C diff --git a/gcc/testsuite/g++.dg/torture/pr113896.C b/gcc/testsuite/g++.dg/torture/pr113896.C new file mode 100644 index 00000000000..534c1c2e1cc --- /dev/null +++ b/gcc/testsuite/g++.dg/torture/pr113896.C @@ -0,0 +1,35 @@ +// { dg-do run } +// { dg-additional-options "-ffast-math" } + +double a1 = 1.0; +double a2 = 1.0; + +void __attribute__((noipa)) +f(double K[2], bool b) +{ + double A[] = { + b ? a1 : a2, + 0, + 0, + 0 + }; + + double sum{}; + for(double a : A) sum += a; + for(double& a : A) a /= sum; + + if (b) { + K[0] = A[0]; // 1.0 + K[1] = A[1]; // 0.0 + } else { + K[0] = A[0] + A[1]; + } +} + +int main() +{ + double K[2]{}; + f(K, true); + if (K[0] != 1. || K[1] != 0.) + __builtin_abort (); +} diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index af477c31aa3..b3e3d9e7009 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -4058,6 +4058,15 @@ vect_optimize_slp (vec_info *vinfo) { /* Preserve the special VEC_PERM we use to shield existing vector defs from the rest. But make it a no-op. 
*/ + auto_vec<stmt_vec_info, 64> saved; + saved.create (SLP_TREE_SCALAR_STMTS (old).length ()); + for (unsigned i = 0; + i < SLP_TREE_SCALAR_STMTS (old).length (); ++i) + saved.quick_push (SLP_TREE_SCALAR_STMTS (old)[i]); + for (unsigned i = 0; + i < SLP_TREE_SCALAR_STMTS (old).length (); ++i) + SLP_TREE_SCALAR_STMTS (old)[i] + = saved[SLP_TREE_LANE_PERMUTATION (old)[i].second]; unsigned i = 0; for (std::pair<unsigned, unsigned> &p : SLP_TREE_LANE_PERMUTATION (old)) -- 2.35.3