The following fixes the eliding of the permutation of a BB reduction
of an existing vector, which breaks materialization of live lanes
because we fail to permute the SLP_TREE_SCALAR_STMTS vector as well.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

        PR tree-optimization/113896
        * tree-vect-slp.cc (vect_optimize_slp): Permute
        SLP_TREE_SCALAR_STMTS when eliding a permutation in a
        VEC_PERM node we need to preserve because it wraps an
        extern vector.

        * g++.dg/torture/pr113896.C: New testcase.
---
 gcc/testsuite/g++.dg/torture/pr113896.C | 35 +++++++++++++++++++++++++
 gcc/tree-vect-slp.cc                    |  9 +++++++
 2 files changed, 44 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/torture/pr113896.C

diff --git a/gcc/testsuite/g++.dg/torture/pr113896.C b/gcc/testsuite/g++.dg/torture/pr113896.C
new file mode 100644
index 00000000000..534c1c2e1cc
--- /dev/null
+++ b/gcc/testsuite/g++.dg/torture/pr113896.C
@@ -0,0 +1,35 @@
+// { dg-do run }
+// { dg-additional-options "-ffast-math" }
+
+double a1 = 1.0;
+double a2 = 1.0;
+
+void __attribute__((noipa))
+f(double K[2], bool b)
+{
+    double A[] = {
+        b ? a1 : a2,
+        0,
+        0,
+        0
+    };
+
+    double sum{};
+    for(double  a : A) sum += a;
+    for(double& a : A) a /= sum;
+
+    if (b) {
+        K[0] = A[0]; // 1.0
+        K[1] = A[1]; // 0.0
+    } else {
+        K[0] = A[0] + A[1];
+    }
+}
+
+int main()
+{
+  double K[2]{};
+  f(K, true);
+  if (K[0] != 1. || K[1] != 0.)
+    __builtin_abort ();
+}
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index af477c31aa3..b3e3d9e7009 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -4058,6 +4058,15 @@ vect_optimize_slp (vec_info *vinfo)
                {
                  /* Preserve the special VEC_PERM we use to shield existing
                     vector defs from the rest.  But make it a no-op.  */
+                 auto_vec<stmt_vec_info, 64> saved;
+                 saved.create (SLP_TREE_SCALAR_STMTS (old).length ());
+                 for (unsigned i = 0;
+                      i < SLP_TREE_SCALAR_STMTS (old).length (); ++i)
+                   saved.quick_push (SLP_TREE_SCALAR_STMTS (old)[i]);
+                 for (unsigned i = 0;
+                      i < SLP_TREE_SCALAR_STMTS (old).length (); ++i)
+                   SLP_TREE_SCALAR_STMTS (old)[i]
+                     = saved[SLP_TREE_LANE_PERMUTATION (old)[i].second];
                  unsigned i = 0;
                  for (std::pair<unsigned, unsigned> &p
                       : SLP_TREE_LANE_PERMUTATION (old))
-- 
2.35.3

Reply via email to