Currently we do SLP CSE after permute optimization using a single map shared
across all SLP instances. These SLP instances can affect many different basic
blocks, and this shared map (which acts as the CSE cache) may replace a
statement in one block with one from another block.
Because there are no further restrictions and the blocks may be visited in an
arbitrary order, this may lead to a statement being used on paths where it may
not be defined. This patch creates one map per SLP instance to address that.

        PR tree-optimization/116352

gcc/ChangeLog:

        * tree-vect-slp.cc (vect_optimize_slp): Use one scalar stmts to tree
        map per SLP instance.

gcc/testsuite/ChangeLog:

        * gcc.dg/pr116352.c: New test.

Signed-off-by: Manolis Tsamis <manolis.tsa...@vrull.eu>
---

 gcc/testsuite/gcc.dg/pr116352.c | 18 ++++++++++++++++++
 gcc/tree-vect-slp.cc            | 12 +++++++-----
 2 files changed, 25 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/pr116352.c

diff --git a/gcc/testsuite/gcc.dg/pr116352.c b/gcc/testsuite/gcc.dg/pr116352.c
new file mode 100644
index 00000000000..c427eff3c08
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr116352.c
@@ -0,0 +1,18 @@
+/* PR tree-optimization/116352 */
+/* { dg-do compile } */
+/* { dg-options "-O3 -fchecking" } */
+
+int a;
+float b, c;
+void l(int h, int f, int g, float *e)
+{
+  for (int m = 0; m < h; m++)
+  {
+    float i = 2 * b, j = 2 * c;
+    if (a) {
+      e[m*4 + 0] = e[m*4 + 1] = (j - g * 0.5f);
+      e[m*4 + 2] = e[m*4 + 3] = (i + f * 0.5f);
+    } else
+      e[m*4 + 0] = f * 0.5f + g * 0.5f;
+  }
+}
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index cfdf59ad386..a8836c2fb04 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -6356,13 +6356,15 @@ vect_optimize_slp (vec_info *vinfo)
   vect_optimize_slp_pass (vinfo).run ();
 
   /* Apply CSE again to nodes after permute optimization.  */
-  scalar_stmts_to_slp_tree_map_t *bst_map
-    = new scalar_stmts_to_slp_tree_map_t ();
-
   for (auto inst : vinfo->slp_instances)
-    vect_cse_slp_nodes (bst_map, SLP_INSTANCE_TREE (inst));
+    {
+      scalar_stmts_to_slp_tree_map_t *bst_map
+       = new scalar_stmts_to_slp_tree_map_t ();
 
-  release_scalar_stmts_to_slp_tree_map (bst_map);
+      vect_cse_slp_nodes (bst_map, SLP_INSTANCE_TREE (inst));
+
+      release_scalar_stmts_to_slp_tree_map (bst_map);
+    }
 }
 
 /* Gather loads reachable from the individual SLP graph entries.  */
-- 
2.34.1

Reply via email to