The recent change to relax store motion for variables that cannot have
store data races broke the optimization to share flag vars for stores
that all happen in the same single BB.  The following fixes this.

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

Richard.

        PR tree-optimization/115579
        * tree-ssa-loop-im.cc (execute_sm): Return the auxiliary data
        created.
        (hoist_memory_references): Record the flag var that's eventually
        created and re-use it when all stores are in the same BB.

        * gcc.dg/pr115579.c: New testcase.
---
 gcc/testsuite/gcc.dg/pr115579.c | 18 ++++++++++++++++++
 gcc/tree-ssa-loop-im.cc         | 27 ++++++++++++++-------------
 2 files changed, 32 insertions(+), 13 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/pr115579.c

diff --git a/gcc/testsuite/gcc.dg/pr115579.c b/gcc/testsuite/gcc.dg/pr115579.c
new file mode 100644
index 00000000000..04781056723
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr115579.c
@@ -0,0 +1,18 @@
+/* { dg-do run } */
+/* { dg-options "-Os -fno-tree-sra" } */
+
+int printf(const char *, ...);
+int a, b = 1, c;
+int main() {
+  int d[2], *e = &d[1];
+  while (a) {
+    int *f = &b;
+    d[1] = 0;
+    *f = 0;
+  }
+  if (c)
+    printf("%d\n", *e);
+  if (b != 1)
+    __builtin_abort();
+  return 0;
+}
diff --git a/gcc/tree-ssa-loop-im.cc b/gcc/tree-ssa-loop-im.cc
index 3acbd886a0d..61c6339bc35 100644
--- a/gcc/tree-ssa-loop-im.cc
+++ b/gcc/tree-ssa-loop-im.cc
@@ -2269,7 +2269,7 @@ struct sm_aux
    temporary variable is put to the preheader of the loop, and assignments
    to the reference from the temporary variable are emitted to exits.  */
 
-static void
+static sm_aux *
 execute_sm (class loop *loop, im_mem_ref *ref,
            hash_map<im_mem_ref *, sm_aux *> &aux_map, bool maybe_mt,
            bool use_other_flag_var)
@@ -2345,6 +2345,8 @@ execute_sm (class loop *loop, im_mem_ref *ref,
       lim_data->tgt_loop = loop;
       gsi_insert_before (&gsi, load, GSI_SAME_STMT);
     }
+
+  return aux;
 }
 
 /* sm_ord is used for ordinary stores we can retain order with respect
@@ -2802,20 +2804,18 @@ hoist_memory_references (class loop *loop, bitmap 
mem_refs,
       hash_map<im_mem_ref *, sm_aux *> aux_map;
 
       /* Execute SM but delay the store materialization for ordered
-        sequences on exit.  */
-      bool first_p = true;
+        sequences on exit.  Remember a created flag var and make
+        sure to re-use it.  */
+      sm_aux *flag_var_aux = nullptr;
       EXECUTE_IF_SET_IN_BITMAP (mem_refs, 0, i, bi)
        {
          ref = memory_accesses.refs_list[i];
-         execute_sm (loop, ref, aux_map, true, !first_p);
-         first_p = false;
+         sm_aux *aux = execute_sm (loop, ref, aux_map, true,
+                                   flag_var_aux != nullptr);
+         if (aux->store_flag)
+           flag_var_aux = aux;
        }
 
-      /* Get at the single flag variable we eventually produced.  */
-      im_mem_ref *ref
-       = memory_accesses.refs_list[bitmap_first_set_bit (mem_refs)];
-      sm_aux *aux = *aux_map.get (ref);
-
       /* Materialize ordered store sequences on exits.  */
       edge e;
       FOR_EACH_VEC_ELT (exits, i, e)
@@ -2826,13 +2826,14 @@ hoist_memory_references (class loop *loop, bitmap 
mem_refs,
          /* Construct the single flag variable control flow and insert
             the ordered seq of stores in the then block.  With
             -fstore-data-races we can do the stores unconditionally.  */
-         if (aux->store_flag)
+         if (flag_var_aux)
            insert_e
              = single_pred_edge
                  (execute_sm_if_changed (e, NULL_TREE, NULL_TREE,
-                                         aux->store_flag,
+                                         flag_var_aux->store_flag,
                                          loop_preheader_edge (loop),
-                                         &aux->flag_bbs, append_cond_position,
+                                         &flag_var_aux->flag_bbs,
+                                         append_cond_position,
                                          last_cond_fallthru));
          execute_sm_exit (loop, insert_e, seq, aux_map, sm_ord,
                           append_cond_position, last_cond_fallthru);
-- 
2.43.0

Reply via email to