From: Ju-Zhe Zhong <juzhe.zh...@rivai.ai>

This patch fix issue: PR 99407
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99407

The enhancement implementation is simple:
1.Search gimple statement in program reverse order.
2.Queue the store statement which may be possible kill the def
  of previous store statement.
3.Perform dse_def_ref_analysis to remove stores will not kill
  any def.
  For example:
    a[i_18] = _5;
    ...
    foo (&a);
    a[i_18] = _7;
    
  a[i_18] = _7 is queued at the begining and will be removed
  in dse_def_ref_analysis.
4.Remove the store if the def is confirmed to be killed.

I have fully tested it in RISC-V foundation downstream port (RVV):
https://github.com/riscv-collab/riscv-gcc/tree/riscv-gcc-rvv-next

Are you willing to review this patch and test it in ARM/x86?

gcc/ChangeLog:

        * tree-ssa-dse.cc (dse_search_def_stores): New function.
        (dse_can_def_ref_p): Ditto.
        (dse_def_ref_analysis): Add a new argument.
        (dse_optimize_stmt): Pass through stores_queue.
        (pass_dse::execute): Add dse_def_ref_analysis and stores_queue.

gcc/testsuite/ChangeLog:

        * gcc.dg/tree-ssa/pr99407.c: New test.

---
 gcc/testsuite/gcc.dg/tree-ssa/pr99407.c |  30 ++++
 gcc/tree-ssa-dse.cc                     | 209 +++++++++++++++++++++++-
 2 files changed, 236 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr99407.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr99407.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr99407.c
new file mode 100644
index 00000000000..57cea77da7c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr99407.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-dse1-details" } */
+typedef float real_t;
+
+#define iterations 100000
+#define LEN_1D 32000
+#define LEN_2D 256
+real_t flat_2d_array[LEN_2D*LEN_2D];
+
+real_t x[LEN_1D];
+
+real_t a[LEN_1D],b[LEN_1D],c[LEN_1D],d[LEN_1D],e[LEN_1D],
+bb[LEN_2D][LEN_2D],cc[LEN_2D][LEN_2D],tt[LEN_2D][LEN_2D];
+
+int indx[LEN_1D];
+
+real_t* __restrict__ xx;
+real_t* yy;
+real_t s243(void)
+{
+  for (int nl = 0; nl < iterations; nl++) {
+    for (int i = 0; i < LEN_1D-1; i++) {
+        a[i] = b[i] + c[i  ] * d[i];
+        b[i] = a[i] + d[i  ] * e[i];
+        a[i] = b[i] + a[i+1] * d[i];
+    }
+  }
+}
+
+/* { dg-final { scan-tree-dump "Deleted dead store" "dse1" } } */
\ No newline at end of file
diff --git a/gcc/tree-ssa-dse.cc b/gcc/tree-ssa-dse.cc
index 34cfd1a8802..a8ca3672da2 100644
--- a/gcc/tree-ssa-dse.cc
+++ b/gcc/tree-ssa-dse.cc
@@ -1332,6 +1332,186 @@ dse_optimize_call (gimple_stmt_iterator *gsi, sbitmap 
live_bytes)
   return true;
 }
 
+/* Search the stores_queue to see whether there is a store has a same vdef
+   as the stmt.  */
+
+static bool
+dse_search_def_stores (function *fun, auto_vec<gimple *> &stores_queue,
+                      gimple *stmt)
+{
+  /* Consider the following sequcence:
+    a[i_18] = _5;
+    _8 = e[i_18];
+    _9 = _3 * _8;
+    _10 = _5 + _9;
+    b[i_18] = _10;
+    _12 = i_18 + 1;
+    _13 = a[_12];
+    _15 = _3 * _13;
+    _16 = _10 + _15;
+    a[i_18] = _16
+
+    We should be able to remove a[i_18] = _5.  */
+  for (unsigned int i = 0; i < stores_queue.length (); ++i)
+    {
+      if (!stores_queue[i])
+       continue;
+      tree lhs1 = gimple_assign_lhs (stores_queue[i]);
+      tree lhs2 = gimple_assign_lhs (stmt);
+
+      if (TREE_CODE (lhs1) != TREE_CODE (lhs2))
+       continue;
+      if (operand_equal_p (gimple_assign_lhs (stores_queue[i]),
+                          gimple_assign_lhs (stmt), OEP_ADDRESS_OF))
+       {
+         /* No matter it can be eliminated or not, remove it
+            in the worklist.  */
+         stores_queue[i] = NULL;
+         if (gimple_assign_single_p (stmt) && !gimple_has_side_effects (stmt)
+             && !is_ctrl_altering_stmt (stmt)
+             && (!stmt_could_throw_p (fun, stmt)
+                 || fun->can_delete_dead_exceptions))
+           return true;
+       }
+    }
+
+  return false;
+}
+
+/* Return true if the TREE_CODE of the mem op is allowed to do dse
+   according to def-ref analysis.  */
+
+static bool
+dse_can_def_ref_p (gimple *stmt)
+{
+  /*TODO: For now, we only support dse according to
+    def-ref analysis for ARRAY_REF.  */
+  return TREE_CODE (gimple_assign_lhs (stmt)) == ARRAY_REF;
+}
+
+/* Perform def-ref analysis on all the stores of stores_queue worklist.
+   Since dse is running on reverse program order walk, the stores in
+   stores_queue are always after stmt, clear the store in the stores_queue
+   if the address of store lhs is changed or the lhs of store is used
+   in stmt.  */
+
+static void
+dse_def_ref_analysis (gimple *stmt, auto_vec<gimple *> &stores_queue)
+{
+  for (unsigned int i = 0; i < stores_queue.length (); ++i)
+    {
+      if (!stores_queue[i])
+       continue;
+
+      /* If we meet non-call or non-assign statement, we disable the possible
+       * dse.  */
+      if (gimple_code (stmt) != GIMPLE_CALL
+         && gimple_code (stmt) != GIMPLE_ASSIGN)
+       {
+         stores_queue[i] = NULL;
+         continue;
+       }
+
+      tree lhs = gimple_get_lhs (stores_queue[i]);
+      if (!lhs)
+       continue;
+      switch (TREE_CODE (lhs))
+       {
+         /* Handle the array like a[i_18]:
+            1.if i_18 is changed in stmt which means lhs of stmt == i_18,
+              we remove the store from the stores_queue.
+
+            2.a or a[i_18] is used in stmt, we remove the store from the
+              stores_queue.  */
+         case ARRAY_REF: {
+           if (gimple_get_lhs (stmt)
+               && operand_equal_p (gimple_get_lhs (stmt),
+                                   TREE_OPERAND (lhs, 1), 0))
+             {
+               stores_queue[i] = NULL;
+               continue;
+             }
+
+           for (unsigned int j = 0; j < gimple_num_ops (stmt); ++j)
+             {
+               tree op = gimple_op (stmt, j);
+               if (!op)
+                 continue;
+
+               /* We only check the use op.  */
+               if (op == gimple_get_lhs (stmt))
+                 continue;
+
+               if (TREE_OPERAND_LENGTH (op) == 0)
+                 {
+                   if (operand_equal_p (op, lhs, 0)
+                       || operand_equal_p (op, TREE_OPERAND (lhs, 0), 0))
+                     stores_queue[i] = NULL;
+                 }
+               else
+                 {
+                   for (int k = 0; k < TREE_OPERAND_LENGTH (op); ++k)
+                     {
+                       if (!TREE_OPERAND (op, k))
+                         continue;
+
+                       if (TREE_CODE (op) == ARRAY_REF)
+                         {
+                           /*
+                           Disable optimization like this:
+                             a[i_18] = _5;
+                             ...
+                             foo (a[i_18]).
+
+                           Don't disable optimization like this:
+                             a[i_18] = _5;
+                             ...
+                             foo (a[i_12]).
+                           */
+                           if (operand_equal_p (op, lhs, OEP_ADDRESS_OF))
+                             {
+                               stores_queue[i] = NULL;
+                               break;
+                             }
+                         }
+                       else
+                         {
+                           /* To be conservative, if TREE_OPERAND (op, k)
+                              length > 1. Disable the possible dse
+                              optimization.
+                           Disable optimization like this:
+                             a[i_18] = _5;
+                             ...
+                             foo (&a).
+
+                           Or
+                             a[i_18] = _5;
+                             ...
+                             foo (test (&a)).
+                           */
+                           if (TREE_OPERAND_LENGTH (TREE_OPERAND (op, k)) > 0
+                               || operand_equal_p (TREE_OPERAND (op, k), lhs,
+                                                   0)
+                               || operand_equal_p (TREE_OPERAND (op, k),
+                                                   TREE_OPERAND (lhs, 0), 0))
+                             {
+                               stores_queue[i] = NULL;
+                               break;
+                             }
+                         }
+                     }
+                 }
+               if (!stores_queue[i])
+                 break;
+             }
+         }
+         break;
+       default:
+         gcc_unreachable ();
+       }
+    }
+}
+
 /* Attempt to eliminate dead stores in the statement referenced by BSI.
 
    A dead store is a store into a memory location which will later be
@@ -1344,7 +1524,8 @@ dse_optimize_call (gimple_stmt_iterator *gsi, sbitmap 
live_bytes)
    post dominates the first store, then the first store is dead.  */
 
 static void
-dse_optimize_stmt (function *fun, gimple_stmt_iterator *gsi, sbitmap 
live_bytes)
+dse_optimize_stmt (function *fun, gimple_stmt_iterator *gsi, sbitmap 
live_bytes,
+                  auto_vec<gimple *> &stores_queue)
 {
   gimple *stmt = gsi_stmt (*gsi);
 
@@ -1469,7 +1650,25 @@ dse_optimize_stmt (function *fun, gimple_stmt_iterator 
*gsi, sbitmap live_bytes)
                                         byte_tracking_enabled,
                                         live_bytes, &by_clobber_p);
       if (store_status == DSE_STORE_LIVE)
-       return;
+       {
+         if (gimple_assign_single_p (stmt) && !gimple_has_side_effects (stmt)
+             && !is_ctrl_altering_stmt (stmt)
+             && (!stmt_could_throw_p (fun, stmt)
+                 || fun->can_delete_dead_exceptions))
+           {
+             if (dse_search_def_stores (fun, stores_queue, stmt))
+               ;
+             else if (dse_can_def_ref_p (stmt))
+               {
+                 stores_queue.safe_push (stmt);
+                 return;
+               }
+             else
+               return;
+           }
+         else
+           return;
+       }
 
       if (store_status == DSE_STORE_MAYBE_PARTIAL_DEAD)
        {
@@ -1566,13 +1765,17 @@ pass_dse::execute (function *fun)
     {
       basic_block bb = BASIC_BLOCK_FOR_FN (fun, rpo[i-1]);
       gimple_stmt_iterator gsi;
+      /* Queue the stores which potentially updates mem of a previous
+        redundant store.  We only do the optimization within a basic block.  */
+      auto_vec<gimple *> stores_queue;
 
       for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);)
        {
          gimple *stmt = gsi_stmt (gsi);
+         dse_def_ref_analysis (stmt, stores_queue);
 
          if (gimple_vdef (stmt))
-           dse_optimize_stmt (fun, &gsi, live_bytes);
+           dse_optimize_stmt (fun, &gsi, live_bytes, stores_queue);
          else if (def_operand_p
                     def_p = single_ssa_def_operand (stmt, SSA_OP_DEF))
            {
-- 
2.36.1

Reply via email to