This resurrects a patch from a bit over 2 years ago that I never wrapped up. IIRC, I ended up up catching covid, then in the hospital for an unrelated issue and it just got dropped on the floor in the insanity.

The basic idea here is to help postreload-cse eliminate more const/copies by recording a small set of conditional equivalences (as Richi said in 2022, "Ick").

It was originally to help eliminate an unnecessary constant load I saw in coremark, but as seen in BZ107455 the same issues show up in real code as well.

Changes since v2:

  - Simplified logic for blocks to examine
  - Remove redundant tests when filtering blocks to examine
  - Remove bogus check which only allowed reg->reg copies


Changes since v1:

Richard B and Richard S both had good comments last time around and their requests are reflected in this update:

  - Use rtx_equal_p rather than pointer equality
  - Restrict to register "destinations"
  - Restrict to integer modes
  - Adjust entry block handling

My own wider scale testing resulted in a few more changes.

  - Robustify extracting the (set (pc) ... ), which then required ...
  - Handle if src/dst are clobbered by the conditional branch
  - Fix logic error causing too many equivalences to be recorded

Pushing to the trunk.
Jeff

commit d23d338da4d2bd581b2d3fd97785dd2c26053a92
Author: Jeff Law <j...@ventanamicro.com>
Date:   Mon Jan 13 07:29:39 2025 -0700

    [PR rtl-optimization/107455] Eliminate unnecessary constant load
    
    This resurrects a patch from a bit over 2 years ago that I never wrapped up.
    IIRC, I ended up up catching covid, then in the hospital for an unrelated 
issue
    and it just got dropped on the floor in the insanity.
    
    The basic idea here is to help postreload-cse eliminate more const/copies by
    recording a small set of conditional equivalences (as Richi said in 2022,
    "Ick").
    
    It was originally to help eliminate an unnecessary constant load I saw in
    coremark, but as seen in BZ107455 the same issues show up in real code as 
well.
    
    Bootstrapped and regression tested on x86-64, also been through multiple 
spins
    in my tester.
    
    Changes since v2:
    
      - Simplified logic for blocks to examine
      - Remove redundant tests when filtering blocks to examine
      - Remove bogus check which only allowed reg->reg copies
    
    Changes since v1:
    
    Richard B and Richard S both had good comments last time around and their
    requests are reflected in this update:
    
      - Use rtx_equal_p rather than pointer equality
      - Restrict to register "destinations"
      - Restrict to integer modes
      - Adjust entry block handling
    
    My own wider scale testing resulted in a few more changes.
    
      - Robustify extracting the (set (pc) ... ), which then required ...
      - Handle if src/dst are clobbered by the conditional branch
      - Fix logic error causing too many equivalences to be recorded
    
            PR rtl-optimization/107455
    gcc/
            * postreload.cc (reload_cse_regs_1): Take advantage of conditional
            equivalences.
    
    gcc/testsuite
            * gcc.target/riscv/pr107455-1.c: New test.
            * gcc.target/riscv/pr107455-2.c: New test.

diff --git a/gcc/postreload.cc b/gcc/postreload.cc
index 05c2e0e2644..487aa8aad05 100644
--- a/gcc/postreload.cc
+++ b/gcc/postreload.cc
@@ -33,6 +33,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "emit-rtl.h"
 #include "recog.h"
 
+#include "cfghooks.h"
 #include "cfgrtl.h"
 #include "cfgbuild.h"
 #include "cfgcleanup.h"
@@ -221,13 +222,107 @@ reload_cse_regs_1 (void)
   init_alias_analysis ();
 
   FOR_EACH_BB_FN (bb, cfun)
-    FOR_BB_INSNS (bb, insn)
-      {
-       if (INSN_P (insn))
-         cfg_changed |= reload_cse_simplify (insn, testreg);
+    {
+      /* If BB has a small number of predecessors, see if each of the
+        has the same implicit set.  If so, record that implicit set so
+        that we can add it to the cselib tables.  */
+      rtx_insn *implicit_set;
 
-       cselib_process_insn (insn);
-      }
+      implicit_set = NULL;
+      if (EDGE_COUNT (bb->preds) <= 3)
+       {
+         edge e;
+         edge_iterator ei;
+         rtx src = NULL_RTX;
+         rtx dest = NULL_RTX;
+
+         /* Iterate over each incoming edge and see if they
+            all have the same implicit set.  */
+         FOR_EACH_EDGE (e, ei, bb->preds)
+           {
+             /* Skip the entry/exit block.  */
+             if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
+               break;
+
+             /* Verify this block ends with a suitable condjump  */
+             rtx_insn *condjump = BB_END (e->src);
+             if (!condjump || ! any_condjump_p (condjump))
+               break;
+
+             /* This predecessor ends with a possible equivalence
+                producing conditional branch.  Extract the condition
+                and try to use it to create an equivalence.  */
+             rtx pat = pc_set (condjump);
+             rtx i_t_e = SET_SRC (pat);
+             gcc_assert (GET_CODE (i_t_e) == IF_THEN_ELSE);
+             rtx cond = XEXP (i_t_e, 0);
+
+             if ((((e->flags & EDGE_FALLTHRU) != 0)
+                  == (XEXP (i_t_e, 1) == pc_rtx))
+                 ? GET_CODE (cond) == EQ
+                 : GET_CODE (cond) == NE)
+               {
+                 /* If this is the first time through record
+                    the source and destination.  */
+                 if (!dest)
+                   {
+                     dest = XEXP (cond, 0);
+                     src = XEXP (cond, 1);
+                   }
+                 /* If this is not the first time through, then
+                    verify the source and destination match.  */
+                 else if (rtx_equal_p (dest, XEXP (cond, 0))
+                          && rtx_equal_p (src, XEXP (cond, 1)))
+                   ;
+                 else
+                   break;
+
+                 /* A few more checks.  First make sure we're dealing with
+                    integer modes, second make sure the values aren't clobbered
+                    by the conditional branch itself.  Do this for every
+                    conditional jump participating in creation of the
+                    equivalence.  */
+                 if (!REG_P (dest)
+                     || !(REG_P (src) || CONST_INT_P (src))
+                     || GET_MODE_CLASS (GET_MODE (dest)) != MODE_INT
+                     || reg_set_p (dest, condjump)
+                     || reg_set_p (src, condjump))
+                   break;
+
+               }
+             else
+               break;
+           }
+
+         /* If all the incoming edges had the same implicit
+            set, then create a dummy insn for that set.
+
+            It will be entered into the cselib tables before
+            we process the first real insn in this block.  */
+         if (dest && ei_end_p (ei))
+           implicit_set = make_insn_raw (gen_rtx_SET (dest, src));
+       }
+
+      FOR_BB_INSNS (bb, insn)
+       {
+         if (INSN_P (insn))
+           {
+             /* If we recorded an implicit set, enter it
+                into the tables before the first real insn.
+
+                We have to do it this way because a CODE_LABEL
+                will flush the cselib tables.  */
+             if (implicit_set)
+               {
+                 cselib_process_insn (implicit_set);
+                 implicit_set = NULL;
+               }
+             cfg_changed |= reload_cse_simplify (insn, testreg);
+           }
+
+         cselib_process_insn (insn);
+       }
+    }
 
   /* Clean up.  */
   end_alias_analysis ();
diff --git a/gcc/testsuite/gcc.target/riscv/pr107455-1.c 
b/gcc/testsuite/gcc.target/riscv/pr107455-1.c
new file mode 100644
index 00000000000..59616b89176
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr107455-1.c
@@ -0,0 +1,46 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-dp" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-O1" "-O2" "-O3" "-Og" } } */
+
+
+typedef struct dllist
+{
+  int i;
+  struct dllist *ptr_to_next;
+  struct dllist *ptr_to_previous;
+} dllist;
+
+int sglib_dllist_len(dllist *list) {
+    int res;
+    dllist *_dl_;
+    int _r1_, _r2_;
+    if (list== ((void *)0)) {
+        res = 0;
+    } else { 
+        dllist *_ce_;
+        dllist *_ne_;
+        _r1_ = 0;
+        _ce_ = list;
+        while (_ce_!= ((void *)0)) {
+            _ne_ = _ce_->ptr_to_previous;
+            _r1_++;
+            _ce_ = _ne_;
+        }
+        _dl_ = list->ptr_to_next;
+        _r2_ = 0;
+        _ce_ = _dl_;
+        while (_ce_!= (void *)0) {
+            _ne_ = _ce_->ptr_to_next;
+            _r2_++;
+            _ce_ = _ne_;
+        }
+        res = _r1_ + _r2_;
+    }
+    return res;
+}
+
+
+/* There was an unnecessary assignment to the return value until
+   recently.  Scan for that in the resulting output.  */
+/* { dg-final { scan-assembler-times "li\\ta0,0" 1 } } */
+
diff --git a/gcc/testsuite/gcc.target/riscv/pr107455-2.c 
b/gcc/testsuite/gcc.target/riscv/pr107455-2.c
new file mode 100644
index 00000000000..91106bb5d80
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr107455-2.c
@@ -0,0 +1,40 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -dp" } */
+/* This was extracted from coremark.  */
+
+
+typedef signed short ee_s16;
+typedef struct list_data_s
+{
+    ee_s16 data16;
+    ee_s16 idx;
+} list_data;
+
+typedef struct list_head_s
+{
+    struct list_head_s *next;
+    struct list_data_s *info;
+} list_head;
+
+
+list_head *
+core_list_find(list_head *list, list_data *info)
+{
+    if (info->idx >= 0)
+    {
+        while (list && (list->info->idx != info->idx))
+            list = list->next;
+        return list;
+    }
+    else
+    {
+        while (list && ((list->info->data16 & 0xff) != info->data16))
+            list = list->next;
+        return list;
+    }
+}
+
+/* There was an unnecessary assignment to the return value until
+   recently.  Scan for that in the resulting output.  */
+/* { dg-final { scan-assembler-not "li\\ta0,0" } } */
+

Reply via email to