This patch implements a new RTL pass that combines "li a0, 0" and
"cm.popret" into a single "cm.popretz" instruction for the Zcmp
extension.

This optimization cannot be done during prologue/epilogue expansion
because it would cause shrink-wrapping to generate incorrect code as
documented in PR113715. The dedicated RTL pass runs after shrink-wrap
but before branch shortening, safely performing this combination.

Changes since v1:
- Tweak the testcase.

gcc/ChangeLog:

        * config/riscv/riscv-opt-popretz.cc: New file.
        * config/riscv/riscv-passes.def: Insert pass_combine_popretz before
        pass_shorten_branches.
        * config/riscv/riscv-protos.h (make_pass_combine_popretz): New
        declaration.
        * config/riscv/t-riscv: Add riscv-opt-popretz.o build rule.
        * config.gcc (riscv*): Add riscv-opt-popretz.o to extra_objs.

gcc/testsuite/ChangeLog:

        * gcc.target/riscv/pr113715.c: New test.
        * gcc.target/riscv/rv32e_zcmp.c: Update expected output for
        test_popretz.
        * gcc.target/riscv/rv32i_zcmp.c: Likewise.
---
 gcc/config.gcc                              |   2 +-
 gcc/config/riscv/riscv-opt-popretz.cc       | 294 ++++++++++++++++++++
 gcc/config/riscv/riscv-passes.def           |   1 +
 gcc/config/riscv/riscv-protos.h             |   1 +
 gcc/config/riscv/t-riscv                    |   6 +
 gcc/testsuite/gcc.target/riscv/pr113715.c   |  61 ++++
 gcc/testsuite/gcc.target/riscv/rv32e_zcmp.c |   3 +-
 gcc/testsuite/gcc.target/riscv/rv32i_zcmp.c |   3 +-
 8 files changed, 366 insertions(+), 5 deletions(-)
 create mode 100644 gcc/config/riscv/riscv-opt-popretz.cc
 create mode 100644 gcc/testsuite/gcc.target/riscv/pr113715.c

diff --git a/gcc/config.gcc b/gcc/config.gcc
index b0fa43b5eba..b61a452016b 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -559,7 +559,7 @@ riscv*)
        extra_objs="riscv-builtins.o riscv-c.o riscv-sr.o 
riscv-shorten-memrefs.o riscv-selftests.o riscv-string.o"
        extra_objs="${extra_objs} riscv-v.o riscv-vsetvl.o riscv-vector-costs.o 
riscv-avlprop.o riscv-vect-permconst.o"
        extra_objs="${extra_objs} riscv-vector-builtins.o 
riscv-vector-builtins-shapes.o riscv-vector-builtins-bases.o 
sifive-vector-builtins-bases.o andes-vector-builtins-bases.o"
-       extra_objs="${extra_objs} thead.o riscv-target-attr.o riscv-zicfilp.o 
riscv-bclr-lowest-set-bit.o"
+       extra_objs="${extra_objs} thead.o riscv-target-attr.o riscv-zicfilp.o 
riscv-bclr-lowest-set-bit.o riscv-opt-popretz.o"
        d_target_objs="riscv-d.o"
        extra_headers="riscv_vector.h riscv_crypto.h riscv_bitmanip.h 
riscv_th_vector.h sifive_vector.h andes_vector.h"
        target_gtfiles="$target_gtfiles 
\$(srcdir)/config/riscv/riscv-vector-builtins.cc"
diff --git a/gcc/config/riscv/riscv-opt-popretz.cc 
b/gcc/config/riscv/riscv-opt-popretz.cc
new file mode 100644
index 00000000000..54e964f263e
--- /dev/null
+++ b/gcc/config/riscv/riscv-opt-popretz.cc
@@ -0,0 +1,294 @@
+/* RISC-V cm.popretz optimization pass.
+   Copyright (C) 2025 Free Software Foundation, Inc.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   GCC is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING3.  If not see
+   <http://www.gnu.org/licenses/>.  */
+
+/*
+   This pass combines "li a0, 0" + "cm.popret" into "cm.popretz" instruction
+   for the RISC-V Zcmp extension.
+
+   Rationale:
+   ---------
+   Ideally, cm.popretz should be generated during prologue/epilogue expansion.
+   However, as documented in PR113715 [1], this approach causes shrink-wrapping
+   analysis to fail, resulting in incorrect code generation.
+
+   To address this issue, we use a dedicated RTL pass to combine these
+   instructions later in the compilation pipeline, after shrink-wrapping has
+   completed.
+
+   Why not use peephole2?
+   ----------------------
+   An alternative approach would be to use a peephole2 pattern to perform this
+   optimization. However, between "li a0, 0" and "cm.popret", there can be
+   STACK_TIE and other instructions that make it difficult to write a robust
+   peephole pattern that handles all cases.
+
+   For example, in RV32, when the return value is in DImode but the low part
+   (a0) is zero, this pattern is hard to describe effectively in peephole2.
+   Using a dedicated pass gives us more flexibility to handle these cases.
+
+   [1] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113715  */
+
+#define IN_TARGET_CODE 1
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "backend.h"
+#include "target.h"
+#include "rtl.h"
+#include "tree.h"
+#include "tm_p.h"
+#include "emit-rtl.h"
+#include "dumpfile.h"
+#include "tree-pass.h"
+#include "insn-config.h"
+#include "insn-opinit.h"
+#include "recog.h"
+
+namespace {
+
+const pass_data pass_data_combine_popretz =
+{
+  RTL_PASS, /* type. */
+  "popretz", /* name. */
+  OPTGROUP_NONE, /* optinfo_flags. */
+  TV_MACH_DEP, /* tv_id. */
+  0, /* properties_required. */
+  0, /* properties_provided. */
+  0, /* properties_destroyed. */
+  0, /* todo_flags_start. */
+  0, /* todo_flags_finish. */
+};
+
+class pass_combine_popretz : public rtl_opt_pass
+{
+public:
+  pass_combine_popretz (gcc::context *ctxt)
+    : rtl_opt_pass (pass_data_combine_popretz, ctxt)
+  {}
+
+  virtual bool gate (function *)
+    {
+      return TARGET_ZCMP && !frame_pointer_needed;
+    }
+
+  virtual unsigned int execute (function *);
+}; // class pass_combine_popretz
+
+
+/* Check if the given instruction code is a cm.popret instruction.
+   Returns true if the code corresponds to any variant of gpr_multi_popret
+   (for different register bounds and modes).  */
+static bool
+riscv_popret_insn_p (int code)
+{
+#define CASE_CODE_FOR_POPRET_(REG_BOUND, MODE) \
+  case CODE_FOR_gpr_multi_popret_up_to_##REG_BOUND##_##MODE:
+#define CASE_CODE_FOR_POPRET(REG_BOUND) \
+  CASE_CODE_FOR_POPRET_(REG_BOUND, si) \
+  CASE_CODE_FOR_POPRET_(REG_BOUND, di)
+#define ALL_CASE_CODE_FOR_POPRET \
+  CASE_CODE_FOR_POPRET(ra) \
+  CASE_CODE_FOR_POPRET(s0) \
+  CASE_CODE_FOR_POPRET(s1) \
+  CASE_CODE_FOR_POPRET(s2) \
+  CASE_CODE_FOR_POPRET(s3) \
+  CASE_CODE_FOR_POPRET(s4) \
+  CASE_CODE_FOR_POPRET(s5) \
+  CASE_CODE_FOR_POPRET(s6) \
+  CASE_CODE_FOR_POPRET(s7) \
+  CASE_CODE_FOR_POPRET(s8) \
+  CASE_CODE_FOR_POPRET(s9) \
+  CASE_CODE_FOR_POPRET(s11) \
+
+  switch (code)
+    {
+    ALL_CASE_CODE_FOR_POPRET
+      return true;
+    default:
+      return false;
+    }
+
+#undef CASE_CODE_FOR_POPRET_
+#undef CASE_CODE_FOR_POPRET
+#undef ALL_CASE_CODE_FOR_POPRET
+}
+
+/* Convert a cm.popret instruction code to its corresponding cm.popretz code.
+   Given an instruction code for gpr_multi_popret, returns the equivalent
+   gpr_multi_popretz instruction code. Returns CODE_FOR_nothing if the
+   input is not a valid popret instruction.  */
+static int
+riscv_code_for_popretz (int code)
+{
+#define CASE_CODE_FOR_POPRETZ_(REG_BOUND, MODE) \
+  case CODE_FOR_gpr_multi_popret_up_to_##REG_BOUND##_##MODE: \
+    return CODE_FOR_gpr_multi_popretz_up_to_##REG_BOUND##_##MODE;
+
+#define CASE_CODE_FOR_POPRETZ(REG_BOUND) \
+  CASE_CODE_FOR_POPRETZ_(REG_BOUND, si) \
+  CASE_CODE_FOR_POPRETZ_(REG_BOUND, di)
+
+#define ALL_CASE_CODE_FOR_POPRETZ \
+  CASE_CODE_FOR_POPRETZ(ra) \
+  CASE_CODE_FOR_POPRETZ(s0) \
+  CASE_CODE_FOR_POPRETZ(s1) \
+  CASE_CODE_FOR_POPRETZ(s2) \
+  CASE_CODE_FOR_POPRETZ(s3) \
+  CASE_CODE_FOR_POPRETZ(s4) \
+  CASE_CODE_FOR_POPRETZ(s5) \
+  CASE_CODE_FOR_POPRETZ(s6) \
+  CASE_CODE_FOR_POPRETZ(s7) \
+  CASE_CODE_FOR_POPRETZ(s8) \
+  CASE_CODE_FOR_POPRETZ(s9) \
+  CASE_CODE_FOR_POPRETZ(s11) \
+
+  switch (code)
+    {
+    ALL_CASE_CODE_FOR_POPRETZ
+    default:
+      return CODE_FOR_nothing;
+    }
+
+#undef CASE_CODE_FOR_POPRETZ_
+#undef CASE_CODE_FOR_POPRETZ
+#undef ALL_CASE_CODE_FOR_POPRETZ
+}
+
+/* Combine "li a0, 0" with "cm.popret" to form "cm.popretz".
+
+   This pass scans basic blocks that precede the exit block, looking for
+   the following pattern:
+     1. A cm.popret instruction (function epilogue with return)
+     2. A (use a0) pseudo-instruction before the cm.popret
+     3. A "li a0, 0" instruction (set a0 to zero) before the use
+
+   When this pattern is found AND a0 is not referenced by any other
+   instructions between the "li a0, 0" and the (use a0), we can safely
+   combine them into a single cm.popretz instruction, which performs
+   the same operations more efficiently.
+
+   This is a late RTL pass that runs before branch shortening.  */
+unsigned int
+pass_combine_popretz::execute (function *fn)
+{
+  timevar_push (TV_MACH_DEP);
+  edge e;
+  edge_iterator ei;
+
+  /* Only visit exit block's pred since popret will only appear there.  */
+  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (fn)->preds)
+    {
+      basic_block bb = e->src;
+      rtx_insn *popret_insn = BB_END (bb);
+      if (!JUMP_P (popret_insn))
+        continue;
+      int code = recog_memoized (popret_insn);
+      if (!riscv_popret_insn_p (code))
+        continue;
+
+      rtx_insn *def_a0_insn = NULL;
+      rtx_insn *use_a0_insn = NULL;
+      rtx a0_reg = NULL;
+      /* Scan backwards from popret to find the pattern:
+         1. First, find the (use a0) pseudo-instruction
+         2. Continue scanning to find "li a0, 0" (set a0 to const0_rtx)
+         3. Ensure a0 is not referenced by any instructions between them
+         4. Stop at the first definition of a0 (to ensure we have the
+            last/most recent def before the use).  */
+      for (rtx_insn *def_insn = PREV_INSN (popret_insn);
+          def_insn && def_insn != PREV_INSN (BB_HEAD (bb));
+          def_insn = PREV_INSN (def_insn))
+       {
+         if (!INSN_P (def_insn))
+           continue;
+         rtx def_pat = PATTERN (def_insn);
+         if (GET_CODE (def_pat) == USE
+             && REG_P (XEXP (def_pat, 0))
+             && REGNO (XEXP (def_pat, 0)) == A0_REGNUM)
+           {
+             a0_reg = XEXP (def_pat, 0);
+             use_a0_insn = def_insn;
+             continue;
+           }
+
+         if (use_a0_insn && reg_referenced_p (a0_reg, def_pat))
+           {
+             /* a0 is used by other instruction before its use in popret.  */
+             use_a0_insn = NULL;
+             break;
+           }
+
+         if (use_a0_insn
+             && GET_CODE (def_pat) == SET
+             && REG_P (SET_DEST (def_pat))
+             && REGNO (SET_DEST (def_pat)) == A0_REGNUM)
+           {
+             if (SET_SRC (def_pat) == const0_rtx)
+               def_a0_insn = def_insn;
+             /* Stop the search regardless of the value assigned to a0,
+                because we only want to match the last (most recent)
+                definition of a0 before the (use a0).  */
+             break;
+           }
+         }
+
+        /* If we found a def of a0 before its use, and the value is zero,
+          we can replace the popret with popretz.  */
+       if (!def_a0_insn || !use_a0_insn)
+         continue;
+
+       int code_for_popretz = riscv_code_for_popretz (code);
+       gcc_assert (code_for_popretz != CODE_FOR_nothing);
+
+       /* Extract the stack adjustment value from the popret instruction.
+          The popret pattern is a PARALLEL, and the first element is the
+          stack pointer adjustment: (set sp (plus sp const_int)).  */
+       rtx stack_adj_rtx = XVECEXP (PATTERN (popret_insn), 0, 0);
+       gcc_assert (GET_CODE (stack_adj_rtx) == SET
+                   && REG_P (SET_DEST (stack_adj_rtx))
+                   && REGNO (SET_DEST (stack_adj_rtx)) == SP_REGNUM
+                   && GET_CODE (SET_SRC (stack_adj_rtx)) == PLUS
+                   && CONST_INT_P (XEXP (SET_SRC (stack_adj_rtx), 1)));
+
+       rtx stack_adj_val = XEXP (SET_SRC (stack_adj_rtx), 1);
+
+       /* Generate and insert the popretz instruction at the position of
+          the original popret. emit_insn_after places the new instruction
+          after PREV_INSN(popret_insn).  */
+       rtx popretz = GEN_FCN (code_for_popretz) (stack_adj_val);
+       emit_insn_after (popretz, PREV_INSN (popret_insn));
+
+       /* Clean up those instructions.  */
+       remove_insn (popret_insn);
+       remove_insn (use_a0_insn);
+       remove_insn (def_a0_insn);
+    }
+
+  timevar_pop (TV_MACH_DEP);
+  return 0;
+}
+
+} // anon namespace
+
+rtl_opt_pass *
+make_pass_combine_popretz (gcc::context *ctxt)
+{
+  return new pass_combine_popretz (ctxt);
+}
diff --git a/gcc/config/riscv/riscv-passes.def 
b/gcc/config/riscv/riscv-passes.def
index 5aa41228e1f..d41cc58c1dc 100644
--- a/gcc/config/riscv/riscv-passes.def
+++ b/gcc/config/riscv/riscv-passes.def
@@ -22,5 +22,6 @@ INSERT_PASS_AFTER (pass_rtl_store_motion, 1, 
pass_shorten_memrefs);
 INSERT_PASS_AFTER (pass_split_all_insns, 1, pass_avlprop);
 INSERT_PASS_BEFORE (pass_fast_rtl_dce, 1, pass_vsetvl);
 INSERT_PASS_BEFORE (pass_shorten_branches, 1, pass_insert_landing_pad);
+INSERT_PASS_BEFORE (pass_shorten_branches, 1, pass_combine_popretz);
 INSERT_PASS_AFTER (pass_cse2, 1, pass_vector_permconst);
 
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 570acb14f58..a372779cf9f 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -208,6 +208,7 @@ rtl_opt_pass * make_pass_vsetvl (gcc::context *ctxt);
 rtl_opt_pass * make_pass_insert_landing_pad (gcc::context *ctxt);
 rtl_opt_pass * make_pass_vector_permconst (gcc::context *ctxt);
 rtl_opt_pass * make_pass_bclr_lowest_set_bit (gcc::context *ctxt);
+rtl_opt_pass * make_pass_combine_popretz (gcc::context *ctxt);
 
 /* Routines implemented in riscv-vsetvl.cc.  */
 extern bool has_vtype_op (rtx_insn *);
diff --git a/gcc/config/riscv/t-riscv b/gcc/config/riscv/t-riscv
index b53a2dff2cf..3f92feab50e 100644
--- a/gcc/config/riscv/t-riscv
+++ b/gcc/config/riscv/t-riscv
@@ -89,6 +89,12 @@ riscv-sr.o: $(srcdir)/config/riscv/riscv-sr.cc $(CONFIG_H) \
        $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
                $(srcdir)/config/riscv/riscv-sr.cc
 
+riscv-opt-popretz.o: $(srcdir)/config/riscv/riscv-opt-popretz.cc $(CONFIG_H) \
+  $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) $(TARGET_H) recog.h insn-opinit.h \
+  tree-pass.h emit-rtl.h insn-config.h
+       $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
+               $(srcdir)/config/riscv/riscv-opt-popretz.cc
+
 riscv-c.o: $(srcdir)/config/riscv/riscv-c.cc $(CONFIG_H) $(SYSTEM_H) \
     coretypes.h $(TM_H) $(TREE_H) output.h $(C_COMMON_H) $(TARGET_H)
        $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
diff --git a/gcc/testsuite/gcc.target/riscv/pr113715.c 
b/gcc/testsuite/gcc.target/riscv/pr113715.c
new file mode 100644
index 00000000000..493071c1c7d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr113715.c
@@ -0,0 +1,61 @@
+/* { dg-do compile } */
+/* { dg-options " -Os -march=rv32imaf_zca_zcmp -mabi=ilp32f -mcmodel=medlow 
-fno-pic" }*/
+/* { dg-skip-if "" { *-*-* } {"-O0" "-O1" "-O2" "-Og" "-O3" "-flto"} } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+void test_1(int);
+
+/*
+**test_err:
+** ...
+**     li      a0,1
+**     call    test_1
+**     cm.popretz      {ra}, 16
+** ...
+*/
+int test_err(int mode)
+{
+    if (mode == 2) {
+        test_1(1);
+    }
+
+    return 0;
+}
+
+/*
+**test_err2:
+** ...
+**     li      a0,1
+**     call    test_1
+**     li      a1,0
+**     cm.popretz      {ra}, 16
+** ...
+*/
+long long test_err2(int mode)
+{
+    if (mode == 2) {
+        test_1(1);
+    }
+
+    return 0;
+}
+
+
+/*
+**test_err3:
+** ...
+**     li      a0,1
+**     call    test_1
+**     li      a1,1
+**     cm.popretz      {ra}, 16
+** ...
+*/
+long long test_err3(int mode)
+{
+    if (mode == 2) {
+        test_1(1);
+       return 0x100000000ll;
+    }
+
+    return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rv32e_zcmp.c 
b/gcc/testsuite/gcc.target/riscv/rv32e_zcmp.c
index fd845f53335..8e3a36db586 100644
--- a/gcc/testsuite/gcc.target/riscv/rv32e_zcmp.c
+++ b/gcc/testsuite/gcc.target/riscv/rv32e_zcmp.c
@@ -259,8 +259,7 @@ foo (void)
 **test_popretz:
 **     cm.push {ra}, -16
 **     call    f1(?:@plt)?
-**     li      a0,0
-**     cm.popret       {ra}, 16
+**     cm.popretz      {ra}, 16
 */
 long
 test_popretz ()
diff --git a/gcc/testsuite/gcc.target/riscv/rv32i_zcmp.c 
b/gcc/testsuite/gcc.target/riscv/rv32i_zcmp.c
index d90f4f47c8d..7bcffebacb5 100644
--- a/gcc/testsuite/gcc.target/riscv/rv32i_zcmp.c
+++ b/gcc/testsuite/gcc.target/riscv/rv32i_zcmp.c
@@ -259,8 +259,7 @@ foo (void)
 **test_popretz:
 **     cm.push {ra}, -16
 **     call    f1(?:@plt)?
-**     li      a0,0
-**     cm.popret       {ra}, 16
+**     cm.popretz      {ra}, 16
 */
 long
 test_popretz ()
-- 
2.34.1

Reply via email to