For thumb2, popping a single low register off the stack should prefer
POP over LDR to mirror the behaviour of the PUSH on entry.  This saves
a couple of bytes in the resulting image.  This is a relatively niche
case as it's rare to push a single low register onto the stack, but
still worth getting right.

Whilst fixing this I've also restructured the code here somewhat to
fix a bug I observed by inspection and to improve the code slightly.

Firstly, the single register case is hoisted above the main loop.
This not only avoids creating some RTL that immediately becomes
garbage but also avoids us needing to check for this case in every
iteration of the main loop body.

Secondly, we iterate over just the non-zero bits in the reg mask
rather than iterating over every bit and then checking whether there's
work to do for that bit.

Finally, when emitting a pop that also pops SP off the stack we
shouldn't be emitting a stack-adjust CFA note.  The new SP value comes
from the popped value, not from an adjustment of the previous SP
value.

gcc:
        PR target/118089
        * config/arm/arm.cc (arm_emit_multi_reg_pop): Restructure.
        Don't emit LDR on thumb2 when POP can be used for smaller code.
        Don't add a CFA adjust note when SP is popped off the stack.

gcc/testsuite:
        PR target/118089
        * gcc.target/arm/thumb2-pop-loreg.c: New test.
---
 gcc/config/arm/arm.cc                         | 99 +++++++++++--------
 .../gcc.target/arm/thumb2-pop-loreg.c         | 18 ++++
 2 files changed, 75 insertions(+), 42 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/arm/thumb2-pop-loreg.c

diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
index 503401544cb..a95ddf8201f 100644
--- a/gcc/config/arm/arm.cc
+++ b/gcc/config/arm/arm.cc
@@ -22543,24 +22543,50 @@ static void
 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
 {
   int num_regs = 0;
-  int i, j;
   rtx par;
   rtx dwarf = NULL_RTX;
   rtx tmp, reg;
   bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
   int offset_adj;
   int emit_update;
+  unsigned long reg_bits;
 
   offset_adj = return_in_pc ? 1 : 0;
-  for (i = 0; i <= LAST_ARM_REGNUM; i++)
-    if (saved_regs_mask & (1 << i))
-      num_regs++;
+  for (reg_bits = saved_regs_mask; reg_bits;
+       reg_bits &= ~(reg_bits & -reg_bits))
+    num_regs++;
 
   gcc_assert (num_regs && num_regs <= 16);
 
   /* If SP is in reglist, then we don't emit SP update insn.  */
   emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
 
+  /* If popping just one register, use LDR reg, [SP], #4, unless
+     we're generating Thumb code and reg is a low reg.  */
+  if (num_regs == 1
+      && emit_update
+      && !return_in_pc
+      && (TARGET_ARM
+         /* For Thumb we want to use POP for a single low register.  */
+         || (saved_regs_mask & ~0xff)))
+    {
+      int i = exact_log2 (saved_regs_mask);
+
+      rtx dwarf_reg = reg = gen_rtx_REG (SImode, i);
+      if (arm_current_function_pac_enabled_p () && i == IP_REGNUM)
+       dwarf_reg = gen_rtx_REG (SImode, RA_AUTH_CODE);
+      /* Emit single load with writeback.  */
+      tmp = gen_frame_mem (SImode,
+                          gen_rtx_POST_INC (Pmode,
+                                            stack_pointer_rtx));
+      tmp = emit_insn (gen_rtx_SET (reg, tmp));
+      REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, dwarf_reg,
+                                       dwarf);
+      arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD,
+                                  stack_pointer_rtx, stack_pointer_rtx);
+      return;
+    }
+
   /* The parallel needs to hold num_regs SETs
      and one SET for the stack update.  */
   par = gen_rtx_PARALLEL (VOIDmode,
@@ -22582,50 +22608,39 @@ arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
     }
 
   /* Now restore every reg, which may include PC.  */
-  for (j = 0, i = 0; j < num_regs; i++)
-    if (saved_regs_mask & (1 << i))
-      {
-       rtx dwarf_reg = reg = gen_rtx_REG (SImode, i);
-       if (arm_current_function_pac_enabled_p () && i == IP_REGNUM)
-         dwarf_reg = gen_rtx_REG (SImode, RA_AUTH_CODE);
-       if ((num_regs == 1) && emit_update && !return_in_pc)
-         {
-           /* Emit single load with writeback.  */
-           tmp = gen_frame_mem (SImode,
-                                gen_rtx_POST_INC (Pmode,
-                                                  stack_pointer_rtx));
-           tmp = emit_insn (gen_rtx_SET (reg, tmp));
-           REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, dwarf_reg,
-                                             dwarf);
-           arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD,
-                                        stack_pointer_rtx, stack_pointer_rtx);
-           return;
-         }
-
-       tmp = gen_rtx_SET (reg,
-                          gen_frame_mem
-                          (SImode,
-                           plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
-       RTX_FRAME_RELATED_P (tmp) = 1;
-       XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
-
-       /* We need to maintain a sequence for DWARF info too.  As dwarf info
-          should not have PC, skip PC.  */
-       if (i != PC_REGNUM)
-         dwarf = alloc_reg_note (REG_CFA_RESTORE, dwarf_reg, dwarf);
+  int j = 0;
+  int elt = emit_update + offset_adj;
+  for (reg_bits = saved_regs_mask; reg_bits;
+       reg_bits &= ~(reg_bits & -reg_bits))
+    {
+      int i = exact_log2 (reg_bits & -reg_bits);
+      rtx dwarf_reg = reg = gen_rtx_REG (SImode, i);
 
-       j++;
-      }
+      if (i == IP_REGNUM && arm_current_function_pac_enabled_p ())
+       dwarf_reg = gen_rtx_REG (SImode, RA_AUTH_CODE);
+      tmp = gen_rtx_SET (reg,
+                        gen_frame_mem
+                        (SImode,
+                         plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
+      RTX_FRAME_RELATED_P (tmp) = 1;
+      XVECEXP (par, 0, elt) = tmp;
 
-  if (return_in_pc)
-    par = emit_jump_insn (par);
-  else
-    par = emit_insn (par);
+      /* We need to maintain a sequence for DWARF info too.  As dwarf info
+        should not have PC, skip PC.  */
+      if (i != PC_REGNUM)
+       dwarf = alloc_reg_note (REG_CFA_RESTORE, dwarf_reg, dwarf);
+      j++;
+      elt++;
+    }
 
+  par = return_in_pc ? emit_jump_insn (par) : emit_insn (par);
   REG_NOTES (par) = dwarf;
-  if (!return_in_pc)
+
+  if (!return_in_pc && emit_update)
     arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
                                 stack_pointer_rtx, stack_pointer_rtx);
+  else if (!return_in_pc)
+    RTX_FRAME_RELATED_P (par) = 1;
 }
 
 /* Generate and emit an insn pattern that we will recognize as a pop_multi
diff --git a/gcc/testsuite/gcc.target/arm/thumb2-pop-loreg.c b/gcc/testsuite/gcc.target/arm/thumb2-pop-loreg.c
new file mode 100644
index 00000000000..6db66b84cd9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/thumb2-pop-loreg.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_thumb2_ok } */
+/* { dg-options "-Os" } */
+
+int __attribute__((noinline)) f (void)
+{
+  asm ("");
+}
+
+int g (void)
+{
+  char buf[32];
+  register char *x asm ("r4") = buf;
+  asm volatile ("" : : "r" (x));
+  return f();
+}
+/* Unstacking a single low register in thumb2 should use POP.  */
+/* { dg-final { scan-assembler "pop\t{r4}" } } */
-- 
2.34.1

Reply via email to