For Thumb-2, popping a single low register off the stack should prefer POP over LDR, mirroring the PUSH that is used on entry. POP of a low register has a 16-bit encoding, whereas LDR with post-increment writeback needs a 32-bit encoding, so this saves two bytes in the resulting image. This is a relatively niche case, as it's rare to push a single low register onto the stack, but it is still worth getting right.
Whilst fixing this I've also restructured the code here somewhat to fix a bug I observed by inspection and to improve the code slightly. Firstly, the single register case is hoisted above the main loop. This not only avoids creating some RTL that immediately becomes garbage but also avoids us needing to check for this case in every iteration of the main loop body. Secondly, we iterate over just the non-zero bits in the reg mask rather than every bit and then checking if there's work to do for that bit. Finally, when emitting a pop that also pops SP off the stack we shouldn't be emitting a stack-adjust CFA note. The new SP value comes from the popped value, not from an adjustment of the previous SP value. gcc: PR target/118089 * config/arm/arm.cc (arm_emit_multi_reg_pop): Restructure. Don't emit LDR on thumb2 when POP can be used for smaller code. Don't add a CFA adjust note when SP is popped off the stack. gcc/testsuite: PR target/118089 * gcc.target/arm/thumb2-pop-loreg.c: New test. --- gcc/config/arm/arm.cc | 99 +++++++++++-------- .../gcc.target/arm/thumb2-pop-loreg.c | 18 ++++ 2 files changed, 75 insertions(+), 42 deletions(-) create mode 100644 gcc/testsuite/gcc.target/arm/thumb2-pop-loreg.c diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc index 503401544cb..a95ddf8201f 100644 --- a/gcc/config/arm/arm.cc +++ b/gcc/config/arm/arm.cc @@ -22543,24 +22543,50 @@ static void arm_emit_multi_reg_pop (unsigned long saved_regs_mask) { int num_regs = 0; - int i, j; rtx par; rtx dwarf = NULL_RTX; rtx tmp, reg; bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM); int offset_adj; int emit_update; + unsigned long reg_bits; offset_adj = return_in_pc ? 1 : 0; - for (i = 0; i <= LAST_ARM_REGNUM; i++) - if (saved_regs_mask & (1 << i)) - num_regs++; + for (reg_bits = saved_regs_mask; reg_bits; + reg_bits &= ~(reg_bits & -reg_bits)) + num_regs++; gcc_assert (num_regs && num_regs <= 16); /* If SP is in reglist, then we don't emit SP update insn. 
*/ emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1; + /* If popping just one register, use LDR reg, [SP], #4, unless + we're generating Thumb code and reg is a low reg. */ + if (num_regs == 1 + && emit_update + && !return_in_pc + && (TARGET_ARM + /* For Thumb we want to use POP for a single low register. */ + || (saved_regs_mask & ~0xff))) + { + int i = exact_log2 (saved_regs_mask); + + rtx dwarf_reg = reg = gen_rtx_REG (SImode, i); + if (arm_current_function_pac_enabled_p () && i == IP_REGNUM) + dwarf_reg = gen_rtx_REG (SImode, RA_AUTH_CODE); + /* Emit single load with writeback. */ + tmp = gen_frame_mem (SImode, + gen_rtx_POST_INC (Pmode, + stack_pointer_rtx)); + tmp = emit_insn (gen_rtx_SET (reg, tmp)); + REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, dwarf_reg, + dwarf); + arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD, + stack_pointer_rtx, stack_pointer_rtx); + return; + } + /* The parallel needs to hold num_regs SETs and one SET for the stack update. */ par = gen_rtx_PARALLEL (VOIDmode, @@ -22582,50 +22608,39 @@ arm_emit_multi_reg_pop (unsigned long saved_regs_mask) } /* Now restore every reg, which may include PC. */ - for (j = 0, i = 0; j < num_regs; i++) - if (saved_regs_mask & (1 << i)) - { - rtx dwarf_reg = reg = gen_rtx_REG (SImode, i); - if (arm_current_function_pac_enabled_p () && i == IP_REGNUM) - dwarf_reg = gen_rtx_REG (SImode, RA_AUTH_CODE); - if ((num_regs == 1) && emit_update && !return_in_pc) - { - /* Emit single load with writeback. 
*/ - tmp = gen_frame_mem (SImode, - gen_rtx_POST_INC (Pmode, - stack_pointer_rtx)); - tmp = emit_insn (gen_rtx_SET (reg, tmp)); - REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, dwarf_reg, - dwarf); - arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD, - stack_pointer_rtx, stack_pointer_rtx); - return; - } - - tmp = gen_rtx_SET (reg, - gen_frame_mem - (SImode, - plus_constant (Pmode, stack_pointer_rtx, 4 * j))); - RTX_FRAME_RELATED_P (tmp) = 1; - XVECEXP (par, 0, j + emit_update + offset_adj) = tmp; - - /* We need to maintain a sequence for DWARF info too. As dwarf info - should not have PC, skip PC. */ - if (i != PC_REGNUM) - dwarf = alloc_reg_note (REG_CFA_RESTORE, dwarf_reg, dwarf); + int j = 0; + int elt = emit_update + offset_adj; + for (reg_bits = saved_regs_mask; reg_bits; + reg_bits &= ~(reg_bits & -reg_bits)) + { + int i = exact_log2 (reg_bits & -reg_bits); + rtx dwarf_reg = reg = gen_rtx_REG (SImode, i); - j++; - } + if (i == IP_REGNUM && arm_current_function_pac_enabled_p ()) + dwarf_reg = gen_rtx_REG (SImode, RA_AUTH_CODE); + tmp = gen_rtx_SET (reg, + gen_frame_mem + (SImode, + plus_constant (Pmode, stack_pointer_rtx, 4 * j))); + RTX_FRAME_RELATED_P (tmp) = 1; + XVECEXP (par, 0, elt) = tmp; - if (return_in_pc) - par = emit_jump_insn (par); - else - par = emit_insn (par); + /* We need to maintain a sequence for DWARF info too. As dwarf info + should not have PC, skip PC. */ + if (i != PC_REGNUM) + dwarf = alloc_reg_note (REG_CFA_RESTORE, dwarf_reg, dwarf); + j++; + elt++; + } + par = return_in_pc ? 
emit_jump_insn (par) : emit_insn (par); REG_NOTES (par) = dwarf; - if (!return_in_pc) + + if (!return_in_pc && emit_update) arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs, stack_pointer_rtx, stack_pointer_rtx); + else if (!return_in_pc) + RTX_FRAME_RELATED_P (par) = 1; } /* Generate and emit an insn pattern that we will recognize as a pop_multi diff --git a/gcc/testsuite/gcc.target/arm/thumb2-pop-loreg.c b/gcc/testsuite/gcc.target/arm/thumb2-pop-loreg.c new file mode 100644 index 00000000000..6db66b84cd9 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/thumb2-pop-loreg.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_thumb2_ok } */ +/* { dg-options "-Os" } */ + +int __attribute__((noinline)) f (void) +{ + asm (""); +} + +int g (void) +{ + char buf[32]; + register char *x asm ("r4") = buf; + asm volatile ("" : : "r" (x)); + return f(); +} +/* Unstacking a single low register in thumb2 should use POP. */ +/* { dg-final { scan-assembler "pop\t{r4}" } } */ -- 2.34.1