Helper function for epilogue expansion. Emit RTL for APCS frame epilogue
(when -mapcs-frame command line option is specified).
This function is used by a later patch.
For APCS frame epilogue, the compiler currently generates LDM with SP as
both the base register
and one of the destination registers. For example:
@ APCS_FRAME epilogue
ldmfd sp, {r4, fp, sp, pc}
@ non-APCS_FRAME epilogue
ldmfd sp!, {r4, fp, pc}
The use of SP in LDM register list is deprecated, but this patch does not
address the problem.
To generate the epilogue for APCS frame in RTL, this patch adds a new
alternative to the *arm_addsi3 insn, in ARM mode only, to generate
"sub sp, fp, #imm". Previously, there was no pattern to generate sub with SP
as the destination register and a register other than SP as the source operand.
ChangeLog:
gcc
2012-05-31 Ian Bolton <ian.bol...@arm.com>
Sameera Deshpande <sameera.deshpa...@arm.com>
Greta Yorsh <greta.yo...@arm.com>
* config/arm/arm.c (arm_expand_epilogue_apcs_frame): New function.
* config/arm/arm.md (*arm_addsi3): Add an alternative.
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 491ffea..d6b4c2e 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -22896,6 +22896,232 @@ thumb1_expand_epilogue (void)
emit_use (gen_rtx_REG (SImode, LR_REGNUM));
}
+/* Emit RTL for the epilogue of a function that uses an APCS frame.  The
+   emitted code returns to the caller only if REALLY_RETURN is true.  */
+static void
+arm_expand_epilogue_apcs_frame (bool really_return)
+{
+  unsigned long func_type;
+  unsigned long saved_regs_mask;
+  int num_regs = 0;
+  int i;
+  int floats_from_frame = 0;
+  arm_stack_offsets *offsets;
+
+  gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
+  func_type = arm_current_func_type ();
+
+  /* Get frame offsets for ARM.  */
+  offsets = arm_get_frame_offsets ();
+  saved_regs_mask = offsets->saved_regs_mask;
+
+  /* Find the offset of the floating-point save area in the frame.  */
+  floats_from_frame = offsets->saved_args - offsets->frame;
+
+  /* Compute how many core registers saved and how far away the floats are.  */
+  for (i = 0; i <= LAST_ARM_REGNUM; i++)
+    if (saved_regs_mask & (1 << i))
+      {
+        num_regs++;
+        floats_from_frame += 4;
+      }
+
+  if (TARGET_HARD_FLOAT && TARGET_VFP)
+    {
+      int start_reg;
+
+      /* If VFP regs were saved, set IP to their save area for the pops.  */
+      int saved_size = arm_get_vfp_saved_size ();
+      if (saved_size > 0)
+        {
+          floats_from_frame += saved_size;
+          emit_insn (gen_addsi3 (gen_rtx_REG (SImode, IP_REGNUM),
+                                 hard_frame_pointer_rtx,
+                                 GEN_INT (-floats_from_frame)));
+        }
+
+      /* Generate VFP register multi-pop.  */
+      start_reg = FIRST_VFP_REGNUM;
+
+      for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
+        /* Look for a case where a reg does not need restoring.  */
+        if ((!df_regs_ever_live_p (i) || call_used_regs[i])
+            && (!df_regs_ever_live_p (i + 1)
+                || call_used_regs[i + 1]))
+          {
+            if (start_reg != i)
+              arm_emit_vfp_multi_reg_pop (start_reg,
+                                          (i - start_reg) / 2,
+                                          gen_rtx_REG (SImode, IP_REGNUM));
+            start_reg = i + 2;
+          }
+
+      /* Restore the remaining regs that we have discovered (or possibly
+         even all of them, if the conditional in the for loop never
+         fired).  */
+      if (start_reg != i)
+        arm_emit_vfp_multi_reg_pop (start_reg,
+                                    (i - start_reg) / 2,
+                                    gen_rtx_REG (SImode, IP_REGNUM));
+    }
+  else if (TARGET_FPA_EMU2)
+    {
+      for (i = LAST_FPA_REGNUM; i >= FIRST_FPA_REGNUM; i--)
+        if (df_regs_ever_live_p (i) && !call_used_regs[i])
+          {
+            rtx addr;
+            rtx insn;
+            floats_from_frame += 12;
+            addr = gen_rtx_MEM (XFmode,
+                                gen_rtx_PLUS (SImode,
+                                              hard_frame_pointer_rtx,
+                                              GEN_INT (- floats_from_frame)));
+            set_mem_alias_set (addr, get_frame_alias_set ());
+            insn = emit_insn (gen_rtx_SET (VOIDmode,
+                                           gen_rtx_REG (XFmode, i),
+                                           addr));
+            REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
+                                               gen_rtx_REG (XFmode, i),
+                                               NULL_RTX);
+          }
+    }
+  else
+    {
+      int idx = 0;
+      rtx load_seq[4];
+      rtx dwarf = NULL_RTX;
+      rtx par;
+      rtx frame_mem;
+
+      for (i = LAST_FPA_REGNUM; i >= FIRST_FPA_REGNUM; i--)
+        {
+          /* We can't unstack more than four registers at once.  */
+          if (idx == 4)
+            {
+              par = emit_insn (gen_rtx_PARALLEL (VOIDmode,
+                                                 gen_rtvec_v (idx, load_seq)));
+              REG_NOTES (par) = dwarf;
+              dwarf = NULL_RTX;
+              idx = 0;
+            }
+
+          if (df_regs_ever_live_p (i) && !call_used_regs[i])
+            {
+              floats_from_frame += 12;
+
+              frame_mem = gen_frame_mem (XFmode,
+                                         plus_constant (hard_frame_pointer_rtx,
+                                                        - floats_from_frame));
+              load_seq[idx] = gen_rtx_SET (VOIDmode, gen_rtx_REG (XFmode, i),
+                                           frame_mem);
+              dwarf = alloc_reg_note (REG_CFA_RESTORE, gen_rtx_REG (XFmode, i),
+                                      dwarf);
+              idx++;
+            }
+          else if (idx)
+            {
+              /* Registers must be consecutive.  */
+              par = emit_insn (gen_rtx_PARALLEL (VOIDmode,
+                                                 gen_rtvec_v (idx, load_seq)));
+              REG_NOTES (par) = dwarf;
+              dwarf = NULL_RTX;
+              idx = 0;
+            }
+        }
+
+      /* Pop the last registers.  */
+      if (idx)
+        {
+          par = emit_insn (gen_rtx_PARALLEL (VOIDmode,
+                                             gen_rtvec_v (idx, load_seq)));
+          REG_NOTES (par) = dwarf;
+        }
+    }
+
+  if (TARGET_IWMMXT)
+    {
+      /* The frame pointer is guaranteed to be non-double-word aligned, as
+         it is set to double-word-aligned old_stack_pointer - 4.  */
+      rtx insn;
+      int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
+
+      for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
+        if (df_regs_ever_live_p (i) && !call_used_regs[i])
+          {
+            rtx addr = gen_frame_mem (V2SImode,
+                                      plus_constant (hard_frame_pointer_rtx,
+                                                     - lrm_count * 4));
+            insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
+            REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
+                                               gen_rtx_REG (V2SImode, i),
+                                               NULL_RTX);
+            lrm_count += 2;
+          }
+    }
+
+  /* saved_regs_mask should contain IP which contains old stack pointer
+     at the time of activation creation.  Since SP and IP are adjacent
+     registers, we can restore the value directly into SP.  */
+  gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
+  saved_regs_mask &= ~(1 << IP_REGNUM);
+  saved_regs_mask |= (1 << SP_REGNUM);
+
+  /* There are two registers left in saved_regs_mask - LR and PC.  We
+     only need to restore LR (the return address), but to
+     save time we can load it directly into PC, unless we need a
+     special function exit sequence, or we are not really returning.  */
+  if (really_return
+      && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
+      && !crtl->calls_eh_return)
+    /* Delete LR from the register mask, so that LR on
+       the stack is loaded into the PC in the register mask.  */
+    saved_regs_mask &= ~(1 << LR_REGNUM);
+  else
+    saved_regs_mask &= ~(1 << PC_REGNUM);
+
+  num_regs = bit_count (saved_regs_mask);
+  if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
+    {
+      /* Unwind the stack to just below the saved registers.  */
+      emit_insn (gen_addsi3 (stack_pointer_rtx,
+                             hard_frame_pointer_rtx,
+                             GEN_INT (- 4 * num_regs)));
+    }
+
+  arm_emit_multi_reg_pop (saved_regs_mask);
+
+  if (IS_INTERRUPT (func_type))
+    {
+      /* Interrupt handlers will have pushed the
+         IP onto the stack, so restore it now.  */
+      rtx insn;
+      rtx addr = gen_rtx_MEM (SImode,
+                              gen_rtx_POST_INC (SImode,
+                                                stack_pointer_rtx));
+      set_mem_alias_set (addr, get_frame_alias_set ());
+      insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
+      REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
+                                         gen_rtx_REG (SImode, IP_REGNUM),
+                                         NULL_RTX);
+    }
+
+  if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
+    return;
+
+  if (crtl->calls_eh_return)
+    /* Add the adjustment held in the EH register, not the register number.  */
+    emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
+                           gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
+
+  if (IS_STACKALIGN (func_type))
+    /* Restore the original stack pointer.  Before prologue, the stack was
+       realigned and the original stack pointer saved in r0.  For details,
+       see comment in arm_expand_prologue.  */
+    emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
+
+  emit_jump_insn (simple_return_rtx);
+}
+
/* Implementation of insn prologue_thumb1_interwork. This is the first
"instruction" of a function called in ARM mode. Swap to thumb mode. */
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 98387fa..3a237c8 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -718,9 +718,9 @@
;; (plus (reg rN) (reg sp)) into (reg rN). In this case reload will
;; put the duplicated register first, and not try the commutative version.
(define_insn_and_split "*arm_addsi3"
- [(set (match_operand:SI 0 "s_register_operand" "=r, k,r,r, k, r,
k,r, k, r")
- (plus:SI (match_operand:SI 1 "s_register_operand" "%rk,k,r,rk,k,
rk,k,rk,k, rk")
- (match_operand:SI 2 "reg_or_int_operand" "rI,rI,k,Pj,Pj,L,
L,PJ,PJ,?n")))]
+ [(set (match_operand:SI 0 "s_register_operand" "=r, k,r,r, k, r,
k,k,r, k, r")
+ (plus:SI (match_operand:SI 1 "s_register_operand" "%rk,k,r,rk,k,
rk,k,r,rk,k, rk")
+ (match_operand:SI 2 "reg_or_int_operand" "rI,rI,k,Pj,Pj,L,
L,L,PJ,PJ,?n")))]
"TARGET_32BIT"
"@
add%?\\t%0, %1, %2
@@ -730,6 +730,7 @@
addw%?\\t%0, %1, %2
sub%?\\t%0, %1, #%n2
sub%?\\t%0, %1, #%n2
+ sub%?\\t%0, %1, #%n2
subw%?\\t%0, %1, #%n2
subw%?\\t%0, %1, #%n2
#"
@@ -744,9 +745,9 @@
operands[1], 0);
DONE;
"
- [(set_attr "length" "4,4,4,4,4,4,4,4,4,16")
+ [(set_attr "length" "4,4,4,4,4,4,4,4,4,4,16")
(set_attr "predicable" "yes")
- (set_attr "arch" "*,*,*,t2,t2,*,*,t2,t2,*")]
+ (set_attr "arch" "*,*,*,t2,t2,*,*,a,t2,t2,*")]
)
(define_insn_and_split "*thumb1_addsi3"