Hi Ramana,
Please find attached reworked patch. The patch is tested with check-gcc,
check-gdb and bootstrap with no regression.
On Fri, 2011-10-21 at 13:43 +0100, Ramana Radhakrishnan wrote:
> Why are you differentiating on stack_only ? Does it really matter ?
>
The pop_multi* patterns generate the pop instruction, hence I wanted to be
sure that the base register is the stack pointer.
I can remove stack_only option by
1. Modifying pattern to match SP as base-register explicitly or
2. Generate ldm%(ia%) instruction for non-SP base registers.
I chose second option.
> Hmmm isn't this true of only LDM's in Thumb state ? Though it could be argued
> that this patch is only T2 epilogues.
>
Yes, it's true. But for a single register we want to match the LDR pattern
and not any of the ldm* or pop_multi* patterns. So, I am barring LDM for a
single register here.
> >+ strcpy (pattern, \"fldmfdd\\t\");
> >+ strcat (pattern,
> >+ reg_names[REGNO (SET_DEST (XVECEXP (operands[0], 0,
> >0)))]);
> >+ strcat (pattern, \"!, {\");
> >+ strcat (pattern, table[(REGNO (XEXP (XVECEXP (operands[0], 0, 1), 0))
> >+ - FIRST_VFP_REGNUM) / 2].name);
>
> Can't you reuse names from arm.h and avoid the table here ?
>
The array REGISTER_NAMES in aout.h uses S0, S2, ... names for double
registers. Is there any way to use OVERLAPPING_REGISTER_NAMES? If that
can be done, I can eliminate the table here.
Updated ChangeLog entry:
2011-09-28 Ian Bolton <ian.bol...@arm.com>
Sameera Deshpande <sameera.deshpa...@arm.com>
* config/arm/arm-protos.h (load_multiple_operation_p): New
declaration.
(thumb2_expand_epilogue): Likewise.
(thumb2_output_return): Likewise.
(thumb2_expand_return): Likewise.
(thumb_unexpanded_epilogue): Rename to...
(thumb1_unexpanded_epilogue): ...this.
* config/arm/arm.c (load_multiple_operation_p): New function.
(thumb2_emit_multi_reg_pop): Likewise.
(thumb2_emit_vfp_multi_reg_pop): Likewise.
(thumb2_expand_return): Likewise.
(thumb2_expand_epilogue): Likewise.
(thumb2_output_return): Likewise.
(thumb_unexpanded_epilogue): Rename to...
(thumb1_unexpanded_epilogue): ...this.
* config/arm/arm.md (pop_multiple_with_stack_update): New
pattern.
(pop_multiple_with_stack_update_and_return): Likewise.
(thumb2_ldr_with_return): Likewise.
(vfp_pop_multiple_with_stack_update): Likewise.
(return): Update condition and code for pattern.
(arm_return): Likewise.
(epilogue_insns): Likewise.
* config/arm/predicates.md (load_multiple_operation): Update
predicate.
(load_multiple_operation_return): New predicate.
(load_multiple_operation_fp): Likewise.
* config/arm/thumb2.md (thumb2_return): Remove.
(thumb2_rtl_epilogue_return): New pattern.
- Thanks and regards,
Sameera D.
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index 23a29c6..2c38883 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -65,6 +65,7 @@ extern int thumb1_legitimate_address_p (enum machine_mode, rtx, int);
extern int arm_const_double_rtx (rtx);
extern int neg_const_double_rtx_ok_for_fpa (rtx);
extern int vfp3_const_double_rtx (rtx);
+extern bool load_multiple_operation_p (rtx, bool, enum machine_mode, bool);
extern int neon_immediate_valid_for_move (rtx, enum machine_mode, rtx *, int *);
extern int neon_immediate_valid_for_logic (rtx, enum machine_mode, int, rtx *,
int *);
@@ -176,10 +177,13 @@ extern int arm_float_words_big_endian (void);
/* Thumb functions. */
extern void arm_init_expanders (void);
-extern const char *thumb_unexpanded_epilogue (void);
+extern const char *thumb1_unexpanded_epilogue (void);
extern void thumb1_expand_prologue (void);
extern void thumb1_expand_epilogue (void);
extern const char *thumb1_output_interwork (void);
+extern void thumb2_expand_epilogue (void);
+extern void thumb2_output_return (rtx);
+extern void thumb2_expand_return (void);
#ifdef TREE_CODE
extern int is_called_in_ARM_mode (tree);
#endif
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index e07c8c3..ec87892 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -8906,6 +8906,137 @@ neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
#undef CHECK
}
+/* Return true if OP is a valid load multiple operation for MODE mode.
+ CONSECUTIVE is true if the registers in the operation must form
+ a consecutive sequence in the register bank. STACK_ONLY is true
+ if the base register must be the stack pointer. RETURN_PC is true
+ if value is to be loaded in PC. */
+bool
+load_multiple_operation_p (rtx op, bool consecutive, enum machine_mode mode,
+ bool return_pc)
+{
+ HOST_WIDE_INT count = XVECLEN (op, 0);
+ unsigned dest_regno, first_dest_regno;
+ rtx src_addr;
+ HOST_WIDE_INT i = 1, base = 0;
+ HOST_WIDE_INT offset = 0;
+ rtx elt;
+ bool addr_reg_loaded = false;
+ bool update = false;
+ int reg_increment, regs_per_val;
+ int offset_adj;
+
+ /* If DFmode, we must be asking for consecutive,
+ since fldmdd can only do consecutive regs. */
+ gcc_assert ((mode != DFmode) || consecutive);
+
+ /* Set up the increments and the regs per val based on the mode. */
+ reg_increment = GET_MODE_SIZE (mode);
+ regs_per_val = mode == DFmode ? 2 : 1;
+ offset_adj = return_pc ? 1 : 0;
+
+ if (count <= 1
+ || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
+ || !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj))))
+ return false;
+
+ /* Check to see if this might be a write-back. */
+ if (GET_CODE (SET_SRC (elt = XVECEXP (op, 0, offset_adj))) == PLUS)
+ {
+ i++;
+ base = 1;
+ update = true;
+
+ /* The offset adjustment should be same as number of registers being
+ popped * size of single register. */
+ if (!REG_P (SET_DEST (elt))
+ || !REG_P (XEXP (SET_SRC (elt), 0))
+ || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
+ || INTVAL (XEXP (SET_SRC (elt), 1)) !=
+ ((count - 1 - offset_adj) * reg_increment))
+ return false;
+ }
+
+ i = i + offset_adj;
+ base = base + offset_adj;
+ /* Perform a quick check so we don't blow up below. */
+ if (GET_CODE (XVECEXP (op, 0, i - 1)) != SET
+ || !REG_P (SET_DEST (XVECEXP (op, 0, i - 1)))
+ || !MEM_P (SET_SRC (XVECEXP (op, 0, i - 1))))
+ return false;
+
+ /* If only one reg being loaded, success depends on the type:
+ FLDMDD can do just one reg, LDM must do at least two. */
+ if (count <= i)
+ return mode == DFmode ? true : false;
+
+ first_dest_regno = REGNO (SET_DEST (XVECEXP (op, 0, i - 1)));
+ dest_regno = first_dest_regno;
+
+ src_addr = XEXP (SET_SRC (XVECEXP (op, 0, i - 1)), 0);
+
+ if (GET_CODE (src_addr) == PLUS)
+ {
+ if (!CONST_INT_P (XEXP (src_addr, 1)))
+ return false;
+ offset = INTVAL (XEXP (src_addr, 1));
+ src_addr = XEXP (src_addr, 0);
+ }
+
+ if (!REG_P (src_addr))
+ return false;
+
+ /* The pattern we are trying to match here is:
+ [(SET (R_d0) (MEM (PLUS (src_addr) (offset))))
+ (SET (R_d1) (MEM (PLUS (src_addr) (offset + <reg_increment>))))
+ :
+ :
+ (SET (R_dn) (MEM (PLUS (src_addr) (offset + n * <reg_increment>))))
+ ]
+ Where,
+ 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
+ 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
+ 3. If consecutive is TRUE, then for kth register being loaded,
+ REGNO (R_dk) = REGNO (R_d0) + k. */
+ for (; i < count; i++)
+ {
+ elt = XVECEXP (op, 0, i);
+
+ if (GET_CODE (elt) != SET
+ || !REG_P (SET_DEST (elt))
+ || GET_MODE (SET_DEST (elt)) != mode
+ || (consecutive
+ && (REGNO (SET_DEST (elt))
+ != (unsigned int) (first_dest_regno + regs_per_val * (i - base))))
+ || REGNO (SET_DEST (elt)) <= dest_regno
+ || !MEM_P (SET_SRC (elt))
+ || GET_MODE (SET_SRC (elt)) != mode
+ || ((GET_CODE (XEXP (SET_SRC (elt), 0)) != PLUS
+ || !rtx_equal_p (XEXP (XEXP (SET_SRC (elt), 0), 0), src_addr)
+ || !CONST_INT_P (XEXP (XEXP (SET_SRC (elt), 0), 1))
+ || INTVAL (XEXP (XEXP (SET_SRC (elt), 0), 1)) !=
+ (offset + (i - base) * reg_increment))
+ && (!REG_P (XEXP (SET_SRC (elt), 0))
+ || offset + (i - base) * reg_increment != 0)))
+ return false;
+
+ dest_regno = REGNO (SET_DEST (elt));
+ if (dest_regno == REGNO (src_addr))
+ addr_reg_loaded = true;
+ }
+
+ if (update && addr_reg_loaded)
+ return false;
+
+ /* For Thumb-1, address register is always modified - either by write-back
+ or by explicit load. If the pattern does not describe an update, it must
+ be because the address register is in the list of loaded registers. */
+ if (TARGET_THUMB1)
+ return update || addr_reg_loaded;
+
+ return true;
+}
+
/* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
float elements), and a modified constant (whatever should be output for a
@@ -16092,6 +16223,137 @@ emit_multi_reg_push (unsigned long mask)
return par;
}
+/* Generate and emit an insn pattern that we will recognize as a pop_multi.
+ SAVED_REGS_MASK shows which registers need to be restored.
+
+ Unfortunately, since this insn does not reflect very well the actual
+ semantics of the operation, we need to annotate the insn for the benefit
+ of DWARF2 frame unwind information.
+
+ There's no reason why this couldn't be used for Thumb-1 or ARM, in theory,
+ but currently the pattern that matches this in the MD file is only enabled
+ for Thumb-2. */
+static void
+thumb2_emit_multi_reg_pop (unsigned long saved_regs_mask, bool really_return)
+{
+ int num_regs = 0;
+ int i, j;
+ rtx par;
+ rtx dwarf = NULL_RTX;
+ rtx tmp, reg;
+ int offset_adj = really_return ? 1 : 0;
+
+ for (i = 0; i <= LAST_ARM_REGNUM; i++)
+ if (saved_regs_mask & (1 << i))
+ num_regs++;
+
+ gcc_assert (num_regs && num_regs <= 16);
+
+ /* The parallel needs to hold num_regs SETs
+ and one SET for the stack update. */
+ par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1 + offset_adj));
+
+ if (really_return)
+ {
+ tmp = ret_rtx;
+ XVECEXP (par, 0, 0) = tmp;
+ }
+
+ /* Increment the stack pointer, based on there being
+ num_regs 4-byte registers to restore. */
+ tmp = gen_rtx_SET (VOIDmode,
+ stack_pointer_rtx,
+ plus_constant (stack_pointer_rtx, 4 * num_regs));
+ RTX_FRAME_RELATED_P (tmp) = 1;
+ XVECEXP (par, 0, offset_adj) = tmp;
+
+ /* Now restore every reg, which may include PC. */
+ for (j = 0, i = 0; j < num_regs; i++)
+ if (saved_regs_mask & (1 << i))
+ {
+ reg = gen_rtx_REG (SImode, i);
+ tmp = gen_rtx_SET (VOIDmode,
+ reg,
+ gen_frame_mem
+ (SImode,
+ plus_constant (stack_pointer_rtx, 4 * j)));
+ RTX_FRAME_RELATED_P (tmp) = 1;
+ XVECEXP (par, 0, j + 1 + offset_adj) = tmp;
+
+ /* We need to maintain a sequence for DWARF info too. As dwarf info
+ should not have PC, skip PC. */
+ if (i != PC_REGNUM)
+ dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
+
+ j++;
+ }
+
+ if (really_return)
+ par = emit_jump_insn (par);
+ else
+ par = emit_insn (par);
+
+ REG_NOTES (par) = dwarf;
+}
+
+/* Generate and emit an insn pattern that we will recognize as a pop_multi
+ of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
+
+ Unfortunately, since this insn does not reflect very well the actual
+ semantics of the operation, we need to annotate the insn for the benefit
+ of DWARF2 frame unwind information. */
+static void
+thumb2_emit_vfp_multi_reg_pop (int first_reg, int num_regs)
+{
+ int i, j;
+ rtx par;
+ rtx dwarf = NULL_RTX;
+ rtx tmp, reg;
+
+ gcc_assert (num_regs && num_regs <= 32);
+
+ if (num_regs > 16)
+ {
+ thumb2_emit_vfp_multi_reg_pop (first_reg, 16);
+ thumb2_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16);
+ return;
+ }
+
+ /* The parallel needs to hold num_regs SETs
+ and one SET for the stack update. */
+ par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
+
+ /* Increment the stack pointer, based on there being
+ num_regs 8-byte registers to restore. */
+ tmp = gen_rtx_SET (VOIDmode,
+ stack_pointer_rtx,
+ plus_constant (stack_pointer_rtx, 8 * num_regs));
+ RTX_FRAME_RELATED_P (tmp) = 1;
+ XVECEXP (par, 0, 0) = tmp;
+
+ /* now show EVERY reg that will be restored, using a SET for each. */
+ for (j = 0, i=first_reg; j < num_regs; i += 2)
+ {
+ reg = gen_rtx_REG (DFmode, i);
+
+ tmp = gen_rtx_SET (VOIDmode,
+ reg,
+ gen_frame_mem
+ (DFmode,
+ plus_constant (stack_pointer_rtx,
+ 8 * j)));
+ RTX_FRAME_RELATED_P (tmp) = 1;
+ XVECEXP (par, 0, j + 1) = tmp;
+
+ dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
+
+ j++;
+ }
+
+ par = emit_insn (par);
+ REG_NOTES (par) = dwarf;
+}
+
/* Calculate the size of the return value that is passed in registers. */
static unsigned
arm_size_return_regs (void)
@@ -21622,7 +21884,7 @@ thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
/* The bits which aren't usefully expanded as rtl. */
const char *
-thumb_unexpanded_epilogue (void)
+thumb1_unexpanded_epilogue (void)
{
arm_stack_offsets *offsets;
int regno;
@@ -22191,7 +22453,6 @@ thumb1_expand_prologue (void)
cfun->machine->lr_save_eliminated = 0;
}
-
void
thumb1_expand_epilogue (void)
{
@@ -22246,6 +22507,242 @@ thumb1_expand_epilogue (void)
emit_use (gen_rtx_REG (SImode, LR_REGNUM));
}
+/* Generate pattern *pop_multiple_with_stack_update_and_return if single
+ POP instruction can be generated. LR should be replaced by PC. All
+ the checks required are already done by USE_RETURN_INSN (). Hence,
+ all we really need to check here is if single register is to be
+ returned, or multiple register return. */
+void
+thumb2_expand_return (void)
+{
+ int i, num_regs;
+ unsigned long saved_regs_mask;
+ arm_stack_offsets *offsets;
+
+ offsets = arm_get_frame_offsets ();
+ saved_regs_mask = offsets->saved_regs_mask;
+ for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
+ if (saved_regs_mask & (1 << i))
+ num_regs++;
+
+ if (saved_regs_mask)
+ {
+ if (num_regs == 1)
+ {
+ rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
+ rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
+ rtx addr = gen_rtx_MEM (SImode,
+ gen_rtx_POST_INC (SImode,
+ stack_pointer_rtx));
+ set_mem_alias_set (addr, get_frame_alias_set ());
+ XVECEXP (par, 0, 0) = ret_rtx;
+ XVECEXP (par, 0, 1) = gen_rtx_SET (SImode, reg, addr);
+ RTX_FRAME_RELATED_P (par) = 1;
+ emit_jump_insn (par);
+ }
+ else
+ {
+ saved_regs_mask &= ~ (1 << LR_REGNUM);
+ saved_regs_mask |= (1 << PC_REGNUM);
+ thumb2_emit_multi_reg_pop (saved_regs_mask, true);
+ }
+ }
+ else
+ {
+ emit_jump_insn (ret_rtx);
+ }
+}
+
+/* Generate RTL to represent a Thumb-2 epilogue.
+
+ Note that this RTL does not include the
+ Return insn, which is created separately and
+ handled in thumb2_output_return. */
+void
+thumb2_expand_epilogue (void)
+{
+ HOST_WIDE_INT amount;
+ int reg;
+ unsigned long saved_regs_mask;
+ unsigned long func_type;
+ int i;
+ arm_stack_offsets *offsets;
+ int num_regs = 0;
+ bool really_return = false;
+
+ func_type = arm_current_func_type ();
+
+ /* Naked functions don't have epilogues. */
+ if (IS_NAKED (func_type)
+ || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
+ {
+ emit_jump_insn (ret_rtx);
+ return;
+ }
+
+ /* At the end of the code of a function, the stack pointer will
+ be pointing at the outgoing args area, so we first need to
+ get it to point at the saved_regs area. */
+
+ /* Determine how much to add to the stack pointer. */
+ offsets = arm_get_frame_offsets ();
+ saved_regs_mask = offsets->saved_regs_mask;
+
+ for (i = 0; i <= LAST_ARM_REGNUM; i++)
+ if (saved_regs_mask & (1 << i))
+ num_regs++;
+
+ /* In Thumb-2 mode, the frame pointer points to the last
+ saved register. */
+ amount = offsets->outgoing_args - offsets->saved_regs;
+
+ if (frame_pointer_needed)
+ {
+ emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
+ amount = offsets->locals_base - offsets->saved_regs;
+ }
+
+ gcc_assert (amount >= 0);
+ if (amount)
+ emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (amount)));
+
+ /* Emit a USE (stack_pointer_rtx), so that
+ the stack adjustment will not be deleted. */
+ emit_insn (gen_prologue_use (stack_pointer_rtx));
+
+ /* Now handle any VFP restoration. */
+ if (TARGET_HARD_FLOAT && TARGET_VFP)
+ {
+ int end_reg = LAST_VFP_REGNUM + 1;
+
+ /* Scan the registers in reverse order. We need to match
+ any groupings made in the prologue and generate matching
+ fldmdd operations. The need to match groups is because,
+ unlike pop, fldmdd can only do consecutive regs. */
+ for (reg = LAST_VFP_REGNUM - 1; reg >= FIRST_VFP_REGNUM; reg -= 2)
+ /* Look for a case where a reg does not need restoring. */
+ if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
+ && (!df_regs_ever_live_p (reg + 1)
+ || call_used_regs[reg + 1]))
+ {
+ /* Restore the regs discovered so far (from reg+2 to end_reg). */
+ if (end_reg > reg + 2)
+ thumb2_emit_vfp_multi_reg_pop (reg + 2,
+ (end_reg - (reg + 2)) / 2);
+ end_reg = reg;
+ }
+
+ /* Restore the remaining regs that we have discovered (or possibly
+ even all of them, if the conditional in the for loop never fired). */
+ if (end_reg > reg + 2)
+ thumb2_emit_vfp_multi_reg_pop (reg + 2, (end_reg - (reg + 2)) / 2);
+ }
+
+ /* iWMMXt is not supported when Thumb-2 in use. If it were, we would
+ want to be restoring the appropriate iWMMXt regs here, in a similar
+ way to arm_output_epilogue. */
+
+ /* If there are registers to restore, make it happen. */
+ if (saved_regs_mask)
+ {
+ /* It's illegal to do a pop for only one reg, so generate an ldr. */
+ if (num_regs == 1)
+ {
+ for (i = 0; i <= LAST_ARM_REGNUM; i++)
+ if (saved_regs_mask & (1 << i))
+ {
+ rtx addr = gen_rtx_MEM (SImode,
+ gen_rtx_POST_INC (SImode,
+ stack_pointer_rtx));
+ set_mem_alias_set (addr, get_frame_alias_set ());
+ emit_insn (gen_movsi (gen_rtx_REG (SImode, i), addr));
+ }
+ }
+
+ /* Two or more regs warrants the use of a multi-reg pop. */
+ else
+ {
+ /* If multi-pop is last instruction, don't generate `branch to
+ return-address' instruction. Instead, pop LR in PC. */
+ if (ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
+ && !IS_STACKALIGN (func_type)
+ && crtl->args.pretend_args_size == 0
+ && saved_regs_mask & (1 << LR_REGNUM)
+ && !crtl->calls_eh_return)
+ {
+ saved_regs_mask &= ~ (1 << LR_REGNUM);
+ saved_regs_mask |= (1 << PC_REGNUM);
+ really_return = true;
+ }
+
+ thumb2_emit_multi_reg_pop (saved_regs_mask, really_return);
+ if (really_return == true)
+ return;
+ }
+ }
+
+ /* Unwind the pre-pushed regs. */
+ if (crtl->args.pretend_args_size)
+ emit_insn (gen_addsi3 (stack_pointer_rtx,
+ stack_pointer_rtx,
+ GEN_INT (crtl->args.pretend_args_size)));
+
+ /* Stack adjustment for exception handler. */
+ if (crtl->calls_eh_return)
+ emit_insn (gen_addsi3 (stack_pointer_rtx,
+ stack_pointer_rtx,
+ gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
+
+ emit_jump_insn (ret_rtx);
+}
+
+
+/* Generate the appropriate instruction to return for Thumb-2.
+ OPERAND holds a condition, which must be passed to output_asm_insn. */
+void
+thumb2_output_return (rtx operand)
+{
+ char instr[100];
+ unsigned long func_type;
+
+ func_type = arm_current_func_type ();
+
+ if (IS_NAKED (func_type))
+ /* Do nothing if naked function. */
+ return;
+
+ if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
+ {
+ rtx op;
+
+ /* A volatile function should never return. Call abort. */
+ op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
+ assemble_external_libcall (op);
+ output_asm_insn ("bl\t%a0", &op);
+
+ return;
+ }
+
+ switch ((int) ARM_FUNC_TYPE (func_type))
+ {
+ case ARM_FT_ISR:
+ case ARM_FT_FIQ:
+ sprintf (instr, "subs\t%%|pc, %%|lr, #4");
+ break;
+
+ case ARM_FT_EXCEPTION:
+ sprintf (instr, "movs\t%%|pc, %%|lr");
+ break;
+
+ default:
+ sprintf (instr, "bx\t%%|lr");
+ break;
+ }
+
+ output_asm_insn (instr, &operand);
+}
+
/* Implementation of insn prologue_thumb1_interwork. This is the first
"instruction" of a function called in ARM mode. Swap to thumb mode. */
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index a78ba88..64444f2 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -6669,6 +6669,151 @@
FALSE, operands[1], &offset);
})
+;; Pop (as used in epilogue RTL)
+;;
+;; This should really be in thumb2.md, but it needs to live above
+;; the ldmsi patterns, so that it matches before them.
+;; Furthermore, there is no reason why it could not be extended
+;; to support Thumb-1 and ARM at a later date (whereupon it would
+;; fully deserve its spot in this file).
+(define_insn "*pop_multiple_with_stack_update"
+ [(match_parallel 0 "load_multiple_operation"
+ [(set (match_operand:SI 1 "s_register_operand" "+rk")
+ (plus:SI (match_dup 1)
+ (match_operand:SI 2 "const_int_operand" "I")))
+ ])]
+ "TARGET_THUMB2"
+ "*
+ {
+ int i;
+ char pattern[100];
+ int num_saves = XVECLEN (operands[0], 0);
+
+ if (REGNO (operands[1]) == SP_REGNUM)
+ {
+ strcpy (pattern, \"pop\\t{\");
+ }
+ else
+ {
+ strcpy (pattern, \"ldm%(ia%)\\t\");
+ strcat (pattern, reg_names[REGNO (operands[1])]);
+ strcat (pattern, \"!, {\");
+ }
+
+ strcat (pattern,
+ reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, 1), 0))]);
+
+ /* Skip over the first element and the one we just generated. */
+ for (i = 2; i < (num_saves); i++)
+ {
+ strcat (pattern, \", %|\");
+ strcat (pattern,
+ reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
+ }
+
+ strcat (pattern, \"}\");
+ output_asm_insn (pattern, operands);
+
+ return \"\";
+ }
+ "
+ [(set_attr "type" "load4")]
+)
+
+;; Pop with return (as used in epilogue RTL)
+;;
+;; This instruction is generated when the registers are popped at end of
+;; epilogue. Here, instead of popping the value in LR and then generating
+;; jump to LR, value is popped in PC. Hence, the pattern is combined with
+;; (return).
+(define_insn "*pop_multiple_with_stack_update_and_return"
+ [(match_parallel 0 "load_multiple_operation_return"
+ [(return)
+ (set (match_operand:SI 1 "s_register_operand" "+k")
+ (plus:SI (match_dup 1)
+ (match_operand:SI 2 "const_int_operand" "I")))
+ ])]
+ "TARGET_THUMB2"
+ "*
+ {
+ int i;
+ char pattern[100];
+ int num_saves = XVECLEN (operands[0], 0);
+
+ strcpy (pattern, \"pop\\t{\");
+ strcat (pattern,
+ reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, 2), 0))]);
+
+ /* Skip over the first two elements and the one we just generated. */
+ for (i = 3; i < (num_saves); i++)
+ {
+ strcat (pattern, \", %|\");
+ strcat (pattern,
+ reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
+ }
+
+ strcat (pattern, \"}\");
+ output_asm_insn (pattern, operands);
+
+ return \"\";
+ }
+ "
+ [(set_attr "type" "load4")]
+)
+
+(define_insn "*thumb2_ldr_with_return"
+ [(return)
+ (set (reg:SI PC_REGNUM)
+ (mem:SI (post_inc:SI (match_operand:SI 0 "s_register_operand" "+k"))))]
+ "TARGET_THUMB2"
+ "ldr%?\t%|pc, [%0], #4"
+ [(set_attr "type" "load1")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "*vfp_pop_multiple_with_stack_update"
+ [(match_parallel 0 "load_multiple_operation_fp"
+ [(set (match_operand:SI 1 "s_register_operand" "+k")
+ (plus:SI (match_dup 1)
+ (match_operand:SI 2 "const_int_operand" "I")))
+ (set (match_operand:DF 3 "arm_hard_register_operand" "")
+ (mem:DF (match_dup 1)))])]
+ "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP"
+ "*
+ {
+ int num_regs = XVECLEN (operands[0], 0);
+ static const struct { const char *const name; } table[]
+ = { {\"d0\"}, {\"d1\"}, {\"d2\"}, {\"d3\"},
+ {\"d4\"}, {\"d5\"}, {\"d6\"}, {\"d7\"},
+ {\"d8\"}, {\"d9\"}, {\"d10\"}, {\"d11\"},
+ {\"d12\"}, {\"d13\"}, {\"d14\"}, {\"d15\"},
+ {\"d16\"}, {\"d17\"}, {\"d18\"}, {\"d19\"},
+ {\"d20\"}, {\"d21\"}, {\"d22\"}, {\"d23\"},
+ {\"d24\"}, {\"d25\"}, {\"d26\"}, {\"d27\"},
+ {\"d28\"}, {\"d29\"}, {\"d30\"}, {\"d31\"} };
+ char pattern[100];
+ strcpy (pattern, \"fldmfdd\\t\");
+ strcat (pattern,
+ reg_names[REGNO (SET_DEST (XVECEXP (operands[0], 0, 0)))]);
+ strcat (pattern, \"!, {\");
+ strcat (pattern, table[(REGNO (XEXP (XVECEXP (operands[0], 0, 1), 0))
+ - FIRST_VFP_REGNUM) / 2].name);
+ if ((num_regs - 1) > 1)
+ {
+ strcat (pattern, \"-%|\");
+ strcat (pattern, table[(REGNO (XEXP (XVECEXP (operands[0], 0,
+ num_regs - 1), 0))
+ - FIRST_VFP_REGNUM) / 2].name);
+ }
+
+ strcat (pattern, \"}\");
+ output_asm_insn (pattern, operands);
+ return \"\";
+ }
+ "
+ [(set_attr "type" "load4")]
+)
+
(define_expand "store_multiple"
[(match_par_dup 3 [(set (match_operand:SI 0 "" "")
(match_operand:SI 1 "" ""))
@@ -8486,8 +8631,19 @@
(define_expand "return"
[(return)]
- "TARGET_32BIT && USE_RETURN_INSN (FALSE)"
- "")
+ "(TARGET_ARM || (TARGET_THUMB2
+ && ARM_FUNC_TYPE (arm_current_func_type ()) == ARM_FT_NORMAL
+ && !IS_STACKALIGN (arm_current_func_type ())))
+ && USE_RETURN_INSN (FALSE)"
+ "
+ {
+ if (TARGET_THUMB2)
+ {
+ thumb2_expand_return ();
+ DONE;
+ }
+ }
+ ")
;; Often the return insn will be the same as loading from memory, so set attr
(define_insn "*arm_return"
@@ -10529,6 +10685,11 @@
emit_insn (gen_prologue_use (gen_rtx_REG (Pmode, 2)));
if (TARGET_THUMB1)
thumb1_expand_epilogue ();
+ else if (TARGET_THUMB2)
+ {
+ thumb2_expand_epilogue ();
+ DONE;
+ }
else if (USE_RETURN_INSN (FALSE))
{
emit_jump_insn (gen_return ());
@@ -10572,12 +10733,12 @@
(define_insn "*epilogue_insns"
[(unspec_volatile [(return)] VUNSPEC_EPILOGUE)]
- "TARGET_EITHER"
+ "TARGET_ARM || TARGET_THUMB1"
"*
if (TARGET_32BIT)
return arm_output_epilogue (NULL);
else /* TARGET_THUMB1 */
- return thumb_unexpanded_epilogue ();
+ return thumb1_unexpanded_epilogue ();
"
; Length is absolute worst case
[(set_attr "length" "44")
diff --git a/gcc/config/arm/predicates.md b/gcc/config/arm/predicates.md
index 92eb004..7e2203d 100644
--- a/gcc/config/arm/predicates.md
+++ b/gcc/config/arm/predicates.md
@@ -369,84 +369,22 @@
(define_special_predicate "load_multiple_operation"
(match_code "parallel")
{
- HOST_WIDE_INT count = XVECLEN (op, 0);
- unsigned dest_regno;
- rtx src_addr;
- HOST_WIDE_INT i = 1, base = 0;
- HOST_WIDE_INT offset = 0;
- rtx elt;
- bool addr_reg_loaded = false;
- bool update = false;
-
- if (count <= 1
- || GET_CODE (XVECEXP (op, 0, 0)) != SET
- || !REG_P (SET_DEST (XVECEXP (op, 0, 0))))
- return false;
-
- /* Check to see if this might be a write-back. */
- if (GET_CODE (SET_SRC (elt = XVECEXP (op, 0, 0))) == PLUS)
- {
- i++;
- base = 1;
- update = true;
-
- /* Now check it more carefully. */
- if (GET_CODE (SET_DEST (elt)) != REG
- || GET_CODE (XEXP (SET_SRC (elt), 0)) != REG
- || GET_CODE (XEXP (SET_SRC (elt), 1)) != CONST_INT
- || INTVAL (XEXP (SET_SRC (elt), 1)) != (count - 1) * 4)
- return false;
- }
-
- /* Perform a quick check so we don't blow up below. */
- if (count <= i
- || GET_CODE (XVECEXP (op, 0, i - 1)) != SET
- || GET_CODE (SET_DEST (XVECEXP (op, 0, i - 1))) != REG
- || GET_CODE (SET_SRC (XVECEXP (op, 0, i - 1))) != MEM)
- return false;
-
- dest_regno = REGNO (SET_DEST (XVECEXP (op, 0, i - 1)));
- src_addr = XEXP (SET_SRC (XVECEXP (op, 0, i - 1)), 0);
- if (GET_CODE (src_addr) == PLUS)
- {
- if (GET_CODE (XEXP (src_addr, 1)) != CONST_INT)
- return false;
- offset = INTVAL (XEXP (src_addr, 1));
- src_addr = XEXP (src_addr, 0);
- }
- if (!REG_P (src_addr))
- return false;
+ return load_multiple_operation_p (op, /*consecutive=*/false,
+ SImode, /*return_pc*/false);
+})
- for (; i < count; i++)
- {
- elt = XVECEXP (op, 0, i);
+(define_special_predicate "load_multiple_operation_return"
+ (match_code "parallel")
+{
+ return load_multiple_operation_p (op, /*consecutive=*/false,
+ SImode, /*return_pc*/true);
+})
- if (GET_CODE (elt) != SET
- || GET_CODE (SET_DEST (elt)) != REG
- || GET_MODE (SET_DEST (elt)) != SImode
- || REGNO (SET_DEST (elt)) <= dest_regno
- || GET_CODE (SET_SRC (elt)) != MEM
- || GET_MODE (SET_SRC (elt)) != SImode
- || ((GET_CODE (XEXP (SET_SRC (elt), 0)) != PLUS
- || !rtx_equal_p (XEXP (XEXP (SET_SRC (elt), 0), 0), src_addr)
- || GET_CODE (XEXP (XEXP (SET_SRC (elt), 0), 1)) != CONST_INT
- || INTVAL (XEXP (XEXP (SET_SRC (elt), 0), 1)) != offset + (i - base) * 4)
- && (!REG_P (XEXP (SET_SRC (elt), 0))
- || offset + (i - base) * 4 != 0)))
- return false;
- dest_regno = REGNO (SET_DEST (elt));
- if (dest_regno == REGNO (src_addr))
- addr_reg_loaded = true;
- }
- /* For Thumb, we only have updating instructions. If the pattern does
- not describe an update, it must be because the address register is
- in the list of loaded registers - on the hardware, this has the effect
- of overriding the update. */
- if (update && addr_reg_loaded)
- return false;
- if (TARGET_THUMB1)
- return update || addr_reg_loaded;
- return true;
+(define_special_predicate "load_multiple_operation_fp"
+ (match_code "parallel")
+{
+ return load_multiple_operation_p (op, /*consecutive=*/true,
+ DFmode, /*return_pc*/false);
})
(define_special_predicate "store_multiple_operation"
diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md
index 05585da..78f4e81 100644
--- a/gcc/config/arm/thumb2.md
+++ b/gcc/config/arm/thumb2.md
@@ -635,17 +635,18 @@
(set_attr "length" "20")]
)
-;; Note: this is not predicable, to avoid issues with linker-generated
-;; interworking stubs.
-(define_insn "*thumb2_return"
+(define_insn "*thumb2_rtl_epilogue_return"
[(return)]
- "TARGET_THUMB2 && USE_RETURN_INSN (FALSE)"
+ "(TARGET_THUMB2)"
"*
{
- return output_return_instruction (const_true_rtx, TRUE, FALSE);
+ thumb2_output_return (const_true_rtx);
+ return \"\";
}"
- [(set_attr "type" "load1")
- (set_attr "length" "12")]
+ [(set_attr "type" "branch")
+ (set_attr "length" "4")
+ (set_attr "predicable" "no")
+ (set_attr "conds" "unconditional")]
)
(define_insn_and_split "thumb2_eh_return"