This patch adds new define_insn patterns for epilogues that restore integer registers.
The patterns can handle pop multiple with writeback and return (loading into PC directly). To handle return, the patterns use a new special predicate, pop_multiple_return, which uses the ldm_stm_operation_p function from a previous patch. To output assembly, the patterns use a new function, arm_output_multireg_pop. This patch also adds a new function, arm_emit_multi_reg_pop, which emits RTL that matches the new pop patterns for integer registers. It is a helper function for epilogue expansion and is used by a later patch. ChangeLog: gcc 2012-05-31 Ian Bolton <ian.bol...@arm.com> Sameera Deshpande <sameera.deshpa...@arm.com> Greta Yorsh <greta.yo...@arm.com> * config/arm/arm.md (load_multiple_with_writeback): New define_insn. (load_multiple, pop_multiple_with_writeback_and_return): Likewise. (pop_multiple_with_return, ldr_with_return): Likewise. * config/arm/predicates.md (pop_multiple_return): New special predicate. * config/arm/arm-protos.h (arm_output_multireg_pop): New declaration. * config/arm/arm.c (arm_output_multireg_pop): New function. (arm_emit_multi_reg_pop): New function. (ldm_stm_operation_p): Check SP in the register list.
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h index 53c2aef..7b25e37 100644 --- a/gcc/config/arm/arm-protos.h +++ b/gcc/config/arm/arm-protos.h @@ -156,6 +156,7 @@ extern int arm_emit_vector_const (FILE *, rtx); extern void arm_emit_fp16_const (rtx c); extern const char * arm_output_load_gr (rtx *); extern const char *vfp_output_fstmd (rtx *); +extern void arm_output_multireg_pop (rtx *, bool, rtx, bool, bool); extern void arm_set_return_address (rtx, rtx); extern int arm_eliminable_register (rtx); extern const char *arm_output_shift(rtx *, int); diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 4717725..9093801 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -13815,6 +13815,84 @@ vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count) } +/* OPERANDS[0] is the entire list of insns that constitute pop, + OPERANDS[1] is the base register, RETURN_PC is true iff return insn + is in the list, UPDATE is true iff the list contains explicit update + of base register. COND is handed to output_asm_insn as operand 0 of + the template; REVERSE selects the %D0 modifier for it instead of %d0. */ +void +arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse, + bool update) +{ + int i; + char pattern[100]; + int offset; + const char *conditional; + int num_saves = XVECLEN (operands[0], 0); + unsigned int regno; + unsigned int regno_base = REGNO (operands[1]); + + offset = 0; + offset += update ? 1 : 0; + offset += return_pc ? 1 : 0; + + /* Is the base register in the list? */ + for (i = offset; i < num_saves; i++) + { + regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0)); + /* If SP is in the list, then the base register must be SP. */ + gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM)); + /* If base register is in the list, there must be no explicit update. */ + if (regno == regno_base) + gcc_assert (!update); + } + + conditional = reverse ? "%?%D0" : "%?%d0"; + if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM) + { + /* Output pop (not ldmfd) because it has a shorter encoding. 
*/ + gcc_assert (update); + sprintf (pattern, "pop%s\t{", conditional); + } + else + { + /* Output ldmfd when the base register is SP, otherwise output ldmia. + It's just a convention, their semantics are identical. */ + if (regno_base == SP_REGNUM) + sprintf (pattern, "ldm%sfd\t", conditional); + else if (TARGET_UNIFIED_ASM) + sprintf (pattern, "ldmia%s\t", conditional); + else + sprintf (pattern, "ldm%sia\t", conditional); + + strcat (pattern, reg_names[regno_base]); + if (update) + strcat (pattern, "!, {"); + else + strcat (pattern, ", {"); + } + + /* Output the first destination register. */ + strcat (pattern, + reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]); + + /* Output the rest of the destination registers. */ + for (i = offset + 1; i < num_saves; i++) + { + strcat (pattern, ", "); + strcat (pattern, + reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]); + } + + strcat (pattern, "}"); + + if (IS_INTERRUPT (arm_current_func_type ()) && return_pc) + strcat (pattern, "^"); + + output_asm_insn (pattern, &cond); +} + + /* Output the assembly for a store multiple. */ const char * @@ -16461,6 +16539,85 @@ emit_multi_reg_push (unsigned long mask) return par; } +/* Generate and emit an insn pattern that we will recognize as a pop_multi. + SAVED_REGS_MASK shows which registers need to be restored. + + Unfortunately, since this insn does not reflect very well the actual + semantics of the operation, we need to annotate the insn for the benefit + of DWARF2 frame unwind information. */ +static void +arm_emit_multi_reg_pop (unsigned long saved_regs_mask) +{ + int num_regs = 0; + int i, j; + rtx par; + rtx dwarf = NULL_RTX; + rtx tmp, reg; + bool return_in_pc; + int offset_adj; + int emit_update; + + return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false; + offset_adj = return_in_pc ? 
1 : 0; + for (i = 0; i <= LAST_ARM_REGNUM; i++) + if (saved_regs_mask & (1 << i)) + num_regs++; + + gcc_assert (num_regs && num_regs <= 16); + + /* If SP is in reglist, then we don't emit SP update insn. */ + emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1; + + /* The parallel needs to hold num_regs SETs, one SET for the stack + update (if emitted) and the RETURN (if PC is restored). */ + par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj)); + + if (return_in_pc) + { + tmp = ret_rtx; + XVECEXP (par, 0, 0) = tmp; + } + + if (emit_update) + { + /* Increment the stack pointer, based on there being + num_regs 4-byte registers to restore. */ + tmp = gen_rtx_SET (VOIDmode, + stack_pointer_rtx, + plus_constant (stack_pointer_rtx, 4 * num_regs)); + RTX_FRAME_RELATED_P (tmp) = 1; + XVECEXP (par, 0, offset_adj) = tmp; + } + + /* Now restore every reg, which may include PC. */ + for (j = 0, i = 0; j < num_regs; i++) + if (saved_regs_mask & (1 << i)) + { + reg = gen_rtx_REG (SImode, i); + tmp = gen_rtx_SET (VOIDmode, + reg, + gen_frame_mem + (SImode, + plus_constant (stack_pointer_rtx, 4 * j))); + RTX_FRAME_RELATED_P (tmp) = 1; + XVECEXP (par, 0, j + emit_update + offset_adj) = tmp; + + /* We need to maintain a sequence for DWARF info too. As dwarf info + should not have PC, skip PC. */ + if (i != PC_REGNUM) + dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf); + + j++; + } + + if (return_in_pc) + par = emit_jump_insn (par); + else + par = emit_insn (par); + + REG_NOTES (par) = dwarf; +} + /* Calculate the size of the return value that is passed in registers. 
*/ static unsigned arm_size_return_regs (void) diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md index ed33c9b..862ccf4 100644 --- a/gcc/config/arm/arm.md +++ b/gcc/config/arm/arm.md @@ -10959,6 +10959,89 @@ [(set_attr "type" "f_fpa_store")] ) +;; Pop (as used in epilogue RTL) +;; +(define_insn "*load_multiple_with_writeback" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 1 "s_register_operand" "+rk") + (plus:SI (match_dup 1) + (match_operand:SI 2 "const_int_operand" "I"))) + (set (match_operand:SI 3 "s_register_operand" "=rk") + (mem:SI (match_dup 1))) + ])] + "TARGET_32BIT && (reload_in_progress || reload_completed)" + "* + { + arm_output_multireg_pop (operands, /*return_pc=*/FALSE, + /*cond=*/const_true_rtx, + /*reverse=*/FALSE, + /*update=*/TRUE); + return \"\"; + } + " + [(set_attr "type" "load4") + (set_attr "predicable" "yes")] +) + +;; Pop with return (as used in epilogue RTL) +;; +;; This instruction is generated when the registers are popped at the end of +;; epilogue. Here, instead of popping the value into LR and then generating +;; jump to LR, value is popped into PC directly. Hence, the pattern is combined +;; with (return). 
+(define_insn "*pop_multiple_with_writeback_and_return" + [(match_parallel 0 "pop_multiple_return" + [(return) + (set (match_operand:SI 1 "s_register_operand" "+rk") + (plus:SI (match_dup 1) + (match_operand:SI 2 "const_int_operand" "I"))) + (set (match_operand:SI 3 "s_register_operand" "=rk") + (mem:SI (match_dup 1))) + ])] + "TARGET_32BIT && (reload_in_progress || reload_completed)" + "* + { + arm_output_multireg_pop (operands, /*return_pc=*/TRUE, + /*cond=*/const_true_rtx, + /*reverse=*/FALSE, + /*update=*/TRUE); + return \"\"; + } + " + [(set_attr "type" "load4") + (set_attr "predicable" "yes")] +) + +(define_insn "*pop_multiple_with_return" + [(match_parallel 0 "pop_multiple_return" + [(return) + (set (match_operand:SI 2 "s_register_operand" "=rk") + (mem:SI (match_operand:SI 1 "s_register_operand" "rk"))) + ])] + "TARGET_32BIT && (reload_in_progress || reload_completed)" + "* + { + arm_output_multireg_pop (operands, /*return_pc=*/TRUE, + /*cond=*/const_true_rtx, + /*reverse=*/FALSE, + /*update=*/FALSE); + return \"\"; + } + " + [(set_attr "type" "load4") + (set_attr "predicable" "yes")] +) + +;; Load into PC and return +(define_insn "*ldr_with_return" + [(return) + (set (reg:SI PC_REGNUM) + (mem:SI (post_inc:SI (match_operand:SI 0 "s_register_operand" "+rk"))))] + "TARGET_32BIT && (reload_in_progress || reload_completed)" + "ldr%?\t%|pc, [%0], #4" + [(set_attr "type" "load1") + (set_attr "predicable" "yes")] +) ;; Special patterns for dealing with the constant pool (define_insn "align_4" @@ -11390,6 +11473,27 @@ ;; Load the load/store multiple patterns (include "ldmstm.md") + +;; Patterns in ldmstm.md don't cover more than 4 registers. This pattern covers +;; large lists without explicit writeback generated for APCS_FRAME epilogue. 
+(define_insn "*load_multiple" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 2 "s_register_operand" "=rk") + (mem:SI (match_operand:SI 1 "s_register_operand" "rk"))) + ])] + "TARGET_32BIT" + "* + { + arm_output_multireg_pop (operands, /*return_pc=*/FALSE, + /*cond=*/const_true_rtx, + /*reverse=*/FALSE, + /*update=*/FALSE); + return \"\"; + } + " + [(set_attr "predicable" "yes")] +) + ;; Load the FPA co-processor patterns (include "fpa.md") ;; Load the Maverick co-processor patterns diff --git a/gcc/config/arm/predicates.md b/gcc/config/arm/predicates.md index 428f9e0..24dd4ea 100644 --- a/gcc/config/arm/predicates.md +++ b/gcc/config/arm/predicates.md @@ -393,6 +393,14 @@ /*return_pc=*/false); }) +(define_special_predicate "pop_multiple_return" + (match_code "parallel") +{ + return ldm_stm_operation_p (op, /*load=*/true, SImode, + /*consecutive=*/false, + /*return_pc=*/true); +}) + (define_special_predicate "multi_register_push" (match_code "parallel") {