ping From: Wilco Dijkstra Sent: 31 October 2016 18:29 To: GCC Patches Cc: nd Subject: [RFC][PATCH][AArch64] Cleanup frame pointer usage This patch cleans up all code related to the frame pointer. On AArch64 we emit a frame chain even in cases where the frame pointer is not required. So make this explicit by introducing a boolean emit_frame_chain in aarch64_frame record.
When the frame pointer is enabled but not strictly required (eg. no use of alloca), we emit a frame chain in non-leaf functions, but continue to use the stack pointer to access locals. This results in smaller code and unwind info. Also simplify the complex logic in aarch64_override_options_after_change_1 and compute whether the frame chain is required in aarch64_layout_frame instead. As a result aarch64_frame_pointer_required is now redundant and aarch64_can_eliminate can be greatly simplified. Finally convert all callee save/restore functions to use gen_frame_mem. Bootstrap OK. Any comments? ChangeLog: 2016-10-31 Wilco Dijkstra <wdijk...@arm.com> gcc/ * config/aarch64/aarch64.h (aarch64_frame): Add emit_frame_chain boolean. * config/aarch64/aarch64.c (aarch64_frame_pointer_required) Remove. (aarch64_layout_frame): Initialise emit_frame_chain. (aarch64_pushwb_single_reg): Use gen_frame_mem. (aarch64_pop_regs): Likewise. (aarch64_gen_load_pair): Likewise. (aarch64_save_callee_saves): Likewise. (aarch64_restore_callee_saves): Likewise. (aarch64_expand_prologue): Use emit_frame_chain. (aarch64_can_eliminate): Simplify. When FP needed or outgoing arguments are large, eliminate to FP, otherwise SP. (aarch64_override_options_after_change_1): Simplify. (TARGET_FRAME_POINTER_REQUIRED): Remove define. -- diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index fa81e4b853dafcccc08842955288861ec7e7acca..6e32dc9f6f171dde0c182fdd7857230251f71712 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -583,6 +583,9 @@ struct GTY (()) aarch64_frame /* The size of the stack adjustment after saving callee-saves. */ HOST_WIDE_INT final_adjust; + /* Store FP,LR and setup a frame pointer. */ + bool emit_frame_chain; + unsigned wb_candidate1; unsigned wb_candidate2; diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index f07d771ea343803e054e03f59c8c1efb698bf474..6c06ac18d16f8afa7ee1cc5e8530e285a60e2b0f 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -2728,24 +2728,6 @@ aarch64_output_probe_stack_range (rtx reg1, rtx reg2) return ""; } -static bool -aarch64_frame_pointer_required (void) -{ - /* In aarch64_override_options_after_change - flag_omit_leaf_frame_pointer turns off the frame pointer by - default. Turn it back on now if we've not got a leaf - function. */ - if (flag_omit_leaf_frame_pointer - && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM))) - return true; - - /* Force a frame pointer for EH returns so the return address is at FP+8. */ - if (crtl->calls_eh_return) - return true; - - return false; -} - /* Mark the registers that need to be saved by the callee and calculate the size of the callee-saved registers area and frame record (both FP and LR may be omitted). */ @@ -2758,6 +2740,18 @@ aarch64_layout_frame (void) if (reload_completed && cfun->machine->frame.laid_out) return; + /* Force a frame chain for EH returns so the return address is at FP+8. */ + cfun->machine->frame.emit_frame_chain + = frame_pointer_needed || crtl->calls_eh_return; + + /* Emit a frame chain if the frame pointer is enabled. + If -momit-leaf-frame-pointer is used, do not use a frame chain + in leaf functions which do not use LR. */ + if (flag_omit_frame_pointer == 2 + && !(flag_omit_leaf_frame_pointer && crtl->is_leaf + && !df_regs_ever_live_p (LR_REGNUM))) + cfun->machine->frame.emit_frame_chain = true; + #define SLOT_NOT_REQUIRED (-2) #define SLOT_REQUIRED (-1) @@ -2789,7 +2783,7 @@ aarch64_layout_frame (void) && !call_used_regs[regno]) cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED; - if (frame_pointer_needed) + if (cfun->machine->frame.emit_frame_chain) { /* FP and LR are placed in the linkage record. */ cfun->machine->frame.reg_offset[R29_REGNUM] = 0; @@ -2937,7 +2931,7 @@ aarch64_pushwb_single_reg (machine_mode mode, unsigned regno, reg = gen_rtx_REG (mode, regno); mem = gen_rtx_PRE_MODIFY (Pmode, base_rtx, plus_constant (Pmode, base_rtx, -adjustment)); - mem = gen_rtx_MEM (mode, mem); + mem = gen_frame_mem (mode, mem); insn = emit_move_insn (mem, reg); RTX_FRAME_RELATED_P (insn) = 1; @@ -3011,7 +3005,7 @@ aarch64_pop_regs (unsigned regno1, unsigned regno2, HOST_WIDE_INT adjustment, { rtx mem = plus_constant (Pmode, stack_pointer_rtx, adjustment); mem = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, mem); - emit_move_insn (reg1, gen_rtx_MEM (mode, mem)); + emit_move_insn (reg1, gen_frame_mem (mode, mem)); } else { @@ -3062,8 +3056,6 @@ aarch64_save_callee_saves (machine_mode mode, HOST_WIDE_INT start_offset, unsigned start, unsigned limit, bool skip_wb) { rtx_insn *insn; - rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed - ? gen_frame_mem : gen_rtx_MEM); unsigned regno; unsigned regno2; @@ -3081,8 +3073,8 @@ aarch64_save_callee_saves (machine_mode mode, HOST_WIDE_INT start_offset, reg = gen_rtx_REG (mode, regno); offset = start_offset + cfun->machine->frame.reg_offset[regno]; - mem = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx, - offset)); + mem = gen_frame_mem (mode, plus_constant (Pmode, stack_pointer_rtx, + offset)); regno2 = aarch64_next_callee_save (regno + 1, limit); @@ -3095,8 +3087,8 @@ aarch64_save_callee_saves (machine_mode mode, HOST_WIDE_INT start_offset, rtx mem2; offset = start_offset + cfun->machine->frame.reg_offset[regno2]; - mem2 = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx, - offset)); + mem2 = gen_frame_mem (mode, plus_constant (Pmode, stack_pointer_rtx, + offset)); insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2, reg2)); @@ -3120,8 +3112,6 @@ aarch64_restore_callee_saves (machine_mode mode, unsigned limit, bool skip_wb, rtx *cfi_ops) { rtx base_rtx = stack_pointer_rtx; - rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed - ? gen_frame_mem : gen_rtx_MEM); unsigned regno; unsigned regno2; HOST_WIDE_INT offset; @@ -3139,7 +3129,7 @@ aarch64_restore_callee_saves (machine_mode mode, reg = gen_rtx_REG (mode, regno); offset = start_offset + cfun->machine->frame.reg_offset[regno]; - mem = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset)); + mem = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset)); regno2 = aarch64_next_callee_save (regno + 1, limit); @@ -3151,7 +3141,7 @@ aarch64_restore_callee_saves (machine_mode mode, rtx mem2; offset = start_offset + cfun->machine->frame.reg_offset[regno2]; - mem2 = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset)); + mem2 = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset)); emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2)); *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops); @@ -3217,6 +3207,7 @@ aarch64_expand_prologue (void) HOST_WIDE_INT callee_offset = cfun->machine->frame.callee_offset; unsigned reg1 = cfun->machine->frame.wb_candidate1; unsigned reg2 = cfun->machine->frame.wb_candidate2; + bool emit_frame_chain = cfun->machine->frame.emit_frame_chain; rtx_insn *insn; if (flag_stack_usage_info) @@ -3239,7 +3230,7 @@ aarch64_expand_prologue (void) if (callee_adjust != 0) aarch64_push_regs (reg1, reg2, callee_adjust); - if (frame_pointer_needed) + if (emit_frame_chain) { if (callee_adjust == 0) aarch64_save_callee_saves (DImode, callee_offset, R29_REGNUM, @@ -3247,12 +3238,12 @@ aarch64_expand_prologue (void) insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx, stack_pointer_rtx, GEN_INT (callee_offset))); - RTX_FRAME_RELATED_P (insn) = 1; + RTX_FRAME_RELATED_P (insn) = frame_pointer_needed; emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx)); } aarch64_save_callee_saves (DImode, callee_offset, R0_REGNUM, R30_REGNUM, - callee_adjust != 0 || frame_pointer_needed); + callee_adjust != 0 || emit_frame_chain); aarch64_save_callee_saves (DFmode, callee_offset, V0_REGNUM, V31_REGNUM, callee_adjust != 0 || frame_pointer_needed); aarch64_sub_sp (IP1_REGNUM, final_adjust, !frame_pointer_needed); @@ -5157,36 +5148,13 @@ aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x, } static bool -aarch64_can_eliminate (const int from, const int to) +aarch64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to) { - /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into - HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */ - - if (frame_pointer_needed) - { - if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) - return true; - if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM) - return false; - if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM - && !cfun->calls_alloca) - return true; - if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) - return true; - - return false; - } - else - { - /* If we decided that we didn't need a leaf frame pointer but then used - LR in the function, then we'll want a frame pointer after all, so - prevent this elimination to ensure a frame pointer is used. */ - if (to == STACK_POINTER_REGNUM - && flag_omit_leaf_frame_pointer - && df_regs_ever_live_p (LR_REGNUM)) - return false; - } - + /* If we need a frame pointer, eliminate to HARD_FRAME_POINTER_REGNUM. + Use FP as well as with a large number of outgoing arguments so + that stack offsets are smaller - this may generate better code. */ + if (frame_pointer_needed || crtl->outgoing_args_size > 64) + return to == HARD_FRAME_POINTER_REGNUM; return true; } @@ -8112,24 +8080,13 @@ aarch64_parse_override_string (const char* input_string, static void aarch64_override_options_after_change_1 (struct gcc_options *opts) { - /* The logic here is that if we are disabling all frame pointer generation - then we do not need to disable leaf frame pointer generation as a - separate operation. But if we are *only* disabling leaf frame pointer - generation then we set flag_omit_frame_pointer to true, but in - aarch64_frame_pointer_required we return false only for leaf functions. - - PR 70044: We have to be careful about being called multiple times for the - same function. Once we have decided to set flag_omit_frame_pointer just - so that we can omit leaf frame pointers, we must then not interpret a - second call as meaning that all frame pointer generation should be - omitted. We do this by setting flag_omit_frame_pointer to a special, - non-zero value. */ - if (opts->x_flag_omit_frame_pointer == 2) - opts->x_flag_omit_frame_pointer = 0; - - if (opts->x_flag_omit_frame_pointer) - opts->x_flag_omit_leaf_frame_pointer = false; - else if (opts->x_flag_omit_leaf_frame_pointer) + /* PR 70044: We have to be careful about being called multiple times for the + same function. This means all changes should be repeatable. */ + + /* If the frame pointer is enabled, set the flag to a special value. + To implement -momit-leaf-frame-pointer this special value is checked in + aarch64_layout_frame. The frame chain is emitted only when required. */ + if (opts->x_flag_omit_frame_pointer == 0) opts->x_flag_omit_frame_pointer = 2; /* If not optimizing for size, set the default @@ -14126,9 +14083,6 @@ aarch64_optab_supported_p (int op, machine_mode mode1, machine_mode, #undef TARGET_FUNCTION_VALUE_REGNO_P #define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p -#undef TARGET_FRAME_POINTER_REQUIRED -#define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required - #undef TARGET_GIMPLE_FOLD_BUILTIN #define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin