ping Wilco Dijkstra wrote: > James Greenhalgh wrote: > > > I note this is still marked as an RFC, are you now proposing it as a > > patch to be merged to trunk? > > Absolutely. It was marked as an RFC to get some comments - I thought it > may be controversial to separate the frame pointer and frame chain concept. > And this fixes the long standing bugs caused by changing the global frame > pointer option to an incorrect value for the leaf function optimization.
Here is a rebased version that should patch without merge issues: Cleanup frame pointer usage. Introduce a boolean emit_frame_chain which determines whether to store FP and LR and setup FP to point at this record. When the frame pointer is enabled but not strictly required (eg. no use of alloca), we emit a frame chain in non-leaf functions, but don't use the frame pointer to access locals. This results in smaller code and unwind info. Simplify the logic in aarch64_override_options_after_change_1 () and compute whether the frame chain is required in aarch64_layout_frame () instead. As a result aarch64_frame_pointer_required is now redundant. Convert all callee save/restore functions to use gen_frame_mem. Bootstrap OK. ChangeLog: 2017-06-15 Wilco Dijkstra <wdijk...@arm.com> gcc/ PR middle-end/60580 * config/aarch64/aarch64.h (aarch64_frame): Add emit_frame_chain boolean. * config/aarch64/aarch64.c (aarch64_frame_pointer_required) Remove. (aarch64_layout_frame): Initialise emit_frame_chain. (aarch64_pushwb_single_reg): Use gen_frame_mem. (aarch64_pop_regs): Likewise. (aarch64_gen_load_pair): Likewise. (aarch64_save_callee_saves): Likewise. (aarch64_restore_callee_saves): Likewise. (aarch64_expand_prologue): Use emit_frame_chain. (aarch64_can_eliminate): Simplify. When FP needed or outgoing arguments are large, eliminate to FP, otherwise SP. (aarch64_override_options_after_change_1): Simplify. (TARGET_FRAME_POINTER_REQUIRED): Remove define. -- diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index 08acdeb52d4083f50a4b44f43fb98009cdcc041f..722c39cfc4d57280d621fb6130e4d9f4d59d1e72 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -591,6 +591,9 @@ struct GTY (()) aarch64_frame /* The size of the stack adjustment after saving callee-saves. */ HOST_WIDE_INT final_adjust; + /* Store FP,LR and setup a frame pointer. */ + bool emit_frame_chain; + unsigned wb_candidate1; unsigned wb_candidate2; diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index fd3005d8056e65cb32c92bbd5eb752c977c885a5..a97b4bbe9dc0f7bccc90a9337519038041241531 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -2761,24 +2761,6 @@ aarch64_output_probe_stack_range (rtx reg1, rtx reg2) return ""; } -static bool -aarch64_frame_pointer_required (void) -{ - /* In aarch64_override_options_after_change - flag_omit_leaf_frame_pointer turns off the frame pointer by - default. Turn it back on now if we've not got a leaf - function. */ - if (flag_omit_leaf_frame_pointer - && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM))) - return true; - - /* Force a frame pointer for EH returns so the return address is at FP+8. */ - if (crtl->calls_eh_return) - return true; - - return false; -} - /* Mark the registers that need to be saved by the callee and calculate the size of the callee-saved registers area and frame record (both FP and LR may be omitted). */ @@ -2791,6 +2773,18 @@ aarch64_layout_frame (void) if (reload_completed && cfun->machine->frame.laid_out) return; + /* Force a frame chain for EH returns so the return address is at FP+8. */ + cfun->machine->frame.emit_frame_chain + = frame_pointer_needed || crtl->calls_eh_return; + + /* Emit a frame chain if the frame pointer is enabled. + If -momit-leaf-frame-pointer is used, do not use a frame chain + in leaf functions which do not use LR. */ + if (flag_omit_frame_pointer == 2 + && !(flag_omit_leaf_frame_pointer && crtl->is_leaf + && !df_regs_ever_live_p (LR_REGNUM))) + cfun->machine->frame.emit_frame_chain = true; + #define SLOT_NOT_REQUIRED (-2) #define SLOT_REQUIRED (-1) @@ -2825,7 +2819,7 @@ aarch64_layout_frame (void) last_fp_reg = regno; } - if (frame_pointer_needed) + if (cfun->machine->frame.emit_frame_chain) { /* FP and LR are placed in the linkage record. */ cfun->machine->frame.reg_offset[R29_REGNUM] = 0; @@ -2997,7 +2991,7 @@ aarch64_pushwb_single_reg (machine_mode mode, unsigned regno, reg = gen_rtx_REG (mode, regno); mem = gen_rtx_PRE_MODIFY (Pmode, base_rtx, plus_constant (Pmode, base_rtx, -adjustment)); - mem = gen_rtx_MEM (mode, mem); + mem = gen_frame_mem (mode, mem); insn = emit_move_insn (mem, reg); RTX_FRAME_RELATED_P (insn) = 1; @@ -3085,7 +3079,7 @@ aarch64_pop_regs (unsigned regno1, unsigned regno2, HOST_WIDE_INT adjustment, { rtx mem = plus_constant (Pmode, stack_pointer_rtx, adjustment); mem = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, mem); - emit_move_insn (reg1, gen_rtx_MEM (mode, mem)); + emit_move_insn (reg1, gen_frame_mem (mode, mem)); } else { @@ -3161,8 +3155,6 @@ aarch64_save_callee_saves (machine_mode mode, HOST_WIDE_INT start_offset, unsigned start, unsigned limit, bool skip_wb) { rtx_insn *insn; - rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed - ? gen_frame_mem : gen_rtx_MEM); unsigned regno; unsigned regno2; @@ -3183,8 +3175,8 @@ aarch64_save_callee_saves (machine_mode mode, HOST_WIDE_INT start_offset, reg = gen_rtx_REG (mode, regno); offset = start_offset + cfun->machine->frame.reg_offset[regno]; - mem = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx, - offset)); + mem = gen_frame_mem (mode, plus_constant (Pmode, stack_pointer_rtx, + offset)); regno2 = aarch64_next_callee_save (regno + 1, limit); @@ -3198,8 +3190,8 @@ aarch64_save_callee_saves (machine_mode mode, HOST_WIDE_INT start_offset, rtx mem2; offset = start_offset + cfun->machine->frame.reg_offset[regno2]; - mem2 = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx, - offset)); + mem2 = gen_frame_mem (mode, plus_constant (Pmode, stack_pointer_rtx, + offset)); insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2, reg2)); @@ -3228,8 +3220,6 @@ aarch64_restore_callee_saves (machine_mode mode, unsigned limit, bool skip_wb, rtx *cfi_ops) { rtx base_rtx = stack_pointer_rtx; - rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed - ? gen_frame_mem : gen_rtx_MEM); unsigned regno; unsigned regno2; HOST_WIDE_INT offset; @@ -3250,7 +3240,7 @@ aarch64_restore_callee_saves (machine_mode mode, reg = gen_rtx_REG (mode, regno); offset = start_offset + cfun->machine->frame.reg_offset[regno]; - mem = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset)); + mem = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset)); regno2 = aarch64_next_callee_save (regno + 1, limit); @@ -3263,7 +3253,7 @@ aarch64_restore_callee_saves (machine_mode mode, rtx mem2; offset = start_offset + cfun->machine->frame.reg_offset[regno2]; - mem2 = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset)); + mem2 = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset)); emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2)); *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops); @@ -3568,6 +3558,7 @@ aarch64_expand_prologue (void) HOST_WIDE_INT callee_offset = cfun->machine->frame.callee_offset; unsigned reg1 = cfun->machine->frame.wb_candidate1; unsigned reg2 = cfun->machine->frame.wb_candidate2; + bool emit_frame_chain = cfun->machine->frame.emit_frame_chain; rtx_insn *insn; /* Sign return address for functions. */ @@ -3598,7 +3589,7 @@ aarch64_expand_prologue (void) if (callee_adjust != 0) aarch64_push_regs (reg1, reg2, callee_adjust); - if (frame_pointer_needed) + if (emit_frame_chain) { if (callee_adjust == 0) aarch64_save_callee_saves (DImode, callee_offset, R29_REGNUM, @@ -3606,12 +3597,12 @@ aarch64_expand_prologue (void) insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx, stack_pointer_rtx, GEN_INT (callee_offset))); - RTX_FRAME_RELATED_P (insn) = 1; + RTX_FRAME_RELATED_P (insn) = frame_pointer_needed; emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx)); } aarch64_save_callee_saves (DImode, callee_offset, R0_REGNUM, R30_REGNUM, - callee_adjust != 0 || frame_pointer_needed); + callee_adjust != 0 || emit_frame_chain); aarch64_save_callee_saves (DFmode, callee_offset, V0_REGNUM, V31_REGNUM, callee_adjust != 0 || frame_pointer_needed); aarch64_sub_sp (IP1_REGNUM, final_adjust, !frame_pointer_needed); @@ -5663,36 +5654,13 @@ aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x, } static bool -aarch64_can_eliminate (const int from, const int to) +aarch64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to) { - /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into - HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */ - - if (frame_pointer_needed) - { - if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) - return true; - if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM) - return false; - if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM - && !cfun->calls_alloca) - return true; - if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) - return true; - - return false; - } - else - { - /* If we decided that we didn't need a leaf frame pointer but then used - LR in the function, then we'll want a frame pointer after all, so - prevent this elimination to ensure a frame pointer is used. */ - if (to == STACK_POINTER_REGNUM - && flag_omit_leaf_frame_pointer - && df_regs_ever_live_p (LR_REGNUM)) - return false; - } - + /* If we need a frame pointer, eliminate to HARD_FRAME_POINTER_REGNUM. + Use FP as well as with a large number of outgoing arguments so + that stack offsets are smaller - this may generate better code. */ + if (frame_pointer_needed || crtl->outgoing_args_size > 64) + return to == HARD_FRAME_POINTER_REGNUM; return true; } @@ -8636,24 +8604,13 @@ aarch64_parse_override_string (const char* input_string, static void aarch64_override_options_after_change_1 (struct gcc_options *opts) { - /* The logic here is that if we are disabling all frame pointer generation - then we do not need to disable leaf frame pointer generation as a - separate operation. But if we are *only* disabling leaf frame pointer - generation then we set flag_omit_frame_pointer to true, but in - aarch64_frame_pointer_required we return false only for leaf functions. - - PR 70044: We have to be careful about being called multiple times for the - same function. Once we have decided to set flag_omit_frame_pointer just - so that we can omit leaf frame pointers, we must then not interpret a - second call as meaning that all frame pointer generation should be - omitted. We do this by setting flag_omit_frame_pointer to a special, - non-zero value. */ - if (opts->x_flag_omit_frame_pointer == 2) - opts->x_flag_omit_frame_pointer = 0; - - if (opts->x_flag_omit_frame_pointer) - opts->x_flag_omit_leaf_frame_pointer = false; - else if (opts->x_flag_omit_leaf_frame_pointer) + /* PR 70044: We have to be careful about being called multiple times for the + same function. This means all changes should be repeatable. */ + + /* If the frame pointer is enabled, set the flag to a special value. + To implement -momit-leaf-frame-pointer this special value is checked in + aarch64_layout_frame(). The frame chain is emitted only when required. */ + if (opts->x_flag_omit_frame_pointer == 0) opts->x_flag_omit_frame_pointer = 2; /* If not optimizing for size, set the default @@ -15039,9 +14996,6 @@ aarch64_run_selftests (void) #undef TARGET_FUNCTION_VALUE_REGNO_P #define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p -#undef TARGET_FRAME_POINTER_REQUIRED -#define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required - #undef TARGET_GIMPLE_FOLD_BUILTIN #define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin