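Adds functions emit_msabi_outlined_save and emit_msabi_outlined_restore, which are called from ix86_expand_prologue and ix86_expand_epilogue, respectively, along with the static helpers gen_frame_set, gen_frame_load and gen_frame_store that build the frame load/store SETs they use. ix86_emit_leave gains an rtx_insn * parameter: when it is non-null, no leave insn is emitted and the notes are attached to the given insn instead, which the restore stub's tail-jump relies on. Also adds the code to ix86_expand_call that enables the optimization (setting the machine_function's outline_ms_sysv field). --- gcc/config/i386/i386.c | 298 +++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 279 insertions(+), 19 deletions(-)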
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 1dc244e..6345c61 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -13882,6 +13882,103 @@ ix86_elim_entry_set_got (rtx reg) } } +static rtx +gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store) +{ + rtx addr, mem; + + if (offset) + addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset)); + mem = gen_frame_mem (GET_MODE (reg), offset ? addr : frame_reg); + return gen_rtx_SET (store ? mem : reg, store ? reg : mem); +} + +static inline rtx +gen_frame_load (rtx reg, rtx frame_reg, int offset) +{ + return gen_frame_set (reg, frame_reg, offset, false); +} + +static inline rtx +gen_frame_store (rtx reg, rtx frame_reg, int offset) +{ + return gen_frame_set (reg, frame_reg, offset, true); +} + +static void +emit_msabi_outlined_save (const struct ix86_frame &frame) +{ + struct machine_function *m = cfun->machine; + const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS + + m->outline_ms_sysv_extra_regs; + rtvec v = rtvec_alloc (ncregs - 1 + 3); + rtx insn, sym, tmp; + rtx rax = gen_rtx_REG (word_mode, AX_REG); + unsigned i = 0; + unsigned j; + const struct xlogue_layout &xlogue = xlogue_layout::get_instance (); + HOST_WIDE_INT stack_used = xlogue.get_stack_space_used (); + HOST_WIDE_INT stack_alloc_size = stack_used; + HOST_WIDE_INT rax_offset = xlogue.get_stub_ptr_offset (); + + /* Verify that the incoming stack 16-byte alignment offset matches the + layout we're using. */ + gcc_assert ((m->fs.sp_offset & 15) == xlogue.get_stack_align_off_in ()); + + sym = xlogue.get_stub_rtx (frame_pointer_needed ? XLOGUE_STUB_SAVE_HFP + : XLOGUE_STUB_SAVE); + RTVEC_ELT (v, i++) = gen_rtx_USE (VOIDmode, sym); + + /* Combine as many other allocations as possible. */ + if (frame.nregs == 0) + { + if (frame.nsseregs == 0) + /* If no other GP or SSE regs, we allocate the whole stack frame. 
*/ + stack_alloc_size = frame.stack_pointer_offset - m->fs.sp_offset; + else + stack_alloc_size = frame.reg_save_offset - m->fs.sp_offset; + + gcc_assert (stack_alloc_size >= stack_used); + } + + if (crtl->stack_realign_needed) + { + int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT; + + gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT); + insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx, stack_pointer_rtx, + GEN_INT (-align_bytes))); + RTX_FRAME_RELATED_P (insn) = 1; + RTVEC_ELT (v, i++) = const1_rtx; + } + else + RTVEC_ELT (v, i++) = const0_rtx; + + tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-rax_offset)); + insn = emit_insn (gen_rtx_SET (rax, tmp)); + + pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, + GEN_INT (-stack_alloc_size), -1, + m->fs.cfa_reg == stack_pointer_rtx); + + for (j = 0; j < ncregs; ++j) + { + const xlogue_layout::reginfo &r = xlogue.get_reginfo (j); + rtx store; + rtx reg; + + reg = gen_rtx_REG (SSE_REGNO_P (r.regno) ? V4SFmode : word_mode, + r.regno); + store = gen_frame_store (reg, rax, -r.offset); + RTVEC_ELT (v, i++) = store; + } + + gcc_assert (i == (unsigned)GET_NUM_ELEM (v)); + + insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, v)); + RTX_FRAME_RELATED_P (insn) = true; +} + /* Expand the prologue into a bunch of separate insns. */ void @@ -14095,6 +14192,11 @@ ix86_expand_prologue (void) } } + /* Call to outlining stub occurs after pushing frame pointer (if it was + needed). */ + if (m->outline_ms_sysv) + emit_msabi_outlined_save (frame); + if (!int_registers_saved) { /* If saving registers via PUSH, do so now. */ @@ -14123,20 +14225,24 @@ ix86_expand_prologue (void) int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT; gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT); - /* The computation of the size of the re-aligned stack frame means - that we must allocate the size of the register save area before - performing the actual alignment. 
Otherwise we cannot guarantee - that there's enough storage above the realignment point. */ - if (m->fs.sp_offset != frame.sse_reg_save_offset) - pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, - GEN_INT (m->fs.sp_offset - - frame.sse_reg_save_offset), - -1, false); + /* If using stub, stack will have already been aligned. */ + if (!m->outline_ms_sysv) + { + /* The computation of the size of the re-aligned stack frame means + that we must allocate the size of the register save area before + performing the actual alignment. Otherwise we cannot guarantee + that there's enough storage above the realignment point. */ + if (m->fs.sp_offset != frame.sse_reg_save_offset) + pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, + GEN_INT (m->fs.sp_offset + - frame.sse_reg_save_offset), + -1, false); - /* Align the stack. */ - insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx, - stack_pointer_rtx, - GEN_INT (-align_bytes))); + /* Align the stack. */ + insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx, + stack_pointer_rtx, + GEN_INT (-align_bytes))); + } /* For the purposes of register save area addressing, the stack pointer is no longer valid. As for the value of sp_offset, @@ -14466,17 +14572,19 @@ ix86_emit_restore_regs_using_pop (void) unsigned int regno; for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) - if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false)) + if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true)) ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno)); } -/* Emit code and notes for the LEAVE instruction. */ +/* Emit code and notes for the LEAVE instruction. If insn is non-null, + omits the emit and only attaches the notes. 
*/ static void -ix86_emit_leave (void) +ix86_emit_leave (rtx_insn *insn) { struct machine_function *m = cfun->machine; - rtx_insn *insn = emit_insn (ix86_gen_leave ()); + if (!insn) + insn = emit_insn (ix86_gen_leave ()); ix86_add_queued_cfa_restore_notes (insn); @@ -14568,6 +14676,140 @@ ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset, } } +static void +emit_msabi_outlined_restore (const struct ix86_frame &frame, bool use_call, + int style) +{ + struct machine_function *m = cfun->machine; + const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS + + m->outline_ms_sysv_extra_regs; + unsigned elems_needed = ncregs + 1; + rtvec v; + rtx_insn *insn; + rtx sym, tmp; + rtx rsi = gen_rtx_REG (word_mode, SI_REG); + rtx r10 = NULL_RTX; + rtx cfa_adjust_note = NULL_RTX; + unsigned i = 0; + unsigned j; + const struct xlogue_layout &xlogue = xlogue_layout::get_instance (); + HOST_WIDE_INT stack_restore_offset; + HOST_WIDE_INT stub_ptr_offset = xlogue.get_stub_ptr_offset (); + HOST_WIDE_INT rsi_offset; + rtx rsi_frame_load = NULL_RTX; + HOST_WIDE_INT rsi_restore_offset = 0x7fffffff; + enum xlogue_stub stub; + + stack_restore_offset = m->fs.sp_offset - frame.hard_frame_pointer_offset; + rsi_offset = stack_restore_offset - stub_ptr_offset; + gcc_assert (!m->fs.fp_valid || frame_pointer_needed); + tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (rsi_offset)); + insn = emit_insn (gen_rtx_SET (rsi, tmp)); + + if (frame_pointer_needed) + stub = use_call ? XLOGUE_STUB_RESTORE_HFP + : XLOGUE_STUB_RESTORE_HFP_TAIL; + else + stub = use_call ? XLOGUE_STUB_RESTORE + : XLOGUE_STUB_RESTORE_TAIL; + + sym = xlogue.get_stub_rtx (stub); + + if (!use_call) + elems_needed += frame_pointer_needed ? 2 : 3; + v = rtvec_alloc (elems_needed); + + /* If: we need to pop incoming args or a sibling call will follow, then + we want to call the epilogue stub instead of jumping to it. 
*/ + if (use_call) + RTVEC_ELT (v, i++) = gen_rtx_USE (VOIDmode, sym); + else + { + RTVEC_ELT (v, i++) = ret_rtx; + RTVEC_ELT (v, i++) = gen_rtx_USE (VOIDmode, sym); + if (!frame_pointer_needed) + { + gcc_assert (!m->fs.fp_valid); + gcc_assert (m->fs.cfa_reg == stack_pointer_rtx); + gcc_assert (m->fs.sp_valid); + + tmp = GEN_INT (stub_ptr_offset); + tmp = gen_rtx_PLUS (Pmode, rsi, tmp); + r10 = gen_rtx_REG (DImode, R10_REG); + insn = emit_insn (gen_rtx_SET (r10, tmp)); + RTVEC_ELT (v, i++) = const0_rtx; + } + else + { + gcc_assert (m->fs.fp_valid); + gcc_assert (m->fs.cfa_reg == hard_frame_pointer_rtx); + + RTVEC_ELT (v, i++) = const1_rtx; + } + } + + for (j = 0; j < ncregs; ++j) + { + const xlogue_layout::reginfo &r = xlogue.get_reginfo (j); + enum machine_mode mode = SSE_REGNO_P (r.regno) ? V4SFmode : word_mode; + rtx reg, restore_note; + + reg = gen_rtx_REG (mode, r.regno); + restore_note = gen_frame_load (reg, rsi, r.offset); + + /* Save RSI frame load insn & note to add later. */ + if (r.regno == SI_REG) + { + gcc_assert (!rsi_frame_load); + rsi_frame_load = restore_note; + rsi_restore_offset = r.offset; + } + else + { + RTVEC_ELT (v, i++) = restore_note; + ix86_add_cfa_restore_note (NULL, reg, r.offset); + } + } + + /* Add RSI frame load & restore note at the end. */ + gcc_assert (rsi_frame_load); + RTVEC_ELT (v, i++) = rsi_frame_load; + ix86_add_cfa_restore_note (NULL, gen_rtx_REG (DImode, SI_REG), + rsi_restore_offset); + + /* Finally, for tail-call w/o a hard frame pointer, set SP to R10. 
*/ + if (!use_call && !frame_pointer_needed) + { + cfa_adjust_note = gen_rtx_SET(stack_pointer_rtx, r10); + RTVEC_ELT (v, i++) = cfa_adjust_note; + m->fs.cfa_offset -= stack_restore_offset; + m->fs.sp_offset -= stack_restore_offset; + } + + gcc_assert (i == (unsigned)GET_NUM_ELEM (v)); + tmp = gen_rtx_PARALLEL (VOIDmode, v); + if (use_call) + insn = emit_insn (tmp); + else + { + insn = emit_jump_insn (tmp); + JUMP_LABEL (insn) = ret_rtx; + + if (frame_pointer_needed) + ix86_emit_leave (insn); + else + add_reg_note (insn, REG_CFA_ADJUST_CFA, cfa_adjust_note); + } + + RTX_FRAME_RELATED_P (insn) = true; + ix86_add_queued_cfa_restore_notes (insn); + + if (use_call) + pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, + GEN_INT (stack_restore_offset), style, + m->fs.cfa_reg == stack_pointer_rtx); +} + /* Restore function stack, frame, and registers. */ void @@ -14578,6 +14820,7 @@ ix86_expand_epilogue (int style) struct ix86_frame frame; bool restore_regs_via_mov; bool using_drap; + bool restore_stub_uses_call = false; ix86_finalize_stack_realign_flags (); ix86_compute_frame_layout (&frame); @@ -14782,6 +15025,10 @@ ix86_expand_epilogue (int style) - frame.reg_save_offset), style, false); } + /* If using an out-of-lined stub and there are no int regs to restore + inline then we want to let the stub handle the stack restore. */ + else if (m->outline_ms_sysv && !frame.nregs) + ; else if (m->fs.sp_offset != frame.reg_save_offset) { pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, @@ -14794,6 +15041,15 @@ ix86_expand_epilogue (int style) ix86_emit_restore_regs_using_pop (); } + if (m->outline_ms_sysv) + { + int popc = crtl->args.pops_args && crtl->args.size ? crtl->args.size : 0; + + restore_stub_uses_call = popc || style == 0 || (m->fs.fp_valid + && !crtl->stack_realign_needed); + emit_msabi_outlined_restore (frame, restore_stub_uses_call, style); + } + /* If we used a stack pointer and haven't already got rid of it, then do so now. 
*/ if (m->fs.fp_valid) @@ -14807,7 +15063,7 @@ ix86_expand_epilogue (int style) else if (TARGET_USE_LEAVE || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun)) || !cfun->machine->use_fast_prologue_epilogue) - ix86_emit_leave (); + ix86_emit_leave (NULL); else { pro_epilogue_adjust_stack (stack_pointer_rtx, @@ -14917,7 +15173,7 @@ ix86_expand_epilogue (int style) else emit_jump_insn (gen_simple_return_pop_internal (popc)); } - else + else if (!m->outline_ms_sysv || restore_stub_uses_call) emit_jump_insn (gen_simple_return_internal ()); /* Restore the state back to the state from the prologue, @@ -28568,6 +28824,10 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1, clobber_reg (&use, gen_rtx_REG (mode, regno)); } + + /* Set here, but it may get cleared later. */ + if (TARGET_OUTLINE_MSABI_XLOGUES) + cfun->machine->outline_ms_sysv = true; } if (vec_len > 1) -- 2.9.0