It would probably be useful to post the actual code. The function
emit_msabi_outlined_restore() below is called from ix86_expand_epilogue()
to emit the RTL that calls the restore stub. Like ix86_expand_epilogue(), it
uses style == 0 to indicate that a sibling call follows the epilogue, in
which case we call the stub rather than jmp to it. It also uses a call if we
need to pop incoming args or are using a hard frame pointer.
The problem appears to be that the lack of a function declaration causes
get_call_reg_set_usage() (in final.c) to fall back to the target default
"regs_invalidated_by_call" set instead of what I've supplied with
add_function_usage_to() and the gen_frame_load() insns for each register
restored. I'm developing against 5.4.0 since I need a known-good compiler
for Wine testing; I plan to rebase the work later.
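For reference, my understanding of the path in final.c that I'm hitting is
roughly the following; this is paraphrased from memory rather than copied
from the 5.4.0 sources, so the exact names and signature may differ
slightly:

bool
get_call_reg_set_usage (rtx_insn *insn, HARD_REG_SET *reg_set,
                        HARD_REG_SET default_set)
{
  if (flag_ipa_ra)
    {
      /* Without a function declaration on the call there is no cgraph
         node, so no recorded register usage is found here...  */
      struct cgraph_rtl_info *node = get_call_cgraph_rtl_info (insn);
      if (node != NULL && node->function_used_regs_valid)
        {
          COPY_HARD_REG_SET (*reg_set, node->function_used_regs);
          AND_HARD_REG_SET (*reg_set, default_set);
          return true;
        }
    }

  /* ...and we fall through to DEFAULT_SET, which at the call sites is
     regs_invalidated_by_call.  */
  COPY_HARD_REG_SET (*reg_set, default_set);
  return false;
}

If I'm reading that right, nothing attached to the call insn itself is
consulted on this path, only the callee's cgraph summary, which doesn't
exist for the stub. With that context, here is the epilogue side: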
static bool
emit_msabi_outlined_restore (const struct ix86_frame &frame, bool use_call,
                             int style)
{
  struct machine_function *m = cfun->machine;
  const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
                          + m->outline_ms_sysv_extra_regs;
  rtvec v = rtvec_alloc (ncregs - 1 + (use_call ? 3 : 5));
  rtx insn, sym, tmp;
  rtx rsi = gen_rtx_REG (word_mode, SI_REG);
  rtx use = NULL_RTX;
  rtx note = NULL_RTX;
  unsigned i = 0;
  const struct xlogue_layout &xlogue = xlogue_layout::get_instance ();
  HOST_WIDE_INT stack_restore_offset;
  HOST_WIDE_INT reg_data_offset;
  HOST_WIDE_INT rsi_offset;
  rtx rsi_frame_load = NULL_RTX;
  HOST_WIDE_INT rsi_restore_offset = 0x7fffffff;
  const typeof (xlogue.regs[0]) *ri;

  gcc_assert (m->fs.sp_valid);

  stack_restore_offset = m->fs.sp_offset - frame.hard_frame_pointer_offset;
  rsi_offset = stack_restore_offset - xlogue.get_offset ();
  reg_data_offset = stack_restore_offset;

  /* Adjust for alignment.  */
  if (m->outline_ms_sysv_offset_in)
    reg_data_offset -= UNITS_PER_WORD;

  tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (rsi_offset));
  insn = emit_insn (gen_rtx_SET (VOIDmode, rsi, tmp));
  use_reg (&use, rsi);

  /* Construct the restore_multiple/restore_multiple_and_return insn.  */
  sym = xlogue.get_stub_rtx (use_call ? XLOGUE_STUB_RESTORE
                                      : XLOGUE_STUB_RESTORE_RET);

  /* Verify that the note queue is empty.  */
  gcc_assert (!queued_cfa_restores);

  /* If
       - we need to pop incoming args,
       - this is a sibcall, or
       - we have a hard frame pointer,
     then we want to call the epilogue stub instead of jumping to it.  */
  if (use_call)
    {
      tmp = gen_rtx_MEM (QImode, sym);
      RTVEC_ELT (v, i++) = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
    }
  else
    {
      rtx r10;

      RTVEC_ELT (v, i++) = ret_rtx;
      RTVEC_ELT (v, i++) = gen_rtx_USE (VOIDmode, sym);
      tmp = GEN_INT (stack_restore_offset);
      tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, tmp);
      r10 = gen_rtx_REG (DImode, R10_REG);
      RTVEC_ELT (v, i++) = gen_rtx_SET (VOIDmode, r10, tmp);
      gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
      gcc_assert (m->fs.sp_valid);
      m->fs.sp_offset -= stack_restore_offset;
      note = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
                           GEN_INT (stack_restore_offset));
      note = gen_rtx_SET (VOIDmode, stack_pointer_rtx, note);
    }

  RTVEC_ELT (v, i++) = gen_rtx_CLOBBER (VOIDmode,
                                        gen_rtx_REG (CCmode, FLAGS_REG));

  for (ri = &xlogue.regs[0]; ri != &xlogue.regs[ncregs]; ++ri)
    {
      enum machine_mode mode = SSE_REGNO_P (ri->regno) ? V4SFmode : word_mode;
      rtx reg, restore_note;
      HOST_WIDE_INT offset = ri->offset - 0x70;

      reg = gen_rtx_REG (mode, ri->regno);
      restore_note = gen_frame_load (reg, rsi, offset);

      /* Make sure the RSI frame load/restore note is last.  */
      /* TODO: Do I really need to reorder this?  */
      if (ri->regno == SI_REG)
        {
          gcc_assert (!rsi_frame_load);
          rsi_frame_load = restore_note;
          rsi_restore_offset = offset;
        }
      else
        {
          RTVEC_ELT (v, i++) = restore_note;
          ix86_add_cfa_restore_note (NULL_RTX, reg, offset);
        }
    }

  /* Add the frame load & restore note for RSI last.  */
  gcc_assert (rsi_frame_load);
  RTVEC_ELT (v, i++) = rsi_frame_load;
  ix86_add_cfa_restore_note (NULL_RTX, gen_rtx_REG (DImode, SI_REG),
                             rsi_restore_offset);

  gcc_assert (i == (unsigned) GET_NUM_ELEM (v));
  tmp = gen_rtx_PARALLEL (VOIDmode, v);

  if (use_call)
    {
      insn = emit_call_insn (tmp);
      add_reg_note (insn, REG_CALL_DECL, sym);
      add_function_usage_to (insn, use);
    }
  else
    {
      insn = emit_jump_insn (tmp);
      JUMP_LABEL (insn) = ret_rtx;
      add_reg_note (insn, REG_CFA_ADJUST_CFA, note);
    }

  RTX_FRAME_RELATED_P (insn) = true;
  ix86_add_queued_cfa_restore_notes (insn);

  if (use_call)
    pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                               GEN_INT (stack_restore_offset), style,
                               m->fs.cfa_reg == stack_pointer_rtx);

  return use_call;
}
Finally, these are the actual stubs:
#ifdef __x86_64__

# ifdef __ELF__
#  define ELFFN(fn) .type fn,@function
# else
#  define ELFFN(fn)
# endif

# define HIDDEN_FUNC(fn)        \
        .global fn;             \
        .hidden fn;             \
        ELFFN(fn);              \
fn:

# define FUNC_END(fn) .size fn,.-fn

# ifdef __AVX__
#  define MOVAPS vmovaps
# else
#  define MOVAPS movaps
# endif

/* Save SSE registers 6-15.  off is the offset from the stack pointer
   where xmm6 is stored.  */
.macro SSE_SAVE off=0
        MOVAPS %xmm15,(\off - 0x90)(%rax)
        MOVAPS %xmm14,(\off - 0x80)(%rax)
        MOVAPS %xmm13,(\off - 0x70)(%rax)
        MOVAPS %xmm12,(\off - 0x60)(%rax)
        MOVAPS %xmm11,(\off - 0x50)(%rax)
        MOVAPS %xmm10,(\off - 0x40)(%rax)
        MOVAPS %xmm9, (\off - 0x30)(%rax)
        MOVAPS %xmm8, (\off - 0x20)(%rax)
        MOVAPS %xmm7, (\off - 0x10)(%rax)
        MOVAPS %xmm6, \off(%rax)
.endm

/* Restore SSE registers 6-15.  */
.macro SSE_RESTORE off=0
        MOVAPS (\off - 0x90)(%rsi), %xmm15
        MOVAPS (\off - 0x80)(%rsi), %xmm14
        MOVAPS (\off - 0x70)(%rsi), %xmm13
        MOVAPS (\off - 0x60)(%rsi), %xmm12
        MOVAPS (\off - 0x50)(%rsi), %xmm11
        MOVAPS (\off - 0x40)(%rsi), %xmm10
        MOVAPS (\off - 0x30)(%rsi), %xmm9
        MOVAPS (\off - 0x20)(%rsi), %xmm8
        MOVAPS (\off - 0x10)(%rsi), %xmm7
        MOVAPS \off(%rsi), %xmm6
.endm
        .text

/*
 * to call:
 *      lea -xxx(%rsp), %rax    # xxx is 0x70 or 0x78 (depending upon
 *                              # incoming stack alignment offset)
 *      subq $xxx, %rsp         # xxx is however much stack space the fn needs
 *      callq __msabi_save_<nregs>
 */
HIDDEN_FUNC(__msabi_save_18)
        mov %r15,-0x70(%rax)
HIDDEN_FUNC(__msabi_save_17)
        mov %r14,-0x68(%rax)
HIDDEN_FUNC(__msabi_save_16)
        mov %r13,-0x60(%rax)
HIDDEN_FUNC(__msabi_save_15)
        mov %r12,-0x58(%rax)
HIDDEN_FUNC(__msabi_save_14)
        mov %rbp,-0x50(%rax)
HIDDEN_FUNC(__msabi_save_13)
        mov %rbx,-0x48(%rax)
HIDDEN_FUNC(__msabi_save_12)
        mov %rdi,-0x40(%rax)
        mov %rsi,-0x38(%rax)
        SSE_SAVE off=0x60
        ret
FUNC_END(__msabi_save_12)
FUNC_END(__msabi_save_13)
FUNC_END(__msabi_save_14)
FUNC_END(__msabi_save_15)
FUNC_END(__msabi_save_16)
FUNC_END(__msabi_save_17)
FUNC_END(__msabi_save_18)
/*
 * to call:
 *      lea xxx(%rsp), %rsi     # xxx = SP adjustment to point to the
 *                              # -0x70 offset for data
 *      lea xxx(%rsp), %r10     # xxx = SP adjustment to restore stack
 *      jmp __msabi_restore_ret_<nregs>
 */
HIDDEN_FUNC(__msabi_restore_ret_18)
        mov -0x70(%rsi),%r15
HIDDEN_FUNC(__msabi_restore_ret_17)
        mov -0x68(%rsi),%r14
HIDDEN_FUNC(__msabi_restore_ret_16)
        mov -0x60(%rsi),%r13
HIDDEN_FUNC(__msabi_restore_ret_15)
        mov -0x58(%rsi),%r12
HIDDEN_FUNC(__msabi_restore_ret_14)
        mov -0x50(%rsi),%rbp
HIDDEN_FUNC(__msabi_restore_ret_13)
        mov -0x48(%rsi),%rbx
HIDDEN_FUNC(__msabi_restore_ret_12)
        mov -0x40(%rsi),%rdi
        SSE_RESTORE off=0x60
        mov -0x38(%rsi),%rsi
        mov %r10,%rsp
        ret
FUNC_END(__msabi_restore_ret_12)
FUNC_END(__msabi_restore_ret_13)
FUNC_END(__msabi_restore_ret_14)
FUNC_END(__msabi_restore_ret_15)
FUNC_END(__msabi_restore_ret_16)
FUNC_END(__msabi_restore_ret_17)
FUNC_END(__msabi_restore_ret_18)
/*
 * to call:
 *      lea xxx(%rsp), %rsi     # xxx = SP adjustment to point to the
 *                              # -0x70 offset for data
 *      callq __msabi_restore_<nregs>
 *      subq $xxx,%rsp          # xxx = SP adjustment to restore stack
 */
HIDDEN_FUNC(__msabi_restore_18)
        mov -0x70(%rsi),%r15
HIDDEN_FUNC(__msabi_restore_17)
        mov -0x68(%rsi),%r14
HIDDEN_FUNC(__msabi_restore_16)
        mov -0x60(%rsi),%r13
HIDDEN_FUNC(__msabi_restore_15)
        mov -0x58(%rsi),%r12
HIDDEN_FUNC(__msabi_restore_14)
        mov -0x50(%rsi),%rbp
HIDDEN_FUNC(__msabi_restore_13)
        mov -0x48(%rsi),%rbx
HIDDEN_FUNC(__msabi_restore_12)
        mov -0x40(%rsi),%rdi
        SSE_RESTORE off=0x60
        mov -0x38(%rsi),%rsi
        ret
FUNC_END(__msabi_restore_12)
FUNC_END(__msabi_restore_13)
FUNC_END(__msabi_restore_14)
FUNC_END(__msabi_restore_15)
FUNC_END(__msabi_restore_16)
FUNC_END(__msabi_restore_17)
FUNC_END(__msabi_restore_18)
#endif /* __x86_64__ */
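To make the calling sequences in the comments concrete, a hand-written
function using the 18-register save and restore_ret stubs would look
roughly like this. The label and frame sizes are made up for the example,
and the -0x78 anchor assumes the usual 8 mod 16 stack alignment at function
entry:

example_fn:
        lea     -0x78(%rsp), %rax       # anchor for the register save area
        subq    $0x108, %rsp            # save area + locals (made-up size)
        callq   __msabi_save_18
        # ... function body, %rsp unchanged ...
        lea     0x90(%rsp), %rsi        # same anchor the save stub used
        lea     0x108(%rsp), %r10       # original %rsp, for the stub to restore
        jmp     __msabi_restore_ret_18

The %r10 copy is what lets the returning stub tear down the frame with its
final mov %r10,%rsp before the ret.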
Thanks!
Daniel