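Create an IRET-compatible frame at the top of the stack at syscall entry (user SS, RSP, RFLAGS, CS and RIP, in the same layout a regular interrupt frame uses), and use the values saved in that frame to reload the user-mode %rip, %rflags and %rsp in the sysret path instead of reading the per-CPU old_rsp. This removes the need for the FIXUP_TOP_OF_STACK and RESTORE_TOP_OF_STACK macros.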
Signed-off-by: Alexander van Heukelum <heuke...@fastmail.fm> --- arch/x86/kernel/entry_64.S | 75 +++++++++++++--------------------------------- 1 file changed, 21 insertions(+), 54 deletions(-) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 6b95c2f..c4cb8f1 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -33,8 +33,6 @@ * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS. * Gives a full stack frame. * - ENTRY/END Define functions in the symbol table. - * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack - * frame that is otherwise undefined after a SYSCALL * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging. * - idtentry - Define exception entry points. */ @@ -130,33 +128,6 @@ ENDPROC(native_usergs_sysret64) #endif /* - * C code is not supposed to know about undefined top of stack. Every time - * a C function with an pt_regs argument is called from the SYSCALL based - * fast path FIXUP_TOP_OF_STACK is needed. - * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs - * manipulation. 
- */ - - /* %rsp:at FRAMEEND */ - .macro FIXUP_TOP_OF_STACK tmp offset=0 - movq PER_CPU_VAR(old_rsp),\tmp - movq \tmp,RSP+\offset(%rsp) - movq $__USER_DS,SS+\offset(%rsp) - movq $__USER_CS,CS+\offset(%rsp) - movq RIP+\offset(%rsp),\tmp /* get rip */ - movq \tmp,RCX+\offset(%rsp) /* copy it to rcx as sysret would do */ - movq R11+\offset(%rsp),\tmp /* get eflags */ - movq \tmp,EFLAGS+\offset(%rsp) - .endm - - .macro RESTORE_TOP_OF_STACK tmp offset=0 - movq RSP+\offset(%rsp),\tmp - movq \tmp,PER_CPU_VAR(old_rsp) - movq EFLAGS+\offset(%rsp),\tmp - movq \tmp,R11+\offset(%rsp) - .endm - -/* * initial frame state for interrupts (and exceptions without error code) */ .macro EMPTY_FRAME start=1 offset=0 @@ -272,7 +243,6 @@ ENTRY(ret_from_fork) testl $_TIF_IA32, TI_flags(%rcx) # 32-bit compat task needs IRET jnz int_ret_from_sys_call - RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET jmp ret_from_sys_call # go to the SYSRET fastpath 1: @@ -339,10 +309,24 @@ GLOBAL(system_call_after_swapgs) * and short: */ ENABLE_INTERRUPTS(CLBR_NONE) - SAVE_ARGS 6*8, 0, rax_enosys=1 /* skip: hardware stackframe and orig_rax */ + /* + * Save user mode rsp (temporarily saved above in old_rsp), + * rflags (%r11), rip (%rcx) and segments (fixed values) on + * the stack as a regular interrupt frame. 
+ */ + pushq_cfi $__USER_DS + /* CFI_REL_OFFSET ss, 0 */ + pushq_cfi PER_CPU_VAR(old_rsp) + CFI_REL_OFFSET rsp, 0 + pushq_cfi %r11 /* %r11 clobbered (userspace %rflags) */ + /* CFI_REL_OFFSET rflags, 0 */ + pushq_cfi $__USER_CS + /* CFI_REL_OFFSET cs, 0 */ + pushq_cfi %rcx /* %rcx clobbered (userspace %rip) */ + CFI_REL_OFFSET rip, 0 + + SAVE_ARGS 8, rax_enosys=1 movq_cfi rax,(ORIG_RAX-ARGOFFSET) - movq %rcx,RIP-ARGOFFSET(%rsp) - CFI_REL_OFFSET rip,RIP-ARGOFFSET testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,ARGOFFSET) jnz tracesys system_call_fastpath: @@ -362,7 +346,7 @@ system_call_fastpath: */ ret_from_sys_call: testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,ARGOFFSET) - jnz int_ret_from_sys_call_fixup /* Go the the slow path */ + jnz int_ret_from_sys_call /* Go the the slow path */ LOCKDEP_SYS_EXIT DISABLE_INTERRUPTS(CLBR_NONE) @@ -372,19 +356,16 @@ ret_from_sys_call: * sysretq will re-enable interrupts: */ TRACE_IRQS_ON + RESTORE_ARGS addskip=-ARG_SKIP, rstor_rcx=0, rstor_r11=0 movq RIP-ARGOFFSET(%rsp),%rcx CFI_REGISTER rip,rcx - RESTORE_ARGS 1,-ARG_SKIP,0 + mov EFLAGS-ARGOFFSET(%rsp), %r11 /*CFI_REGISTER rflags,r11*/ - movq PER_CPU_VAR(old_rsp), %rsp + mov RSP-ARGOFFSET(%rsp), %rsp USERGS_SYSRET64 CFI_RESTORE_STATE -int_ret_from_sys_call_fixup: - FIXUP_TOP_OF_STACK %r11, -ARGOFFSET - jmp int_ret_from_sys_call - /* Do syscall tracing */ tracesys: leaq -REST_SKIP(%rsp), %rdi @@ -397,7 +378,6 @@ tracesys: tracesys_phase2: SAVE_REST - FIXUP_TOP_OF_STACK %rdi movq %rsp, %rdi movq $AUDIT_ARCH_X86_64, %rsi movq %rax,%rdx @@ -493,10 +473,8 @@ ENTRY(stub_\func) PARTIAL_FRAME 0 SAVE_REST pushq %r11 /* put it back on stack */ - FIXUP_TOP_OF_STACK %r11, 8 DEFAULT_FRAME 0 8 /* offset 8: return address */ call sys_\func - RESTORE_TOP_OF_STACK %r11, 8 ret $REST_SKIP /* pop extended registers */ CFI_ENDPROC END(stub_\func) @@ -506,9 +484,7 @@ END(stub_\func) ENTRY(\label) CFI_STARTPROC PARTIAL_FRAME 0 8 /* offset 8: return address */ - FIXUP_TOP_OF_STACK %r11, 
8-ARGOFFSET call \func - RESTORE_TOP_OF_STACK %r11, 8-ARGOFFSET ret CFI_ENDPROC END(\label) @@ -524,7 +500,6 @@ ENTRY(stub_execve) addq $8, %rsp PARTIAL_FRAME 0 SAVE_REST - FIXUP_TOP_OF_STACK %r11 call sys_execve movq %rax,RAX(%rsp) RESTORE_REST @@ -537,9 +512,7 @@ ENTRY(stub_execveat) addq $8, %rsp PARTIAL_FRAME 0 SAVE_REST - FIXUP_TOP_OF_STACK %r11 call sys_execveat - RESTORE_TOP_OF_STACK %r11 movq %rax,RAX(%rsp) RESTORE_REST jmp int_ret_from_sys_call @@ -555,7 +528,6 @@ ENTRY(stub_rt_sigreturn) addq $8, %rsp PARTIAL_FRAME 0 SAVE_REST - FIXUP_TOP_OF_STACK %r11 call sys_rt_sigreturn movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer RESTORE_REST @@ -569,7 +541,6 @@ ENTRY(stub_x32_rt_sigreturn) addq $8, %rsp PARTIAL_FRAME 0 SAVE_REST - FIXUP_TOP_OF_STACK %r11 call sys32_x32_rt_sigreturn movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer RESTORE_REST @@ -582,9 +553,7 @@ ENTRY(stub_x32_execve) addq $8, %rsp PARTIAL_FRAME 0 SAVE_REST - FIXUP_TOP_OF_STACK %r11 call compat_sys_execve - RESTORE_TOP_OF_STACK %r11 movq %rax,RAX(%rsp) RESTORE_REST jmp int_ret_from_sys_call @@ -596,9 +565,7 @@ ENTRY(stub_x32_execveat) addq $8, %rsp PARTIAL_FRAME 0 SAVE_REST - FIXUP_TOP_OF_STACK %r11 call compat_sys_execveat - RESTORE_TOP_OF_STACK %r11 movq %rax,RAX(%rsp) RESTORE_REST jmp int_ret_from_sys_call -- 2.1.0 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/