When entering a syscall we're still using the user stack, so we can't reliably handle exceptions or interrupts, otherwise a user thread can easily crash the machine with an invalid stack. Instead, disable interrupts and (hopefully) avoid traps in the fragments where we need to have the user stack in RSP.
* i386/i386/ldt.c: mask interrupts and IOPL on syscall entry * x86_64/locore.S: keep interrupts disabled when we use the user stack --- i386/i386/ldt.c | 3 ++- x86_64/locore.S | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/i386/i386/ldt.c b/i386/i386/ldt.c index 4d7ec19a..5db36426 100644 --- a/i386/i386/ldt.c +++ b/i386/i386/ldt.c @@ -27,6 +27,7 @@ * "Local" descriptor table. At the moment, all tasks use the * same LDT. */ +#include <mach/machine/eflags.h> #include <mach/machine/vm_types.h> #include <mach/xen.h> @@ -75,7 +76,7 @@ ldt_fill(struct real_descriptor *myldt, struct real_descriptor *mygdt) wrmsr(MSR_REG_EFER, rdmsr(MSR_REG_EFER) | MSR_EFER_SCE); wrmsr(MSR_REG_LSTAR, (vm_offset_t)syscall64); wrmsr(MSR_REG_STAR, ((((long)USER_CS - 16) << 16) | (long)KERNEL_CS) << 32); - wrmsr(MSR_REG_FMASK, 0); // ? + wrmsr(MSR_REG_FMASK, EFL_IF | EFL_IOPL_USER); #else /* defined(__x86_64__) && ! defined(USER32) */ fill_ldt_gate(myldt, USER_SCALL, (vm_offset_t)&syscall, KERNEL_CS, diff --git a/x86_64/locore.S b/x86_64/locore.S index a6697fb9..16b0dde5 100644 --- a/x86_64/locore.S +++ b/x86_64/locore.S @@ -1405,9 +1405,10 @@ ENTRY(syscall64) mov %r11,%rbx /* prepare for error handling */ mov %r10,%rcx /* fix arg3 location according to C ABI */ - /* switch to kernel stack */ + /* switch to kernel stack, then we can enable interrupts */ CPU_NUMBER(%r11) movq CX(EXT(kernel_stack),%r11),%rsp + sti /* Now we have saved state and args 1-6 are in place. * Before invoking the syscall we do some bound checking and, @@ -1468,6 +1469,7 @@ _syscall64_check_for_ast: _syscall64_restore_state: /* Restore thread state and return to user using sysret. */ + cli /* block interrupts when using the user stack in kernel space */ movq CX(EXT(active_threads),%r11),%r11 /* point to current thread */ movq TH_PCB(%r11),%r11 /* point to pcb */ addq $ PCB_ISS,%r11 /* point to saved state */ -- 2.39.2