Set IBRS upon kernel entry via syscalls and interrupts, and clear it upon exit back to userspace.
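The ENABLE_IBRS/DISABLE_IBRS macros come from <asm/spec_ctrl.h>, which is not
part of this patch. As a rough, hypothetical sketch of what the non-clobbering
pair amounts to -- assuming IBRS is bit 0 of the IA32_SPEC_CTRL MSR (index
0x48), and assuming "non-clobbering" means the registers WRMSR needs are saved
on the stack, which is why the call sites below want a usable stack first:

        .macro ENABLE_IBRS              /* sketch only, not the real macro */
        pushq   %rax
        pushq   %rcx
        pushq   %rdx
        movl    $0x48, %ecx             /* MSR_IA32_SPEC_CTRL */
        movl    $1, %eax                /* SPEC_CTRL_IBRS (bit 0) */
        xorl    %edx, %edx
        wrmsr
        popq    %rdx
        popq    %rcx
        popq    %rax
        .endm

        .macro DISABLE_IBRS             /* sketch only, not the real macro */
        pushq   %rax
        pushq   %rcx
        pushq   %rdx
        movl    $0x48, %ecx             /* MSR_IA32_SPEC_CTRL */
        xorl    %eax, %eax              /* clear IBRS */
        xorl    %edx, %edx
        wrmsr
        popq    %rdx
        popq    %rcx
        popq    %rax
        .endm

The real macros presumably also gate the WRMSR behind a CPU feature check so
that hardware without SPEC_CTRL support is never touched.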
If an NMI runs while we are exiting the kernel, between DISABLE_IBRS and
SWAPGS, the NMI would turn IBRS bit 0 back on and leave it enabled when
exiting the NMI. IBRS bit 0 would then stay enabled in userland until the
next kernel entry. That is only a minor inefficiency, but we can eliminate
it by saving the MSR when entering the NMI in paranoid_entry and restoring
it when exiting the NMI.

Signed-off-by: Andrea Arcangeli <aarca...@redhat.com>
Signed-off-by: Tim Chen <tim.c.c...@linux.intel.com>
---
 arch/x86/entry/entry_64.S        | 24 ++++++++++++++++++++++++
 arch/x86/entry/entry_64_compat.S |  9 +++++++++
 2 files changed, 33 insertions(+)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 3f72f5c..0c4d542 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -37,6 +37,7 @@
 #include <asm/pgtable_types.h>
 #include <asm/export.h>
 #include <asm/frame.h>
+#include <asm/spec_ctrl.h>
 #include <linux/err.h>
 
 #include "calling.h"
@@ -170,6 +171,8 @@ ENTRY(entry_SYSCALL_64_trampoline)
 	/* Load the top of the task stack into RSP */
 	movq	CPU_ENTRY_AREA_tss + TSS_sp1 + CPU_ENTRY_AREA, %rsp
 
+	/* Stack is usable, use the non-clobbering IBRS enable: */
+	ENABLE_IBRS
 	/* Start building the simulated IRET frame. */
 	pushq	$__USER_DS			/* pt_regs->ss */
@@ -213,6 +216,8 @@ ENTRY(entry_SYSCALL_64)
 	 * is not required to switch CR3.
 	 */
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+	/* Stack is usable, use the non-clobbering IBRS enable: */
+	ENABLE_IBRS
 
 	TRACE_IRQS_OFF
@@ -407,6 +412,7 @@ syscall_return_via_sysret:
 	 * We are on the trampoline stack.  All regs except RDI are live.
 	 * We can do future final exit work right here.
 	 */
+	DISABLE_IBRS
 	SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
 
 	popq	%rdi
@@ -745,6 +751,7 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode)
 	 * We can do future final exit work right here.
 	 */
 
+	DISABLE_IBRS
 	SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
 
 	/* Restore RDI. */
@@ -832,6 +839,14 @@ native_irq_return_ldt:
 	SWAPGS					/* to kernel GS */
 	SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi	/* to kernel CR3 */
 
+	/*
+	 * Normally we enable IBRS when we switch to kernel's CR3.
+	 * But we are going to switch back to user CR3 immediately
+	 * in this routine after fixing ESPFIX stack.  There is
+	 * no vulnerable code branching for IBRS to protect.
+	 * We don't toggle IBRS to avoid the cost of two MSR writes.
+	 */
+
 	movq	PER_CPU_VAR(espfix_waddr), %rdi
 	movq	%rax, (0*8)(%rdi)		/* user RAX */
 	movq	(1*8)(%rsp), %rax		/* user RIP */
@@ -965,6 +980,8 @@ ENTRY(switch_to_thread_stack)
 	SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
 	movq	%rsp, %rdi
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+	/* Stack is usable, use the non-clobbering IBRS enable: */
+	ENABLE_IBRS
 	UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI
 
 	pushq	7*8(%rdi)		/* regs->ss */
@@ -1265,6 +1282,7 @@ ENTRY(paranoid_entry)
 1:
 	SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14
+	ENABLE_IBRS_SAVE_AND_CLOBBER save_reg=%r13d
 
 	ret
 END(paranoid_entry)
@@ -1288,6 +1306,7 @@ ENTRY(paranoid_exit)
 	testl	%ebx, %ebx			/* swapgs needed? */
 	jnz	.Lparanoid_exit_no_swapgs
 	TRACE_IRQS_IRETQ
+	RESTORE_IBRS_CLOBBER save_reg=%r13d
 	RESTORE_CR3	scratch_reg=%rbx save_reg=%r14
 	SWAPGS_UNSAFE_STACK
 	jmp	.Lparanoid_exit_restore
@@ -1318,6 +1337,7 @@ ENTRY(error_entry)
 	SWAPGS
 	/* We have user CR3.  Change to kernel CR3. */
 	SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
+	ENABLE_IBRS_CLOBBER
 
 .Lerror_entry_from_usermode_after_swapgs:
 	/* Put us onto the real thread stack. */
@@ -1365,6 +1385,7 @@ ENTRY(error_entry)
 	 */
 	SWAPGS
 	SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
+	ENABLE_IBRS_CLOBBER
 	jmp	.Lerror_entry_done
 
 .Lbstep_iret:
@@ -1379,6 +1400,7 @@ ENTRY(error_entry)
 	 */
 	SWAPGS
 	SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
+	ENABLE_IBRS
 
 	/*
 	 * Pretend that the exception came from user mode: set up pt_regs
@@ -1480,6 +1502,7 @@ ENTRY(nmi)
 	SWITCH_TO_KERNEL_CR3 scratch_reg=%rdx
 	movq	%rsp, %rdx
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+	ENABLE_IBRS
 	UNWIND_HINT_IRET_REGS base=%rdx offset=8
 	pushq	5*8(%rdx)	/* pt_regs->ss */
 	pushq	4*8(%rdx)	/* pt_regs->rsp */
@@ -1730,6 +1753,7 @@ end_repeat_nmi:
 	movq	$-1, %rsi
 	call	do_nmi
 
+	RESTORE_IBRS_CLOBBER save_reg=%r13d
 	RESTORE_CR3 scratch_reg=%r15 save_reg=%r14
 
 	testl	%ebx, %ebx			/* swapgs needed? */
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 40f1700..88ee1c0 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -14,6 +14,7 @@
 #include <asm/irqflags.h>
 #include <asm/asm.h>
 #include <asm/smap.h>
+#include <asm/spec_ctrl.h>
 
 #include <linux/linkage.h>
 #include <linux/err.h>
@@ -54,6 +55,7 @@ ENTRY(entry_SYSENTER_compat)
 	SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+	ENABLE_IBRS
 
 	/*
 	 * User tracing code (ptrace or signal handlers) might assume that
@@ -224,6 +226,7 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe)
 	 * preserved during the C calls inside TRACE_IRQS_OFF anyway.
 	 */
 	SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
+	ENABLE_IBRS_CLOBBER	/* clobbers %rax, %rcx, %rdx */
 
 	/*
 	 * User mode is traced as though IRQs are on, and SYSENTER
@@ -240,6 +243,12 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe)
 
 	/* Opportunistic SYSRET */
 sysret32_from_system_call:
 	TRACE_IRQS_ON			/* User mode traces as IRQs on. */
+	/*
+	 * Clobber of %rax, %rcx, %rdx is OK before register restoring.
+	 * This is safe to do here because we have no indirect branches
+	 * between here and the return to userspace (sysretl).
+	 */
+	DISABLE_IBRS_CLOBBER
 	movq	RBX(%rsp), %rbx		/* pt_regs->rbx */
 	movq	RBP(%rsp), %rbp		/* pt_regs->rbp */
 	movq	EFLAGS(%rsp), %r11	/* pt_regs->flags (in r11) */
-- 
2.9.4
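A note on the save/restore pair used on the paranoid/NMI path above: at NMI
entry the value of the MSR is unknown (the NMI may have interrupted the kernel
either before or after DISABLE_IBRS), so the enable has to save the old value
and the exit path has to write it back, as described in the commit message.
The macros live in <asm/spec_ctrl.h> and are not shown here; a minimal,
hypothetical sketch under the same assumptions as above (IBRS is bit 0 of the
IA32_SPEC_CTRL MSR, index 0x48; no CPU feature check shown), matching the
save_reg=%r13d usage in the hunks:

        .macro ENABLE_IBRS_SAVE_AND_CLOBBER save_reg:req    /* sketch only */
        movl    $0x48, %ecx             /* MSR_IA32_SPEC_CTRL */
        rdmsr                           /* current value in %edx:%eax */
        movl    %eax, \save_reg         /* remember the interrupted state */
        movl    $1, %eax                /* SPEC_CTRL_IBRS (bit 0) */
        xorl    %edx, %edx
        wrmsr                           /* clobbers %rax, %rcx, %rdx */
        .endm

        .macro RESTORE_IBRS_CLOBBER save_reg:req             /* sketch only */
        movl    $0x48, %ecx             /* MSR_IA32_SPEC_CTRL */
        movl    \save_reg, %eax         /* whatever was live at entry */
        xorl    %edx, %edx
        wrmsr                           /* clobbers %rax, %rcx, %rdx */
        .endm

With that, an NMI that lands between DISABLE_IBRS and SWAPGS enables IBRS for
its own execution and then writes the saved (already cleared) value back on
exit, so userland never runs with IBRS left set.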