Author: gordon Date: Tue May 8 17:12:10 2018 New Revision: 333371 URL: https://svnweb.freebsd.org/changeset/base/333371
Log: Fix mishandling of x86 debug exceptions. [SA-18:06.debugreg] Bump newvers.sh and UPDATING for today's patches. Approved by: so Security: CVE-2018-8897 Security: FreeBSD-SA-18:06.debugreg Sponsored by: The FreeBSD Foundation Modified: releng/10.4/UPDATING releng/10.4/sys/amd64/amd64/exception.S releng/10.4/sys/amd64/amd64/machdep.c releng/10.4/sys/amd64/amd64/mp_machdep.c releng/10.4/sys/amd64/amd64/trap.c releng/10.4/sys/conf/newvers.sh releng/10.4/sys/i386/i386/trap.c releng/11.1/UPDATING releng/11.1/sys/amd64/amd64/exception.S releng/11.1/sys/amd64/amd64/machdep.c releng/11.1/sys/amd64/amd64/mp_machdep.c releng/11.1/sys/amd64/amd64/pmap.c releng/11.1/sys/amd64/amd64/trap.c releng/11.1/sys/conf/newvers.sh releng/11.1/sys/i386/i386/trap.c Modified: releng/10.4/UPDATING ============================================================================== --- releng/10.4/UPDATING Tue May 8 17:05:39 2018 (r333370) +++ releng/10.4/UPDATING Tue May 8 17:12:10 2018 (r333371) @@ -16,6 +16,16 @@ from older versions of FreeBSD, try WITHOUT_CLANG to b stable/10, and then rebuild without this option. The bootstrap process from older version of current is a bit fragile. +20180508 p9 FreeBSD-SA-18:06.debugreg + FreeBSD-EN-18:05.mem + FreeBSD-EN-18:06.tzdata + + Fix mishandling of x86 debug exceptions. [SA-18:06.debugreg] + + Fix multiple small kernel memory disclosures. [EN-18:05.mem] + + Update timezone database information. [EN-18:06.tzdata] + 20180404 p8 FreeBSD-SA-18:04.vt FreeBSD-SA-18:05.ipsec FreeBSD-EN-18:03.tzdata Modified: releng/10.4/sys/amd64/amd64/exception.S ============================================================================== --- releng/10.4/sys/amd64/amd64/exception.S Tue May 8 17:05:39 2018 (r333370) +++ releng/10.4/sys/amd64/amd64/exception.S Tue May 8 17:12:10 2018 (r333371) @@ -108,8 +108,6 @@ MCOUNT_LABEL(btrap) movq $0,TF_ADDR(%rsp) ; \ movq $0,TF_ERR(%rsp) ; \ jmp alltraps_noen -IDTVEC(dbg) - TRAP_NOEN(T_TRCTRAP) IDTVEC(bpt) TRAP_NOEN(T_BPTFLT) #ifdef KDTRACE_HOOKS @@ -434,6 +432,101 @@ IDTVEC(fast_syscall) */ IDTVEC(fast_syscall32) sysret + +/* + * DB# handler is very similar to NM#, because 'mov/pop %ss' delay + * generation of exception until the next instruction is executed, + * which might be a kernel entry. So we must execute the handler + * on IST stack and be ready for non-kernel GSBASE. + */ +IDTVEC(dbg) + subq $TF_RIP,%rsp + movl $(T_TRCTRAP),TF_TRAPNO(%rsp) + movq $0,TF_ADDR(%rsp) + movq $0,TF_ERR(%rsp) + movq %rdi,TF_RDI(%rsp) + movq %rsi,TF_RSI(%rsp) + movq %rdx,TF_RDX(%rsp) + movq %rcx,TF_RCX(%rsp) + movq %r8,TF_R8(%rsp) + movq %r9,TF_R9(%rsp) + movq %rax,TF_RAX(%rsp) + movq %rbx,TF_RBX(%rsp) + movq %rbp,TF_RBP(%rsp) + movq %r10,TF_R10(%rsp) + movq %r11,TF_R11(%rsp) + movq %r12,TF_R12(%rsp) + movq %r13,TF_R13(%rsp) + movq %r14,TF_R14(%rsp) + movq %r15,TF_R15(%rsp) + movw %fs,TF_FS(%rsp) + movw %gs,TF_GS(%rsp) + movw %es,TF_ES(%rsp) + movw %ds,TF_DS(%rsp) + movl $TF_HASSEGS,TF_FLAGS(%rsp) + cld + testb $SEL_RPL_MASK,TF_CS(%rsp) + jnz dbg_fromuserspace + /* + * We've interrupted the kernel. Preserve GS.base in %r12. + */ + movl $MSR_GSBASE,%ecx + rdmsr + movq %rax,%r12 + shlq $32,%rdx + orq %rdx,%r12 + /* Retrieve and load the canonical value for GS.base. */ + movq TF_SIZE(%rsp),%rdx + movl %edx,%eax + shrq $32,%rdx + wrmsr + FAKE_MCOUNT(TF_RIP(%rsp)) + movq %rsp,%rdi + call trap + MEXITCOUNT + /* + * Put back the preserved MSR_GSBASE value. + */ + movl $MSR_GSBASE,%ecx + movq %r12,%rdx + movl %edx,%eax + shrq $32,%rdx + wrmsr + movq TF_RDI(%rsp),%rdi + movq TF_RSI(%rsp),%rsi + movq TF_RDX(%rsp),%rdx + movq TF_RCX(%rsp),%rcx + movq TF_R8(%rsp),%r8 + movq TF_R9(%rsp),%r9 + movq TF_RAX(%rsp),%rax + movq TF_RBX(%rsp),%rbx + movq TF_RBP(%rsp),%rbp + movq TF_R10(%rsp),%r10 + movq TF_R11(%rsp),%r11 + movq TF_R12(%rsp),%r12 + movq TF_R13(%rsp),%r13 + movq TF_R14(%rsp),%r14 + movq TF_R15(%rsp),%r15 + addq $TF_RIP,%rsp + jmp doreti_iret +dbg_fromuserspace: + /* + * Switch to kernel GSBASE and kernel page table, and copy frame + * from the IST stack to the normal kernel stack, since trap() + * re-enables interrupts, and since we might trap on DB# while + * in trap(). + */ + swapgs + movq PCPU(RSP0),%rax + movl $TF_SIZE,%ecx + subq %rcx,%rax + movq %rax,%rdi + movq %rsp,%rsi + rep;movsb + movq %rax,%rsp + movq PCPU(CURPCB),%rdi + orl $PCB_FULL_IRET,PCB_FLAGS(%rdi) + jmp calltrap /* * NMI handling is special. Modified: releng/10.4/sys/amd64/amd64/machdep.c ============================================================================== --- releng/10.4/sys/amd64/amd64/machdep.c Tue May 8 17:05:39 2018 (r333370) +++ releng/10.4/sys/amd64/amd64/machdep.c Tue May 8 17:12:10 2018 (r333371) @@ -1023,6 +1023,7 @@ struct gate_descriptor *idt = &idt0[0]; /* interrupt d static char dblfault_stack[PAGE_SIZE] __aligned(16); static char nmi0_stack[PAGE_SIZE] __aligned(16); +static char dbg0_stack[PAGE_SIZE] __aligned(16); CTASSERT(sizeof(struct nmi_pcpu) == 16); struct amd64tss common_tss[MAXCPU]; @@ -1908,7 +1909,7 @@ hammer_time(u_int64_t modulep, u_int64_t physfree) for (x = 0; x < NIDT; x++) setidt(x, &IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_DE, &IDTVEC(div), SDT_SYSIGT, SEL_KPL, 0); - setidt(IDT_DB, &IDTVEC(dbg), SDT_SYSIGT, SEL_KPL, 0); + setidt(IDT_DB, &IDTVEC(dbg), SDT_SYSIGT, SEL_KPL, 4); setidt(IDT_NMI, &IDTVEC(nmi), SDT_SYSIGT, SEL_KPL, 2); setidt(IDT_BP, &IDTVEC(bpt), SDT_SYSIGT, SEL_UPL, 0); setidt(IDT_OF, &IDTVEC(ofl), SDT_SYSIGT, SEL_KPL, 0); @@ -1965,6 +1966,13 @@ hammer_time(u_int64_t modulep, u_int64_t physfree) np = ((struct nmi_pcpu *) &nmi0_stack[sizeof(nmi0_stack)]) - 1; np->np_pcpu = (register_t) pc; common_tss[0].tss_ist2 = (long) np; + + /* + * DB# stack, runs on ist4. + */ + np = ((struct nmi_pcpu *) &dbg0_stack[sizeof(dbg0_stack)]) - 1; + np->np_pcpu = (register_t) pc; + common_tss[0].tss_ist4 = (long) np; /* Set the IO permission bitmap (empty due to tss seg limit) */ common_tss[0].tss_iobase = sizeof(struct amd64tss) + Modified: releng/10.4/sys/amd64/amd64/mp_machdep.c ============================================================================== --- releng/10.4/sys/amd64/amd64/mp_machdep.c Tue May 8 17:05:39 2018 (r333370) +++ releng/10.4/sys/amd64/amd64/mp_machdep.c Tue May 8 17:12:10 2018 (r333371) @@ -98,6 +98,7 @@ void *bootstacks[MAXCPU]; /* Temporary variables for init_secondary() */ char *doublefault_stack; char *nmi_stack; +char *dbg_stack; void *dpcpu; struct pcb stoppcbs[MAXCPU]; @@ -644,6 +645,10 @@ init_secondary(void) np = ((struct nmi_pcpu *) &nmi_stack[PAGE_SIZE]) - 1; common_tss[cpu].tss_ist2 = (long) np; + /* The DB# stack runs on IST4. */ + np = ((struct nmi_pcpu *) &dbg_stack[PAGE_SIZE]) - 1; + common_tss[cpu].tss_ist4 = (long) np; + /* Prepare private GDT */ gdt_segs[GPROC0_SEL].ssd_base = (long) &common_tss[cpu]; for (x = 0; x < NGDT; x++) { @@ -679,6 +684,10 @@ init_secondary(void) /* Save the per-cpu pointer for use by the NMI handler. */ np->np_pcpu = (register_t) pc; + /* Save the per-cpu pointer for use by the DB# handler. */ + np = ((struct nmi_pcpu *) &dbg_stack[PAGE_SIZE]) - 1; + np->np_pcpu = (register_t) pc; + wrmsr(MSR_FSBASE, 0); /* User value */ wrmsr(MSR_GSBASE, (u_int64_t)pc); wrmsr(MSR_KGSBASE, (u_int64_t)pc); /* XXX User value while we're in the kernel */ @@ -966,6 +975,8 @@ start_all_aps(void) doublefault_stack = (char *)kmem_malloc(kernel_arena, PAGE_SIZE, M_WAITOK | M_ZERO); nmi_stack = (char *)kmem_malloc(kernel_arena, PAGE_SIZE, + M_WAITOK | M_ZERO); + dbg_stack = (char *)kmem_malloc(kernel_arena, PAGE_SIZE, M_WAITOK | M_ZERO); dpcpu = (void *)kmem_malloc(kernel_arena, DPCPU_SIZE, M_WAITOK | M_ZERO); Modified: releng/10.4/sys/amd64/amd64/trap.c ============================================================================== --- releng/10.4/sys/amd64/amd64/trap.c Tue May 8 17:05:39 2018 (r333370) +++ releng/10.4/sys/amd64/amd64/trap.c Tue May 8 17:12:10 2018 (r333371) @@ -45,6 +45,7 @@ __FBSDID("$FreeBSD$"); */ #include "opt_clock.h" +#include "opt_compat.h" #include "opt_cpu.h" #include "opt_hwpmc_hooks.h" #include "opt_isa.h" @@ -98,6 +99,9 @@ PMC_SOFT_DEFINE( , , page_fault, write); #include <sys/dtrace_bsd.h> #endif +extern inthand_t IDTVEC(bpt), IDTVEC(dbg), IDTVEC(fast_syscall), + IDTVEC(fast_syscall32), IDTVEC(int0x80_syscall); + extern void trap(struct trapframe *frame); extern void syscall(struct trapframe *frame); void dblfault_handler(struct trapframe *frame); @@ -549,6 +553,39 @@ trap(struct trapframe *frame) load_dr6(rdr6() & 0xfffffff0); goto out; } + + /* + * Malicious user code can configure a debug + * register watchpoint to trap on data access + * to the top of stack and then execute 'pop + * %ss; int 3'. Due to exception deferral for + * 'pop %ss', the CPU will not interrupt 'int + * 3' to raise the DB# exception for the debug + * register but will postpone the DB# until + * execution of the first instruction of the + * BP# handler (in kernel mode). Normally the + * previous check would ignore DB# exceptions + * for watchpoints on user addresses raised in + * kernel mode. However, some CPU errata + * include cases where DB# exceptions do not + * properly set bits in %dr6, e.g. Haswell + * HSD23 and Skylake-X SKZ24. + * + * A deferred DB# can also be raised on the + * first instructions of system call entry + * points or single-step traps via similar use + * of 'pop %ss' or 'mov xxx, %ss'. + */ + if (frame->tf_rip == (uintptr_t)IDTVEC(fast_syscall) || +#ifdef COMPAT_FREEBSD32 + frame->tf_rip == + (uintptr_t)IDTVEC(int0x80_syscall) || +#endif + frame->tf_rip == (uintptr_t)IDTVEC(bpt) || + frame->tf_rip == (uintptr_t)IDTVEC(dbg) || + /* Needed for AMD. */ + frame->tf_rip == (uintptr_t)IDTVEC(fast_syscall32)) + return; /* * FALLTHROUGH (TRCTRAP kernel mode, kernel address) */ Modified: releng/10.4/sys/conf/newvers.sh ============================================================================== --- releng/10.4/sys/conf/newvers.sh Tue May 8 17:05:39 2018 (r333370) +++ releng/10.4/sys/conf/newvers.sh Tue May 8 17:12:10 2018 (r333371) @@ -32,7 +32,7 @@ TYPE="FreeBSD" REVISION="10.4" -BRANCH="RELEASE-p8" +BRANCH="RELEASE-p9" if [ "X${BRANCH_OVERRIDE}" != "X" ]; then BRANCH=${BRANCH_OVERRIDE} fi Modified: releng/10.4/sys/i386/i386/trap.c ============================================================================== --- releng/10.4/sys/i386/i386/trap.c Tue May 8 17:05:39 2018 (r333370) +++ releng/10.4/sys/i386/i386/trap.c Tue May 8 17:12:10 2018 (r333371) @@ -116,6 +116,8 @@ void dblfault_handler(void); extern inthand_t IDTVEC(lcall_syscall); +extern inthand_t IDTVEC(bpt), IDTVEC(dbg), IDTVEC(int0x80_syscall); + #define MAX_TRAP_MSG 32 static char *trap_msg[] = { "", /* 0 unused */ @@ -683,6 +685,34 @@ trap(struct trapframe *frame) load_dr6(rdr6() & 0xfffffff0); goto out; } + + /* + * Malicious user code can configure a debug + * register watchpoint to trap on data access + * to the top of stack and then execute 'pop + * %ss; int 3'. Due to exception deferral for + * 'pop %ss', the CPU will not interrupt 'int + * 3' to raise the DB# exception for the debug + * register but will postpone the DB# until + * execution of the first instruction of the + * BP# handler (in kernel mode). Normally the + * previous check would ignore DB# exceptions + * for watchpoints on user addresses raised in + * kernel mode. However, some CPU errata + * include cases where DB# exceptions do not + * properly set bits in %dr6, e.g. Haswell + * HSD23 and Skylake-X SKZ24. + * + * A deferred DB# can also be raised on the + * first instructions of system call entry + * points or single-step traps via similar use + * of 'pop %ss' or 'mov xxx, %ss'. + */ + if (frame->tf_eip == + (uintptr_t)IDTVEC(int0x80_syscall) || + frame->tf_eip == (uintptr_t)IDTVEC(bpt) || + frame->tf_eip == (uintptr_t)IDTVEC(dbg)) + return; /* * FALLTHROUGH (TRCTRAP kernel mode, kernel address) */ Modified: releng/11.1/UPDATING ============================================================================== --- releng/11.1/UPDATING Tue May 8 17:05:39 2018 (r333370) +++ releng/11.1/UPDATING Tue May 8 17:12:10 2018 (r333371) @@ -16,6 +16,16 @@ from older versions of FreeBSD, try WITHOUT_CLANG and the tip of head, and then rebuild without this option. The bootstrap process from older version of current across the gcc/clang cutover is a bit fragile. +20180508 p10 FreeBSD-SA-18:06.debugreg + FreeBSD-EN-18:05.mem + FreeBSD-EN-18:06.tzdata + + Fix mishandling of x86 debug exceptions. [SA-18:06.debugreg] + + Fix multiple small kernel memory disclosures. [EN-18:05.mem] + + Update timezone database information. [EN-18:06.tzdata] + 20180404 p9 FreeBSD-SA-18:04.vt FreeBSD-SA-18:05.ipsec FreeBSD-EN-18:03.tzdata Modified: releng/11.1/sys/amd64/amd64/exception.S ============================================================================== --- releng/11.1/sys/amd64/amd64/exception.S Tue May 8 17:05:39 2018 (r333370) +++ releng/11.1/sys/amd64/amd64/exception.S Tue May 8 17:12:10 2018 (r333371) @@ -116,7 +116,6 @@ X\l: subq $TF_RIP,%rsp jmp alltraps_noen .endm - TRAP_NOEN dbg, T_TRCTRAP TRAP_NOEN bpt, T_BPTFLT #ifdef KDTRACE_HOOKS TRAP_NOEN dtrace_ret, T_DTRACE_RET @@ -507,6 +506,121 @@ fast_syscall_common: */ IDTVEC(fast_syscall32) sysret + +/* + * DB# handler is very similar to NM#, because 'mov/pop %ss' delay + * generation of exception until the next instruction is executed, + * which might be a kernel entry. So we must execute the handler + * on IST stack and be ready for non-kernel GSBASE. + */ +IDTVEC(dbg) + subq $TF_RIP,%rsp + movl $(T_TRCTRAP),TF_TRAPNO(%rsp) + movq $0,TF_ADDR(%rsp) + movq $0,TF_ERR(%rsp) + movq %rdi,TF_RDI(%rsp) + movq %rsi,TF_RSI(%rsp) + movq %rdx,TF_RDX(%rsp) + movq %rcx,TF_RCX(%rsp) + movq %r8,TF_R8(%rsp) + movq %r9,TF_R9(%rsp) + movq %rax,TF_RAX(%rsp) + movq %rbx,TF_RBX(%rsp) + movq %rbp,TF_RBP(%rsp) + movq %r10,TF_R10(%rsp) + movq %r11,TF_R11(%rsp) + movq %r12,TF_R12(%rsp) + movq %r13,TF_R13(%rsp) + movq %r14,TF_R14(%rsp) + movq %r15,TF_R15(%rsp) + SAVE_SEGS + movl $TF_HASSEGS,TF_FLAGS(%rsp) + cld + testb $SEL_RPL_MASK,TF_CS(%rsp) + jnz dbg_fromuserspace + /* + * We've interrupted the kernel. Preserve GS.base in %r12, + * %cr3 in %r13, and possibly lower half of MSR_IA32_SPEC_CTL in %r14d. + */ + movl $MSR_GSBASE,%ecx + rdmsr + movq %rax,%r12 + shlq $32,%rdx + orq %rdx,%r12 + /* Retrieve and load the canonical value for GS.base. */ + movq TF_SIZE(%rsp),%rdx + movl %edx,%eax + shrq $32,%rdx + wrmsr + movq %cr3,%r13 + movq PCPU(KCR3),%rax + cmpq $~0,%rax + je 1f + movq %rax,%cr3 +1: testl $CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip) + je 2f + movl $MSR_IA32_SPEC_CTRL,%ecx + rdmsr + movl %eax,%r14d + call handle_ibrs_entry +2: FAKE_MCOUNT(TF_RIP(%rsp)) + movq %rsp,%rdi + call trap + MEXITCOUNT + testl $CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip) + je 3f + movl %r14d,%eax + xorl %edx,%edx + movl $MSR_IA32_SPEC_CTRL,%ecx + wrmsr + /* + * Put back the preserved MSR_GSBASE value. + */ +3: movl $MSR_GSBASE,%ecx + movq %r12,%rdx + movl %edx,%eax + shrq $32,%rdx + wrmsr + movq %r13,%cr3 + RESTORE_REGS + addq $TF_RIP,%rsp + jmp doreti_iret +dbg_fromuserspace: + /* + * Switch to kernel GSBASE and kernel page table, and copy frame + * from the IST stack to the normal kernel stack, since trap() + * re-enables interrupts, and since we might trap on DB# while + * in trap(). + */ + swapgs + movq PCPU(KCR3),%rax + cmpq $~0,%rax + je 1f + movq %rax,%cr3 +1: movq PCPU(RSP0),%rax + movl $TF_SIZE,%ecx + subq %rcx,%rax + movq %rax,%rdi + movq %rsp,%rsi + rep;movsb + movq %rax,%rsp + call handle_ibrs_entry + movq PCPU(CURPCB),%rdi + orl $PCB_FULL_IRET,PCB_FLAGS(%rdi) + testb $CPUID_STDEXT_FSGSBASE,cpu_stdext_feature(%rip) + jz 3f + cmpw $KUF32SEL,TF_FS(%rsp) + jne 2f + rdfsbase %rax + movq %rax,PCB_FSBASE(%rdi) +2: cmpw $KUG32SEL,TF_GS(%rsp) + jne 3f + movl $MSR_KGSBASE,%ecx + rdmsr + shlq $32,%rdx + orq %rdx,%rax + movq %rax,PCB_GSBASE(%rdi) +3: jmp calltrap /* * NMI handling is special. Modified: releng/11.1/sys/amd64/amd64/machdep.c ============================================================================== --- releng/11.1/sys/amd64/amd64/machdep.c Tue May 8 17:05:39 2018 (r333370) +++ releng/11.1/sys/amd64/amd64/machdep.c Tue May 8 17:12:10 2018 (r333371) @@ -675,6 +675,7 @@ struct gate_descriptor *idt = &idt0[0]; /* interrupt d static char dblfault_stack[PAGE_SIZE] __aligned(16); static char mce0_stack[PAGE_SIZE] __aligned(16); static char nmi0_stack[PAGE_SIZE] __aligned(16); +static char dbg0_stack[PAGE_SIZE] __aligned(16); CTASSERT(sizeof(struct nmi_pcpu) == 16); struct amd64tss common_tss[MAXCPU]; @@ -827,7 +828,7 @@ extern inthand_t IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot), IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align), IDTVEC(xmm), IDTVEC(dblfault), - IDTVEC(div_pti), IDTVEC(dbg_pti), IDTVEC(bpt_pti), + IDTVEC(div_pti), IDTVEC(bpt_pti), IDTVEC(ofl_pti), IDTVEC(bnd_pti), IDTVEC(ill_pti), IDTVEC(dna_pti), IDTVEC(fpusegm_pti), IDTVEC(tss_pti), IDTVEC(missing_pti), IDTVEC(stk_pti), IDTVEC(prot_pti), IDTVEC(page_pti), @@ -1637,8 +1638,7 @@ hammer_time(u_int64_t modulep, u_int64_t physfree) SEL_KPL, 0); setidt(IDT_DE, pti ? &IDTVEC(div_pti) : &IDTVEC(div), SDT_SYSIGT, SEL_KPL, 0); - setidt(IDT_DB, pti ? &IDTVEC(dbg_pti) : &IDTVEC(dbg), SDT_SYSIGT, - SEL_KPL, 0); + setidt(IDT_DB, &IDTVEC(dbg), SDT_SYSIGT, SEL_KPL, 4); setidt(IDT_NMI, &IDTVEC(nmi), SDT_SYSIGT, SEL_KPL, 2); setidt(IDT_BP, pti ? &IDTVEC(bpt_pti) : &IDTVEC(bpt), SDT_SYSIGT, SEL_UPL, 0); @@ -1720,6 +1720,13 @@ hammer_time(u_int64_t modulep, u_int64_t physfree) np = ((struct nmi_pcpu *) &mce0_stack[sizeof(mce0_stack)]) - 1; np->np_pcpu = (register_t) pc; common_tss[0].tss_ist3 = (long) np; + + /* + * DB# stack, runs on ist4. + */ + np = ((struct nmi_pcpu *) &dbg0_stack[sizeof(dbg0_stack)]) - 1; + np->np_pcpu = (register_t) pc; + common_tss[0].tss_ist4 = (long) np; /* Set the IO permission bitmap (empty due to tss seg limit) */ common_tss[0].tss_iobase = sizeof(struct amd64tss) + IOPERM_BITMAP_SIZE; Modified: releng/11.1/sys/amd64/amd64/mp_machdep.c ============================================================================== --- releng/11.1/sys/amd64/amd64/mp_machdep.c Tue May 8 17:05:39 2018 (r333370) +++ releng/11.1/sys/amd64/amd64/mp_machdep.c Tue May 8 17:12:10 2018 (r333371) @@ -87,6 +87,7 @@ extern struct pcpu __pcpu[]; char *doublefault_stack; char *mce_stack; char *nmi_stack; +char *dbg_stack; /* * Local data and functions. @@ -225,6 +226,10 @@ init_secondary(void) np = ((struct nmi_pcpu *) &mce_stack[PAGE_SIZE]) - 1; common_tss[cpu].tss_ist3 = (long) np; + /* The DB# stack runs on IST4. */ + np = ((struct nmi_pcpu *) &dbg_stack[PAGE_SIZE]) - 1; + common_tss[cpu].tss_ist4 = (long) np; + /* Prepare private GDT */ gdt_segs[GPROC0_SEL].ssd_base = (long) &common_tss[cpu]; for (x = 0; x < NGDT; x++) { @@ -270,6 +275,10 @@ init_secondary(void) np = ((struct nmi_pcpu *) &mce_stack[PAGE_SIZE]) - 1; np->np_pcpu = (register_t) pc; + /* Save the per-cpu pointer for use by the DB# handler. */ + np = ((struct nmi_pcpu *) &dbg_stack[PAGE_SIZE]) - 1; + np->np_pcpu = (register_t) pc; + wrmsr(MSR_FSBASE, 0); /* User value */ wrmsr(MSR_GSBASE, (u_int64_t)pc); wrmsr(MSR_KGSBASE, (u_int64_t)pc); /* XXX User value while we're in the kernel */ @@ -367,6 +376,8 @@ native_start_all_aps(void) mce_stack = (char *)kmem_malloc(kernel_arena, PAGE_SIZE, M_WAITOK | M_ZERO); nmi_stack = (char *)kmem_malloc(kernel_arena, PAGE_SIZE, + M_WAITOK | M_ZERO); + dbg_stack = (char *)kmem_malloc(kernel_arena, PAGE_SIZE, M_WAITOK | M_ZERO); dpcpu = (void *)kmem_malloc(kernel_arena, DPCPU_SIZE, M_WAITOK | M_ZERO); Modified: releng/11.1/sys/amd64/amd64/pmap.c ============================================================================== --- releng/11.1/sys/amd64/amd64/pmap.c Tue May 8 17:05:39 2018 (r333370) +++ releng/11.1/sys/amd64/amd64/pmap.c Tue May 8 17:12:10 2018 (r333371) @@ -7565,6 +7565,9 @@ pmap_pti_init(void) /* MC# stack IST 3 */ va = common_tss[i].tss_ist3 + sizeof(struct nmi_pcpu); pmap_pti_add_kva_locked(va - PAGE_SIZE, va, false); + /* DB# stack IST 4 */ + va = common_tss[i].tss_ist4 + sizeof(struct nmi_pcpu); + pmap_pti_add_kva_locked(va - PAGE_SIZE, va, false); } pmap_pti_add_kva_locked((vm_offset_t)kernphys + KERNBASE, (vm_offset_t)etext, true); Modified: releng/11.1/sys/amd64/amd64/trap.c ============================================================================== --- releng/11.1/sys/amd64/amd64/trap.c Tue May 8 17:05:39 2018 (r333370) +++ releng/11.1/sys/amd64/amd64/trap.c Tue May 8 17:12:10 2018 (r333371) @@ -45,6 +45,7 @@ __FBSDID("$FreeBSD$"); */ #include "opt_clock.h" +#include "opt_compat.h" #include "opt_cpu.h" #include "opt_hwpmc_hooks.h" #include "opt_isa.h" @@ -99,6 +100,11 @@ PMC_SOFT_DEFINE( , , page_fault, write); #include <sys/dtrace_bsd.h> #endif +extern inthand_t IDTVEC(bpt), IDTVEC(bpt_pti), IDTVEC(dbg), + IDTVEC(fast_syscall), IDTVEC(fast_syscall_pti), IDTVEC(fast_syscall32), + IDTVEC(int0x80_syscall_pti), IDTVEC(int0x80_syscall); + + extern void __noinline trap(struct trapframe *frame); extern void trap_check(struct trapframe *frame); extern void syscall(struct trapframe *frame); @@ -536,6 +542,52 @@ trap(struct trapframe *frame) load_dr6(rdr6() & ~0xf); goto out; } + + /* + * Malicious user code can configure a debug + * register watchpoint to trap on data access + * to the top of stack and then execute 'pop + * %ss; int 3'. Due to exception deferral for + * 'pop %ss', the CPU will not interrupt 'int + * 3' to raise the DB# exception for the debug + * register but will postpone the DB# until + * execution of the first instruction of the + * BP# handler (in kernel mode). Normally the + * previous check would ignore DB# exceptions + * for watchpoints on user addresses raised in + * kernel mode. However, some CPU errata + * include cases where DB# exceptions do not + * properly set bits in %dr6, e.g. Haswell + * HSD23 and Skylake-X SKZ24. + * + * A deferred DB# can also be raised on the + * first instructions of system call entry + * points or single-step traps via similar use + * of 'pop %ss' or 'mov xxx, %ss'. + */ + if (pti) { + if (frame->tf_rip == + (uintptr_t)IDTVEC(fast_syscall_pti) || +#ifdef COMPAT_FREEBSD32 + frame->tf_rip == + (uintptr_t)IDTVEC(int0x80_syscall_pti) || +#endif + frame->tf_rip == (uintptr_t)IDTVEC(bpt_pti)) + return; + } else { + if (frame->tf_rip == + (uintptr_t)IDTVEC(fast_syscall) || +#ifdef COMPAT_FREEBSD32 + frame->tf_rip == + (uintptr_t)IDTVEC(int0x80_syscall) || +#endif + frame->tf_rip == (uintptr_t)IDTVEC(bpt)) + return; + } + if (frame->tf_rip == (uintptr_t)IDTVEC(dbg) || + /* Needed for AMD. */ + frame->tf_rip == (uintptr_t)IDTVEC(fast_syscall32)) + return; /* * FALLTHROUGH (TRCTRAP kernel mode, kernel address) */ Modified: releng/11.1/sys/conf/newvers.sh ============================================================================== --- releng/11.1/sys/conf/newvers.sh Tue May 8 17:05:39 2018 (r333370) +++ releng/11.1/sys/conf/newvers.sh Tue May 8 17:12:10 2018 (r333371) @@ -44,7 +44,7 @@ TYPE="FreeBSD" REVISION="11.1" -BRANCH="RELEASE-p9" +BRANCH="RELEASE-p10" if [ -n "${BRANCH_OVERRIDE}" ]; then BRANCH=${BRANCH_OVERRIDE} fi Modified: releng/11.1/sys/i386/i386/trap.c ============================================================================== --- releng/11.1/sys/i386/i386/trap.c Tue May 8 17:05:39 2018 (r333370) +++ releng/11.1/sys/i386/i386/trap.c Tue May 8 17:12:10 2018 (r333371) @@ -116,6 +116,8 @@ void dblfault_handler(void); extern inthand_t IDTVEC(lcall_syscall); +extern inthand_t IDTVEC(bpt), IDTVEC(dbg), IDTVEC(int0x80_syscall); + #define MAX_TRAP_MSG 32 static char *trap_msg[] = { "", /* 0 unused */ @@ -668,6 +670,34 @@ kernel_trctrap: load_dr6(rdr6() & ~0xf); goto out; } + + /* + * Malicious user code can configure a debug + * register watchpoint to trap on data access + * to the top of stack and then execute 'pop + * %ss; int 3'. Due to exception deferral for + * 'pop %ss', the CPU will not interrupt 'int + * 3' to raise the DB# exception for the debug + * register but will postpone the DB# until + * execution of the first instruction of the + * BP# handler (in kernel mode). Normally the + * previous check would ignore DB# exceptions + * for watchpoints on user addresses raised in + * kernel mode. However, some CPU errata + * include cases where DB# exceptions do not + * properly set bits in %dr6, e.g. Haswell + * HSD23 and Skylake-X SKZ24. + * + * A deferred DB# can also be raised on the + * first instructions of system call entry + * points or single-step traps via similar use + * of 'pop %ss' or 'mov xxx, %ss'. + */ + if (frame->tf_eip == + (uintptr_t)IDTVEC(int0x80_syscall) || + frame->tf_eip == (uintptr_t)IDTVEC(bpt) || + frame->tf_eip == (uintptr_t)IDTVEC(dbg)) + return; /* * FALLTHROUGH (TRCTRAP kernel mode, kernel address) */ _______________________________________________ svn-src-all@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"