Theo de Raadt <dera...@openbsd.org> wrote:

> kettenis, mortimer, and I have had long discussions about the
> uvm_map_inentry() checks in MD trap.c being excessively broad.  We
> can call this check-function less often.
>
> I don't want to explain the full picture about how SP and PC checks
> in syscall() and trap() hinder ROP, ROP stack pivoting, gadgets with
> unfortunate memory-access side effects, etc.  But...
>
> When a ROP chain pivots onto a non-MAP_STACK chunk of data, to use a
> data/heap-loaded secondary chain, it will eventually want to do a
> system call.
>
> (a) Thus, we force the chain to pivot back to MAP_STACK memory before
> doing the first syscall, otherwise the SP check in syscall() will
> kill it.  This pivot-back is additional labour not necessary for an
> attack on other operating systems.
>
> (b) We also do SP checking at trap time.  On most architectures it
> is currently done for MANY types of userland traps.  (Additionally,
> it can tsleep, which exposed a bunch of old bugs on many
> architectures that we fixed at k2k20.)  We now propose performing
> uvm_map_inentry() only for the standard userland "page fault" case,
> which goes to uvm_fault().
>
> It is pointless (and costly) to use interrupts, or other strange and
> rare traps, hoping to catch a ROP chain which is just chewing through
> properly mapped memory.  Those other traps also tend to get us into
> issues like tsleep, all at a performance cost and with negligible
> security benefit.
>
> However, unmapped memory is the Achilles heel.
>
> A poorly written chain will eventually access unmapped memory, either
> because it is using a gadget with a memory-access side effect that
> references a not-yet-mapped page (a COW fault, ZFOD, or
> demand-from-vnode), or because it is trying to use memory-scanning or
> sled methods.  Additionally, such unmapped memory accesses may be
> done as pointer dereferences against uncontrolled registers, where
> libc randomization plays a powerful role (but the margin is way too
> narrow).
>
> So for an attacker, this reduces gadget selection to ONLY THE BEST
> GADGETS -- meaning, minimal or no side effects.  We've reduced gadget
> count by a lot and placed code in very random places, so this becomes
> a further hindrance in selection.
>
> Narrowing the check also brings back a bit of the performance lost
> when the trap.c uvm_map_inentry() calls were added.
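To make the shape of the change concrete: the check moves out of the
common userland trap prologue, where it ran for every trap type, and
into the page-fault path only.  Sketched here using the amd64 names
from the diff below (T_PAGEFLT, pageflttrap()); the other
architectures do the equivalent in their own MD terms:

	case T_PAGEFLT:			/* page fault */
		/*
		 * Only here, on the standard userland fault path into
		 * uvm_fault(), do we verify that the SP points into
		 * MAP_STACK memory; if it does not, the check kills
		 * the process.
		 */
		if (!uvm_map_inentry(p, &p->p_spinentry, PROC_STACK(p),
		    "[%s]%d/%d sp=%lx inside %lx-%lx: not MAP_STACK\n",
		    uvm_map_inentry_sp, p->p_vmspace->vm_map.sserial))
			goto out;
		if (pageflttrap(frame, cr2, 1))
			goto out;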
Here are all the diffs.  kettenis worked on a few with me, and visa
wrote the mips64 one.  arm64 and armv7 don't need changes.

Index: alpha/alpha/trap.c
===================================================================
RCS file: /cvs/src/sys/arch/alpha/alpha/trap.c,v
retrieving revision 1.89
diff -u -p -u -r1.89 trap.c
--- alpha/alpha/trap.c	19 Aug 2020 10:10:57 -0000	1.89
+++ alpha/alpha/trap.c	23 Sep 2020 20:30:45 -0000
@@ -244,10 +244,6 @@ trap(a0, a1, a2, entry, framep)
 	if (user) {
 		p->p_md.md_tf = framep;
 		refreshcreds(p);
-		if (!uvm_map_inentry(p, &p->p_spinentry, PROC_STACK(p),
-		    "[%s]%d/%d sp=%lx inside %lx-%lx: not MAP_STACK\n",
-		    uvm_map_inentry_sp, p->p_vmspace->vm_map.sserial))
-			goto out;
 	}
 
 	switch (entry) {
@@ -370,6 +366,12 @@ trap(a0, a1, a2, entry, framep)
 		break;
 
 	case ALPHA_KENTRY_MM:
+		if (user &&
+		    !uvm_map_inentry(p, &p->p_spinentry, PROC_STACK(p),
+		    "[%s]%d/%d sp=%lx inside %lx-%lx: not MAP_STACK\n",
+		    uvm_map_inentry_sp, p->p_vmspace->vm_map.sserial))
+			goto out;
+
 		switch (a1) {
 		case ALPHA_MMCSR_FOR:
 		case ALPHA_MMCSR_FOE:
Index: amd64/amd64/trap.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/trap.c,v
retrieving revision 1.81
diff -u -p -u -r1.81 trap.c
--- amd64/amd64/trap.c	14 Sep 2020 12:51:28 -0000	1.81
+++ amd64/amd64/trap.c	23 Sep 2020 12:17:10 -0000
@@ -343,11 +343,6 @@ usertrap(struct trapframe *frame)
 	p->p_md.md_regs = frame;
 	refreshcreds(p);
 
-	if (!uvm_map_inentry(p, &p->p_spinentry, PROC_STACK(p),
-	    "[%s]%d/%d sp=%lx inside %lx-%lx: not MAP_STACK\n",
-	    uvm_map_inentry_sp, p->p_vmspace->vm_map.sserial))
-		goto out;
-
 	switch (type) {
 	case T_PROTFLT:			/* protection fault */
 	case T_TSSFLT:
@@ -381,6 +376,10 @@ usertrap(struct trapframe *frame)
 		break;
 
 	case T_PAGEFLT:			/* page fault */
+		if (!uvm_map_inentry(p, &p->p_spinentry, PROC_STACK(p),
+		    "[%s]%d/%d sp=%lx inside %lx-%lx: not MAP_STACK\n",
+		    uvm_map_inentry_sp, p->p_vmspace->vm_map.sserial))
+			goto out;
 		if (pageflttrap(frame, cr2, 1))
 			goto out;
 		/* FALLTHROUGH */
Index: hppa/hppa/trap.c
===================================================================
RCS file: /cvs/src/sys/arch/hppa/hppa/trap.c,v
retrieving revision 1.148
diff -u -p -u -r1.148 trap.c
--- hppa/hppa/trap.c	14 Sep 2020 19:04:30 -0000	1.148
+++ hppa/hppa/trap.c	23 Sep 2020 14:12:17 -0000
@@ -213,13 +213,8 @@ trap(int type, struct trapframe *frame)
 		mtctl(frame->tf_eiem, CR_EIEM);
 	}
 
-	if (type & T_USER) {
+	if (type & T_USER)
 		refreshcreds(p);
-		if (!uvm_map_inentry(p, &p->p_spinentry, PROC_STACK(p),
-		    "[%s]%d/%d sp=%lx inside %lx-%lx: not MAP_STACK\n",
-		    uvm_map_inentry_sp, p->p_vmspace->vm_map.sserial))
-			goto out;
-	}
 
 	switch (type) {
 	case T_NONEXIST:
@@ -462,6 +457,13 @@ datacc:
 	case T_ITLBMISS | T_USER:
 	case T_DTLBMISS:
 	case T_DTLBMISS | T_USER:
+		if (type & T_USER) {
+			if (!uvm_map_inentry(p, &p->p_spinentry, PROC_STACK(p),
+			    "[%s]%d/%d sp=%lx inside %lx-%lx: not MAP_STACK\n",
+			    uvm_map_inentry_sp, p->p_vmspace->vm_map.sserial))
+				goto out;
+		}
+
 		/*
 		 * it could be a kernel map for exec_map faults
 		 */
Index: i386/i386/trap.c
===================================================================
RCS file: /cvs/src/sys/arch/i386/i386/trap.c,v
retrieving revision 1.145
diff -u -p -u -r1.145 trap.c
--- i386/i386/trap.c	15 Sep 2020 09:30:31 -0000	1.145
+++ i386/i386/trap.c	23 Sep 2020 12:16:23 -0000
@@ -153,10 +153,6 @@ trap(struct trapframe *frame)
 		type |= T_USER;
 		p->p_md.md_regs = frame;
 		refreshcreds(p);
-		if (!uvm_map_inentry(p, &p->p_spinentry, PROC_STACK(p),
-		    "[%s]%d/%d sp=%lx inside %lx-%lx: not MAP_STACK\n",
-		    uvm_map_inentry_sp, p->p_vmspace->vm_map.sserial))
-			goto out;
 	}
 
 	switch (type) {
@@ -352,6 +348,10 @@ trap(struct trapframe *frame)
 		int error;
 		int signal, sicode;
 
+		if (!uvm_map_inentry(p, &p->p_spinentry, PROC_STACK(p),
+		    "[%s]%d/%d sp=%lx inside %lx-%lx: not MAP_STACK\n",
+		    uvm_map_inentry_sp, p->p_vmspace->vm_map.sserial))
+			goto out;
 		KERNEL_LOCK();
 	faultcommon:
 		vm = p->p_vmspace;
Index: m88k/m88k/trap.c
===================================================================
RCS file: /cvs/src/sys/arch/m88k/m88k/trap.c,v
retrieving revision 1.113
diff -u -p -u -r1.113 trap.c
--- m88k/m88k/trap.c	23 Sep 2020 19:45:32 -0000	1.113
+++ m88k/m88k/trap.c	23 Sep 2020 19:45:35 -0000
@@ -239,10 +239,6 @@ m88100_trap(u_int type, struct trapframe
 		type |= T_USER;
 		p->p_md.md_tf = frame;	/* for ptrace/signals */
 		refreshcreds(p);
-		if (!uvm_map_inentry(p, &p->p_spinentry, PROC_STACK(p),
-		    "[%s]%d/%d sp=%lx inside %lx-%lx: not MAP_STACK\n",
-		    uvm_map_inentry_sp, p->p_vmspace->vm_map.sserial))
-			goto userexit;
 	}
 	fault_type = SI_NOINFO;
 	fault_code = 0;
@@ -862,6 +858,11 @@ lose:
 		/* User mode instruction access fault */
 		/* FALLTHROUGH */
 	case T_DATAFLT+T_USER:
+		if (!uvm_map_inentry(p, &p->p_spinentry, PROC_STACK(p),
+		    "[%s]%d/%d sp=%lx inside %lx-%lx: not MAP_STACK\n",
+		    uvm_map_inentry_sp, p->p_vmspace->vm_map.sserial))
+			goto userexit;
+
 		KERNEL_LOCK();
 m88110_user_fault:
 		if (type == T_INSTFLT+T_USER) {
Index: mips64/mips64/trap.c
===================================================================
RCS file: /cvs/src/sys/arch/mips64/mips64/trap.c,v
retrieving revision 1.146
diff -u -p -u -r1.146 trap.c
--- mips64/mips64/trap.c	19 Aug 2020 10:10:58 -0000	1.146
+++ mips64/mips64/trap.c	23 Sep 2020 19:21:51 -0000
@@ -261,16 +261,11 @@ trap(struct trapframe *trapframe)
 	}
 #endif
 
-	if (type & T_USER) {
+	if (type & T_USER)
 		refreshcreds(p);
-		if (!uvm_map_inentry(p, &p->p_spinentry, PROC_STACK(p),
-		    "[%s]%d/%d sp=%lx inside %lx-%lx: not MAP_STACK\n",
-		    uvm_map_inentry_sp, p->p_vmspace->vm_map.sserial))
-			goto out;
-	}
 
 	itsa(trapframe, ci, p, type);
-out:
+
 	if (type & T_USER)
 		userret(p);
 }
@@ -394,6 +389,11 @@ itsa(struct trapframe *trapframe, struct
 		ftype = PROT_WRITE;
 		pcb = &p->p_addr->u_pcb;
 	fault_common:
+		if ((type & T_USER) &&
+		    !uvm_map_inentry(p, &p->p_spinentry, PROC_STACK(p),
+		    "[%s]%d/%d sp=%lx inside %lx-%lx: not MAP_STACK\n",
+		    uvm_map_inentry_sp, p->p_vmspace->vm_map.sserial))
+			return;
 
 #ifdef CPU_R4000
 		if (r4000_errata != 0) {
Index: powerpc/powerpc/trap.c
===================================================================
RCS file: /cvs/src/sys/arch/powerpc/powerpc/trap.c,v
retrieving revision 1.115
diff -u -p -u -r1.115 trap.c
--- powerpc/powerpc/trap.c	19 Aug 2020 10:10:58 -0000	1.115
+++ powerpc/powerpc/trap.c	23 Sep 2020 13:10:14 -0000
@@ -245,10 +245,6 @@ trap(struct trapframe *frame)
 	if (frame->srr1 & PSL_PR) {
 		type |= EXC_USER;
 		refreshcreds(p);
-		if (!uvm_map_inentry(p, &p->p_spinentry, PROC_STACK(p),
-		    "[%s]%d/%d sp=%lx inside %lx-%lx: not MAP_STACK\n",
-		    uvm_map_inentry_sp, p->p_vmspace->vm_map.sserial))
-			goto out;
 	}
 
 	switch (type) {
@@ -309,6 +305,11 @@ trap(struct trapframe *frame)
 		if (pte_spill_v(p->p_vmspace->vm_map.pmap,
 		    frame->dar, frame->dsisr, 0))
 			break;
+
+		if (!uvm_map_inentry(p, &p->p_spinentry, PROC_STACK(p),
+		    "[%s]%d/%d sp=%lx inside %lx-%lx: not MAP_STACK\n",
+		    uvm_map_inentry_sp, p->p_vmspace->vm_map.sserial))
+			goto out;
 
 		KERNEL_LOCK();
 		if (frame->dsisr & DSISR_STORE) {
Index: powerpc64/powerpc64/trap.c
===================================================================
RCS file: /cvs/src/sys/arch/powerpc64/powerpc64/trap.c,v
retrieving revision 1.37
diff -u -p -u -r1.37 trap.c
--- powerpc64/powerpc64/trap.c	15 Sep 2020 07:47:24 -0000	1.37
+++ powerpc64/powerpc64/trap.c	23 Sep 2020 13:14:55 -0000
@@ -92,10 +92,6 @@ trap(struct trapframe *frame)
 		type |= EXC_USER;
 		p->p_md.md_regs = frame;
 		refreshcreds(p);
-		if (!uvm_map_inentry(p, &p->p_spinentry, PROC_STACK(p),
-		    "[%s]%d/%d sp=%lx inside %lx-%lx: not MAP_STACK\n",
-		    uvm_map_inentry_sp, p->p_vmspace->vm_map.sserial))
-			goto out;
	}
 
 	switch (type) {
@@ -172,6 +168,11 @@ trap(struct trapframe *frame)
 		/* FALLTHROUGH */
 
 	case EXC_DSI|EXC_USER:
+		if (!uvm_map_inentry(p, &p->p_spinentry, PROC_STACK(p),
+		    "[%s]%d/%d sp=%lx inside %lx-%lx: not MAP_STACK\n",
+		    uvm_map_inentry_sp, p->p_vmspace->vm_map.sserial))
+			goto out;
+
 		map = &p->p_vmspace->vm_map;
 		va = frame->dar;
 		if (frame->dsisr & DSISR_STORE)
@@ -214,6 +215,11 @@ trap(struct trapframe *frame)
 		/* FALLTHROUGH */
 
 	case EXC_ISI|EXC_USER:
+		if (!uvm_map_inentry(p, &p->p_spinentry, PROC_STACK(p),
+		    "[%s]%d/%d sp=%lx inside %lx-%lx: not MAP_STACK\n",
+		    uvm_map_inentry_sp, p->p_vmspace->vm_map.sserial))
+			goto out;
+
 		map = &p->p_vmspace->vm_map;
 		va = frame->srr0;
 		ftype = PROT_READ | PROT_EXEC;
Index: sh/sh/trap.c
===================================================================
RCS file: /cvs/src/sys/arch/sh/sh/trap.c,v
retrieving revision 1.42
diff -u -p -u -r1.42 trap.c
--- sh/sh/trap.c	22 Sep 2020 15:50:20 -0000	1.42
+++ sh/sh/trap.c	23 Sep 2020 12:19:30 -0000
@@ -173,12 +173,6 @@ general_exception(struct proc *p, struct
 		KDASSERT(p->p_md.md_regs == tf); /* check exception depth */
 		expevt |= EXP_USER;
 		refreshcreds(p);
-		if (tra != _SH_TRA_SYSCALL << 2) {
-			if (!uvm_map_inentry(p, &p->p_spinentry, PROC_STACK(p),
-			    "[%s]%d/%d sp=%lx inside %lx-%lx: not MAP_STACK\n",
-			    uvm_map_inentry_sp, p->p_vmspace->vm_map.sserial))
-				goto out;
-		}
 	}
 
 	switch (expevt) {
@@ -389,6 +383,11 @@ tlb_exception(struct proc *p, struct tra
 
 	/* Select address space */
 	if (usermode) {
+		if (!uvm_map_inentry(p, &p->p_spinentry, PROC_STACK(p),
+		    "[%s]%d/%d sp=%lx inside %lx-%lx: not MAP_STACK\n",
+		    uvm_map_inentry_sp, p->p_vmspace->vm_map.sserial))
+			goto out;
+
 		TLB_ASSERT(p != NULL, "no curproc");
 		map = &p->p_vmspace->vm_map;
 		pmap = map->pmap;
Index: sparc64/sparc64/trap.c
===================================================================
RCS file: /cvs/src/sys/arch/sparc64/sparc64/trap.c,v
retrieving revision 1.103
diff -u -p -u -r1.103 trap.c
--- sparc64/sparc64/trap.c	19 Aug 2020 10:10:58 -0000	1.103
+++ sparc64/sparc64/trap.c	23 Sep 2020 18:11:24 -0000
@@ -426,10 +426,6 @@ trap(struct trapframe64 *tf, unsigned ty
 	pcb = &p->p_addr->u_pcb;
 	p->p_md.md_tf = tf;	/* for ptrace/signals */
 	refreshcreds(p);
-	if (!uvm_map_inentry(p, &p->p_spinentry, PROC_STACK(p),
-	    "[%s]%d/%d sp=%lx inside %lx-%lx: not MAP_STACK\n",
-	    uvm_map_inentry_sp, p->p_vmspace->vm_map.sserial))
-		goto out;
 
 	switch (type) {
 
@@ -678,7 +674,7 @@ dopanic:
 		trapsignal(p, SIGFPE, FPE_INTOVF_TRAP, FPE_INTOVF, sv);
 		break;
 	}
-out:
+
 	userret(p);
 	share_fpu(p, tf);
 #undef ADVANCE
@@ -791,8 +787,13 @@ data_access_fault(struct trapframe64 *tf
 			goto kfault;
 		}
 	} else {
-		KERNEL_LOCK();
 		p->p_md.md_tf = tf;
+		if (!uvm_map_inentry(p, &p->p_spinentry, PROC_STACK(p),
+		    "[%s]%d/%d sp=%lx inside %lx-%lx: not MAP_STACK\n",
+		    uvm_map_inentry_sp, p->p_vmspace->vm_map.sserial))
+			goto out;
+
+		KERNEL_LOCK();
 	}
 
 	vm = p->p_vmspace;
@@ -857,12 +858,12 @@ kfault:
 		trapsignal(p, signal, access_type, sicode, sv);
 	}
 
+	KERNEL_UNLOCK();
+
+out:
 	if ((tstate & TSTATE_PRIV) == 0) {
-		KERNEL_UNLOCK();
 		userret(p);
 		share_fpu(p, tf);
-	} else {
-		KERNEL_UNLOCK();
 	}
 }
 
@@ -934,8 +935,8 @@ data_access_error(struct trapframe64 *tf
 	}
 
 	trapsignal(p, SIGSEGV, PROT_READ | PROT_WRITE, SEGV_MAPERR, sv);
-out:
 
+out:
 	if ((tstate & TSTATE_PRIV) == 0) {
 		userret(p);
 		share_fpu(p, tf);
@@ -975,8 +976,13 @@ text_access_fault(struct trapframe64 *tf
 		(void) splhigh();
 		panic("kernel text_access_fault: pc=%lx va=%lx", pc, va);
 		/* NOTREACHED */
-	} else
+	} else {
 		p->p_md.md_tf = tf;
+		if (!uvm_map_inentry(p, &p->p_spinentry, PROC_STACK(p),
+		    "[%s]%d/%d sp=%lx inside %lx-%lx: not MAP_STACK\n",
+		    uvm_map_inentry_sp, p->p_vmspace->vm_map.sserial))
+			goto out;
+	}
 
 	KERNEL_LOCK();
@@ -1020,6 +1026,7 @@ text_access_fault(struct trapframe64 *tf
 
 	KERNEL_UNLOCK();
 
+out:
 	if ((tstate & TSTATE_PRIV) == 0) {
 		userret(p);
 		share_fpu(p, tf);
@@ -1077,8 +1084,13 @@ text_access_error(struct trapframe64 *tf
 		panic("kernel text error: pc=%lx sfsr=%lb", pc,
 		    sfsr, SFSR_BITS);
 		/* NOTREACHED */
-	} else
+	} else {
 		p->p_md.md_tf = tf;
+		if (!uvm_map_inentry(p, &p->p_spinentry, PROC_STACK(p),
+		    "[%s]%d/%d sp=%lx inside %lx-%lx: not MAP_STACK\n",
+		    uvm_map_inentry_sp, p->p_vmspace->vm_map.sserial))
+			goto out;
+	}
 
 	KERNEL_LOCK();