Theo de Raadt <dera...@openbsd.org> wrote:
> A few weeks ago a conversation about retguard (a diff is probably
> coming) caused me to re-consider & re-read the BROP paper
>
> https://www.scs.stanford.edu/brop/bittau-brop.pdf

Here is a new version of the diff.  The small piece of locking has been
improved by using a private mutex, and the check function has been
renamed.  It is important to handle special cases like coredump (this
diff contains a workaround for it), ptrace, and others.  As seen in
subr_log.c, there will be other naked copyin() calls which need xonly()
checks placed before them.

I made an attempt to protect *all* copyin() calls by changing copyin()
to do the check itself, but copyin() lacks the proc pointer, so I tried
curproc; that doesn't seem to work right, and doesn't feel right either.
Furthermore, I found there are copyin() calls that occur inside map
locks, so that seems like a step too far.
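For illustration, the abandoned wrapper approach would look roughly like
this (a hypothetical sketch, not part of the diff; copyin_checked is a
made-up name):

/*
 * Sketch of the rejected centralized approach: doing the check inside
 * a copyin() wrapper.  It has to reach for curproc because copyin()
 * has no proc argument, which is part of why it was abandoned.
 */
int
copyin_checked(const void *uaddr, void *kaddr, size_t len)
{
	struct proc *p = curproc;
	int error;

	if (p != NULL && (error = xonly(p, uaddr, len)))
		return (error);
	return (copyin(uaddr, kaddr, len));
}

The per-call-site approach in the diff below avoids all of that.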
Index: sys/systm.h
===================================================================
RCS file: /cvs/src/sys/sys/systm.h,v
retrieving revision 1.159
diff -u -p -u -r1.159 systm.h
--- sys/systm.h	3 Sep 2022 15:29:44 -0000	1.159
+++ sys/systm.h	21 Dec 2022 07:17:32 -0000
@@ -211,6 +211,8 @@ int	copyin(const void *, void *, size_t)
 int	copyout(const void *, void *, size_t);
 int	copyin32(const uint32_t *, uint32_t *);
 
+int	xonly(struct proc *, const void *, size_t);
+
 void	random_start(int);
 void	enqueue_randomness(unsigned int);
 void	suspend_randomness(void);
Index: kern/exec_subr.c
===================================================================
RCS file: /cvs/src/sys/kern/exec_subr.c,v
retrieving revision 1.64
diff -u -p -u -r1.64 exec_subr.c
--- kern/exec_subr.c	5 Dec 2022 23:18:37 -0000	1.64
+++ kern/exec_subr.c	21 Dec 2022 06:58:19 -0000
@@ -215,6 +215,10 @@ vmcmd_map_pagedvn(struct proc *p, struct
 		if (cmd->ev_flags & VMCMD_IMMUTABLE)
 			uvm_map_immutable(&p->p_vmspace->vm_map,
 			    cmd->ev_addr, round_page(cmd->ev_addr + cmd->ev_len), 1);
+		if ((flags & UVM_FLAG_SYSCALL) ||
+		    ((cmd->ev_flags & VMCMD_IMMUTABLE) && (cmd->ev_prot & PROT_EXEC)))
+			uvm_map_xonly(&p->p_vmspace->vm_map,
+			    cmd->ev_addr, round_page(cmd->ev_addr + cmd->ev_len));
 	}
 
 	return (error);
Index: kern/kern_sig.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_sig.c,v
retrieving revision 1.301
diff -u -p -u -r1.301 kern_sig.c
--- kern/kern_sig.c	16 Oct 2022 16:27:02 -0000	1.301
+++ kern/kern_sig.c	21 Dec 2022 06:58:19 -0000
@@ -1642,6 +1642,9 @@ coredump(struct proc *p)
 
 	atomic_setbits_int(&pr->ps_flags, PS_COREDUMP);
 
+	/* disable xonly checks, so we can write out text sections if needed */
+	p->p_vmspace->vm_map.xonly_count = 0;
+
 	/* Don't dump if will exceed file size limit. */
 	if (USPACE + ptoa(vm->vm_dsize + vm->vm_ssize) >=
 	    lim_cur(RLIMIT_CORE))
 		return (EFBIG);
Index: kern/kern_subr.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_subr.c,v
retrieving revision 1.51
diff -u -p -u -r1.51 kern_subr.c
--- kern/kern_subr.c	14 Aug 2022 01:58:27 -0000	1.51
+++ kern/kern_subr.c	21 Dec 2022 15:50:34 -0000
@@ -78,8 +78,12 @@ uiomove(void *cp, size_t n, struct uio *
 			sched_pause(preempt);
 			if (uio->uio_rw == UIO_READ)
 				error = copyout(cp, iov->iov_base, cnt);
-			else
-				error = copyin(iov->iov_base, cp, cnt);
+			else {
+				error = xonly(uio->uio_procp,
+				    iov->iov_base, cnt);
+				if (error == 0)
+					error = copyin(iov->iov_base, cp, cnt);
+			}
 			if (error)
 				return (error);
 			break;
Index: kern/subr_log.c
===================================================================
RCS file: /cvs/src/sys/kern/subr_log.c,v
retrieving revision 1.75
diff -u -p -u -r1.75 subr_log.c
--- kern/subr_log.c	2 Jul 2022 08:50:42 -0000	1.75
+++ kern/subr_log.c	21 Dec 2022 15:16:41 -0000
@@ -644,6 +644,8 @@ dosendsyslog(struct proc *p, const char
 	 */
 	len = MIN(nbyte, sizeof(pri));
 	if (sflg == UIO_USERSPACE) {
+		if ((error = xonly(p, buf, len)))
+			return (error);
 		if ((error = copyin(buf, pri, len)))
 			return (error);
 	} else
Index: uvm/uvm_map.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_map.c,v
retrieving revision 1.305
diff -u -p -u -r1.305 uvm_map.c
--- uvm/uvm_map.c	18 Dec 2022 23:41:17 -0000	1.305
+++ uvm/uvm_map.c	21 Dec 2022 07:17:52 -0000
@@ -2500,6 +2500,7 @@ uvm_map_setup(struct vm_map *map, pmap_t
 	else
 		rw_init(&map->lock, "kmmaplk");
 	mtx_init(&map->mtx, IPL_VM);
+	mtx_init(&map->xonly_mtx, IPL_VM);
 	mtx_init(&map->flags_lock, IPL_VM);
 
 	/* Configure the allocators. */
@@ -3472,6 +3473,7 @@ uvmspace_exec(struct proc *p, vaddr_t st
 		uvmspace_free(ovm);
 	}
+	p->p_vmspace->vm_map.xonly_count = 0;
 
 	/* Release dead entries */
 	uvm_unmap_detach(&dead_entries, 0);
@@ -4258,9 +4260,75 @@ uvm_map_syscall(struct vm_map *map, vadd
 		entry = RBT_NEXT(uvm_map_addr, entry);
 	}
 
+	/* Add libc's text segment to the XONLY list */
+	mtx_enter(&map->xonly_mtx);
+	if (map->xonly_count < UVM_MAP_XONLY_MAX) {
+		//printf("%d xsysc %lx-%lx\n", map->xonly_count, start, end);
+		map->xonly[map->xonly_count].start = start;
+		map->xonly[map->xonly_count].end = end;
+		map->xonly_count++;
+	}
+	mtx_leave(&map->xonly_mtx);
+
 	map->wserial++;
 	map->flags |= VM_MAP_SYSCALL_ONCE;
 	vm_map_unlock(map);
+	return (0);
+}
+
+/*
+ * xonly: if the address is in an x-only region, return EFAULT
+ */
+int
+xonly(struct proc *p, const void *vstart, size_t len)
+{
+	struct vm_map *map = &p->p_vmspace->vm_map;
+	const vaddr_t start = (vaddr_t)vstart;
+	const vaddr_t end = start + len;
+	int i, r = 0;
+
+	/*
+	 * When system calls are registered and msyscall(2) is blocked,
+	 * there are no new calls to setup xonly regions
+	 */
+	if ((map->flags & VM_MAP_SYSCALL_ONCE) == 0)
+		mtx_enter(&map->xonly_mtx);
+	for (i = 0; i < map->xonly_count; i++) {
+		vaddr_t s = map->xonly[i].start, e = map->xonly[i].end;
+
+		if ((start >= s && start < e) || (end >= s && end < e)) {
+			r = EFAULT;
+			break;
+		}
+	}
+	if ((map->flags & VM_MAP_SYSCALL_ONCE) == 0)
+		mtx_leave(&map->xonly_mtx);
+	return (r);
+}
+
+/*
+ * uvm_map_xonly: remember regions which are X-only for uiomove()
+ *
+ * => map must be unlocked
+ */
+int
+uvm_map_xonly(struct vm_map *map, vaddr_t start, vaddr_t end)
+{
+	if (start > end)
+		return EINVAL;
+	start = MAX(start, map->min_offset);
+	end = MIN(end, map->max_offset);
+	if (start >= end)
+		return 0;
+
+	mtx_enter(&map->xonly_mtx);
+	if (map->xonly_count < UVM_MAP_XONLY_MAX) {
+		//printf("%d xonly %lx-%lx\n", map->xonly_count, start, end);
+		map->xonly[map->xonly_count].start = start;
+		map->xonly[map->xonly_count].end = end;
+		map->xonly_count++;
+	}
+	mtx_leave(&map->xonly_mtx);
 	return (0);
 }
Index: uvm/uvm_map.h
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_map.h,v
retrieving revision 1.81
diff -u -p -u -r1.81 uvm_map.h
--- uvm/uvm_map.h	17 Nov 2022 23:26:07 -0000	1.81
+++ uvm/uvm_map.h	21 Dec 2022 07:17:18 -0000
@@ -168,6 +168,12 @@ struct vm_map_entry {
 	vsize_t			fspace_augment;	/* max(fspace) in subtree */
 };
 
+struct uvm_xonly {
+	vaddr_t			start;
+	vaddr_t			end;
+};
+#define UVM_MAP_XONLY_MAX	4	/* main, sigtramp, ld.so, libc.so */
+
 #define VM_MAPENT_ISWIRED(entry)	((entry)->wired_count != 0)
 
 TAILQ_HEAD(uvm_map_deadq, vm_map_entry);	/* dead entry queue */
@@ -309,11 +315,15 @@ struct vm_map {
 	struct uvm_addr_state	*uaddr_any[4];	/* More selectors. */
 	struct uvm_addr_state	*uaddr_brk_stack; /* Brk/stack selector. */
 
+	struct uvm_xonly	xonly[UVM_MAP_XONLY_MAX];
+	int			xonly_count;
+
 	/*
 	 * XXX struct mutex changes size because of compile options, so place
 	 * place after fields which are inspected by libkvm / procmap(8)
 	 */
 	struct rwlock		lock;		/* Non-intrsafe lock */
+	struct mutex		xonly_mtx;	/* for xonly variables */
 	struct mutex		mtx;		/* Intrsafe lock */
 	struct mutex		flags_lock;	/* flags lock */
 };
@@ -354,6 +364,7 @@ int	uvm_map_extract(struct vm_map *, va
 struct vm_map *	uvm_map_create(pmap_t, vaddr_t, vaddr_t, int);
 vaddr_t		uvm_map_pie(vaddr_t);
 vaddr_t		uvm_map_hint(struct vmspace *, vm_prot_t, vaddr_t, vaddr_t);
+int		uvm_map_xonly(struct vm_map *, vaddr_t, vaddr_t);
 int		uvm_map_syscall(struct vm_map *, vaddr_t, vaddr_t);
 int		uvm_map_immutable(struct vm_map *, vaddr_t, vaddr_t, int);
 int		uvm_map_inherit(struct vm_map *, vaddr_t, vaddr_t, vm_inherit_t);
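To see the effect from userland, a quick test can try to feed text
segment bytes to a system call; with the diff applied the write(2)
should fail with EFAULT in uiomove() instead of leaking instruction
bytes.  A rough sketch of such a test (mine, hypothetical, not part of
the diff):

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	/* any address inside an x-only text segment should do */
	const void *text = (const void *)strlen;
	ssize_t n;

	/* write(2) pulls the user buffer in via uiomove()/copyin() */
	n = write(STDOUT_FILENO, text, 64);
	if (n == -1)
		printf("write from text: %s\n", strerror(errno)); /* expect EFAULT */
	else
		printf("leaked %zd bytes of text\n", n);
	return 0;
}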