Module Name:	src
Committed By:	riastradh
Date:		Sun Mar 13 17:21:29 UTC 2022
Modified Files:
	src/sys/kern: kern_proc.c

Log Message:
kern: Fix ordering of loads for pid_table and pid_tbl_mask.

This introduces a load-acquire where there was none before.  This is a
simple correctness change.  We could avoid the load-acquire, and use
only load-consume, if we used a pointer indirection for _both_
pid_table and pid_tbl_mask.  That takes a little more work, and
probably costs an additional cache line of memory traffic, but might
be worth it to avoid the load-acquire for pid lookup.

Reported-by: syzbot+c49e405d0b977aeed...@syzkaller.appspotmail.com
Reported-by: syzbot+1c88ee7086f93607c...@syzkaller.appspotmail.com
Reported-by: syzbot+da4e9ed1319b75fe2...@syzkaller.appspotmail.com

To generate a diff of this commit:
cvs rdiff -u -r1.264 -r1.265 src/sys/kern/kern_proc.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
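The ordering discipline the change establishes can be sketched with plain
C11 atomics as follows.  This is an illustration only, not the kernel
code: the names (table, mask, publish, lookup, struct entry) are invented
for the example, and the kernel itself uses atomic_store_release(),
atomic_load_acquire(), and atomic_load_consume() from <sys/atomic.h> as
shown in the diff below.

#include <stdatomic.h>

struct entry { int value; };

static _Atomic(struct entry *) table;	/* grows; old entries are copied over */
static _Atomic(unsigned) mask;		/* table size - 1; only ever grows */

/* Writer (cf. expand_pid_table): publish the new table, then the new mask. */
void
publish(struct entry *new_table, unsigned new_mask)
{
	atomic_store_explicit(&table, new_table, memory_order_release);
	/*
	 * Any reader that observes new_mask is now guaranteed to observe
	 * new_table as well, because the table store happens before the
	 * mask store.
	 */
	atomic_store_explicit(&mask, new_mask, memory_order_release);
}

/*
 * Reader (cf. proc_find_lwp): read the mask first, then the table.  A
 * stale mask is safely conservative -- it is valid for whichever table
 * the second load returns.
 */
struct entry *
lookup(unsigned key)
{
	/*
	 * Acquire, not consume: the table load below does not data-depend
	 * on the mask value, so dependency ordering alone would not order
	 * these two loads -- which is why avoiding the acquire requires
	 * the pointer indirection mentioned in the log message.
	 */
	unsigned m = atomic_load_explicit(&mask, memory_order_acquire);
	/*
	 * The kernel uses atomic_load_consume() here, relying on the data
	 * dependency from the pointer to the indexed slot; this sketch
	 * uses acquire since C11's memory_order_consume is promoted to
	 * acquire by current compilers anyway.
	 */
	struct entry *t = atomic_load_explicit(&table, memory_order_acquire);
	return &t[key & m];
}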
Modified files:

Index: src/sys/kern/kern_proc.c
diff -u src/sys/kern/kern_proc.c:1.264 src/sys/kern/kern_proc.c:1.265
--- src/sys/kern/kern_proc.c:1.264	Thu Mar 10 12:21:35 2022
+++ src/sys/kern/kern_proc.c	Sun Mar 13 17:21:29 2022
@@ -1,4 +1,4 @@
-/*	$NetBSD: kern_proc.c,v 1.264 2022/03/10 12:21:35 riastradh Exp $	*/
+/*	$NetBSD: kern_proc.c,v 1.265 2022/03/13 17:21:29 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 1999, 2006, 2007, 2008, 2020 The NetBSD Foundation, Inc.
@@ -62,7 +62,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_proc.c,v 1.264 2022/03/10 12:21:35 riastradh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_proc.c,v 1.265 2022/03/13 17:21:29 riastradh Exp $");
 
 #ifdef _KERNEL_OPT
 #include "opt_kstack.h"
@@ -682,6 +682,7 @@ struct lwp *
 proc_find_lwp(proc_t *p, pid_t pid)
 {
 	struct pid_table *pt;
+	unsigned pt_mask;
 	struct lwp *l = NULL;
 	uintptr_t slot;
 	int s;
@@ -689,13 +690,22 @@ proc_find_lwp(proc_t *p, pid_t pid)
 	KASSERT(mutex_owned(p->p_lock));
 
 	/*
-	 * Look in the pid_table.  This is done unlocked inside a pserialize
-	 * read section covering pid_table's memory allocation only, so take
-	 * care to read the slot atomically and only once.  This issues a
-	 * memory barrier for dependent loads on alpha.
+	 * Look in the pid_table.  This is done unlocked inside a
+	 * pserialize read section covering pid_table's memory
+	 * allocation only, so take care to read things in the correct
+	 * order:
+	 *
+	 * 1. First read the table mask -- this only ever increases, in
+	 *    expand_pid_table, so a stale value is safely
+	 *    conservative.
+	 *
+	 * 2. Next read the pid table -- this is always set _before_
+	 *    the mask increases, so if we see a new table and stale
+	 *    mask, the mask is still valid for the table.
 	 */
 	s = pserialize_read_enter();
-	pt = &atomic_load_consume(&pid_table)[pid & pid_tbl_mask];
+	pt_mask = atomic_load_acquire(&pid_tbl_mask);
+	pt = &atomic_load_consume(&pid_table)[pid & pt_mask];
 	slot = atomic_load_consume(&pt->pt_slot);
 	if (__predict_false(!PT_IS_LWP(slot))) {
 		pserialize_read_exit(s);
@@ -742,18 +752,28 @@ struct lwp *
 proc_find_lwp_unlocked(proc_t *p, pid_t pid)
 {
 	struct pid_table *pt;
+	unsigned pt_mask;
 	struct lwp *l = NULL;
 	uintptr_t slot;
 
 	KASSERT(pserialize_in_read_section());
 
 	/*
-	 * Look in the pid_table.  This is done unlocked inside a pserialize
-	 * read section covering pid_table's memory allocation only, so take
-	 * care to read the slot atomically and only once.  This issues a
-	 * memory barrier for dependent loads on alpha.
+	 * Look in the pid_table.  This is done unlocked inside a
+	 * pserialize read section covering pid_table's memory
+	 * allocation only, so take care to read things in the correct
+	 * order:
+	 *
+	 * 1. First read the table mask -- this only ever increases, in
+	 *    expand_pid_table, so a stale value is safely
+	 *    conservative.
+	 *
+	 * 2. Next read the pid table -- this is always set _before_
+	 *    the mask increases, so if we see a new table and stale
+	 *    mask, the mask is still valid for the table.
 	 */
-	pt = &atomic_load_consume(&pid_table)[pid & pid_tbl_mask];
+	pt_mask = atomic_load_acquire(&pid_tbl_mask);
+	pt = &atomic_load_consume(&pid_table)[pid & pt_mask];
 	slot = atomic_load_consume(&pt->pt_slot);
 	if (__predict_false(!PT_IS_LWP(slot))) {
 		return NULL;
@@ -1004,7 +1024,8 @@ expand_pid_table(void)
 	tsz = pt_size * sizeof(struct pid_table);
 	n_pt = pid_table;
 	atomic_store_release(&pid_table, new_pt);
-	pid_tbl_mask = new_pt_mask;
+	KASSERT(new_pt_mask >= pid_tbl_mask);
+	atomic_store_release(&pid_tbl_mask, new_pt_mask);
 
 	/*
 	 * pid_max starts as PID_MAX (= 30000), once we have 16384
@@ -1183,6 +1204,8 @@ proc_free_pid_internal(pid_t pid, uintpt
 {
 	struct pid_table *pt;
 
+	KASSERT(mutex_owned(&proc_lock));
+
 	pt = &pid_table[pid & pid_tbl_mask];
 	KASSERT(PT_GET_TYPE(pt->pt_slot) == type);
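
For comparison, the alternative mentioned in the log message -- avoiding
the load-acquire by putting a pointer indirection in front of _both_
pid_table and pid_tbl_mask -- might look roughly like the following.
This is a hypothetical sketch, not code from the tree; struct
table_desc, cur_desc, publish_desc, and lookup_via_indirection are
invented names, again written with C11 atomics for illustration.

#include <stdatomic.h>

struct entry { int value; };

struct table_desc {
	struct entry	*td_table;
	unsigned	 td_mask;	/* always valid for td_table */
};

static _Atomic(struct table_desc *) cur_desc;

/* Writer: fill in a fresh descriptor, then publish it with one release store. */
void
publish_desc(struct table_desc *new_desc)
{
	atomic_store_explicit(&cur_desc, new_desc, memory_order_release);
}

struct entry *
lookup_via_indirection(unsigned key)
{
	/*
	 * One load-consume of the descriptor pointer is enough: both
	 * td_table and td_mask are reached through the loaded pointer,
	 * so dependency ordering keeps the pair mutually consistent and
	 * no acquire barrier is needed on the lookup path.  The cost is
	 * the extra dereference -- the "additional cache line of memory
	 * traffic" noted in the log message.
	 */
	struct table_desc *d =
	    atomic_load_explicit(&cur_desc, memory_order_consume);
	return &d->td_table[key & d->td_mask];
}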