Module Name:    src
Committed By:   riastradh
Date:           Sun Mar 13 17:21:29 UTC 2022

Modified Files:
        src/sys/kern: kern_proc.c

Log Message:
kern: Fix ordering of loads for pid_table and pid_tbl_mask.

This introduces a load-acquire where there was none before.  This is
a simple correctness change.  We could avoid the load-acquire, and
use only load-consume, if we used a pointer indirection for _both_
pid_table and pid_tbl_mask.  Takes a little more work, and probably
costs an additional cache line of memory traffic, but might be worth
it to avoid the load-acquire for pid lookup.

Reported-by: syzbot+c49e405d0b977aeed...@syzkaller.appspotmail.com
Reported-by: syzbot+1c88ee7086f93607c...@syzkaller.appspotmail.com
Reported-by: syzbot+da4e9ed1319b75fe2...@syzkaller.appspotmail.com


To generate a diff of this commit:
cvs rdiff -u -r1.264 -r1.265 src/sys/kern/kern_proc.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/kern/kern_proc.c
diff -u src/sys/kern/kern_proc.c:1.264 src/sys/kern/kern_proc.c:1.265
--- src/sys/kern/kern_proc.c:1.264	Thu Mar 10 12:21:35 2022
+++ src/sys/kern/kern_proc.c	Sun Mar 13 17:21:29 2022
@@ -1,4 +1,4 @@
-/*	$NetBSD: kern_proc.c,v 1.264 2022/03/10 12:21:35 riastradh Exp $	*/
+/*	$NetBSD: kern_proc.c,v 1.265 2022/03/13 17:21:29 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 1999, 2006, 2007, 2008, 2020 The NetBSD Foundation, Inc.
@@ -62,7 +62,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_proc.c,v 1.264 2022/03/10 12:21:35 riastradh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_proc.c,v 1.265 2022/03/13 17:21:29 riastradh Exp $");
 
 #ifdef _KERNEL_OPT
 #include "opt_kstack.h"
@@ -682,6 +682,7 @@ struct lwp *
 proc_find_lwp(proc_t *p, pid_t pid)
 {
 	struct pid_table *pt;
+	unsigned pt_mask;
 	struct lwp *l = NULL;
 	uintptr_t slot;
 	int s;
@@ -689,13 +690,22 @@ proc_find_lwp(proc_t *p, pid_t pid)
 	KASSERT(mutex_owned(p->p_lock));
 
 	/*
-	 * Look in the pid_table.  This is done unlocked inside a pserialize
-	 * read section covering pid_table's memory allocation only, so take
-	 * care to read the slot atomically and only once.  This issues a
-	 * memory barrier for dependent loads on alpha.
+	 * Look in the pid_table.  This is done unlocked inside a
+	 * pserialize read section covering pid_table's memory
+	 * allocation only, so take care to read things in the correct
+	 * order:
+	 *
+	 * 1. First read the table mask -- this only ever increases, in
+	 *    expand_pid_table, so a stale value is safely
+	 *    conservative.
+	 *
+	 * 2. Next read the pid table -- this is always set _before_
+	 *    the mask increases, so if we see a new table and stale
+	 *    mask, the mask is still valid for the table.
 	 */
 	s = pserialize_read_enter();
-	pt = &atomic_load_consume(&pid_table)[pid & pid_tbl_mask];
+	pt_mask = atomic_load_acquire(&pid_tbl_mask);
+	pt = &atomic_load_consume(&pid_table)[pid & pt_mask];
 	slot = atomic_load_consume(&pt->pt_slot);
 	if (__predict_false(!PT_IS_LWP(slot))) {
 		pserialize_read_exit(s);
@@ -742,18 +752,28 @@ struct lwp *
 proc_find_lwp_unlocked(proc_t *p, pid_t pid)
 {
 	struct pid_table *pt;
+	unsigned pt_mask;
 	struct lwp *l = NULL;
 	uintptr_t slot;
 
 	KASSERT(pserialize_in_read_section());
 
 	/*
-	 * Look in the pid_table.  This is done unlocked inside a pserialize
-	 * read section covering pid_table's memory allocation only, so take
-	 * care to read the slot atomically and only once.  This issues a
-	 * memory barrier for dependent loads on alpha.
+	 * Look in the pid_table.  This is done unlocked inside a
+	 * pserialize read section covering pid_table's memory
+	 * allocation only, so take care to read things in the correct
+	 * order:
+	 *
+	 * 1. First read the table mask -- this only ever increases, in
+	 *    expand_pid_table, so a stale value is safely
+	 *    conservative.
+	 *
+	 * 2. Next read the pid table -- this is always set _before_
+	 *    the mask increases, so if we see a new table and stale
+	 *    mask, the mask is still valid for the table.
 	 */
-	pt = &atomic_load_consume(&pid_table)[pid & pid_tbl_mask];
+	pt_mask = atomic_load_acquire(&pid_tbl_mask);
+	pt = &atomic_load_consume(&pid_table)[pid & pt_mask];
 	slot = atomic_load_consume(&pt->pt_slot);
 	if (__predict_false(!PT_IS_LWP(slot))) {
 		return NULL;
@@ -1004,7 +1024,8 @@ expand_pid_table(void)
 	tsz = pt_size * sizeof(struct pid_table);
 	n_pt = pid_table;
 	atomic_store_release(&pid_table, new_pt);
-	pid_tbl_mask = new_pt_mask;
+	KASSERT(new_pt_mask >= pid_tbl_mask);
+	atomic_store_release(&pid_tbl_mask, new_pt_mask);
 
 	/*
 	 * pid_max starts as PID_MAX (= 30000), once we have 16384
@@ -1183,6 +1204,8 @@ proc_free_pid_internal(pid_t pid, uintpt
 {
 	struct pid_table *pt;
 
+	KASSERT(mutex_owned(&proc_lock));
+
 	pt = &pid_table[pid & pid_tbl_mask];
 
 	KASSERT(PT_GET_TYPE(pt->pt_slot) == type);
