Module Name:    src
Committed By:   riastradh
Date:           Sun Feb 27 14:16:43 UTC 2022

Modified Files:
        src/sys/kern: subr_pool.c

Log Message:
pool(9): Membar audit.

- Use atomic_store_release and atomic_load_consume for associating a
  freshly constructed pool_cache with its underlying pool.  The pool
  gets published in various ways before the pool cache is fully
  constructed.  (A sketch of this pattern follows the log message.)

  => Nix membar_sync -- no store-before-load is needed here.

- Take pool_head_lock around the sysctl kern.pool TAILQ_FOREACH.  Then
  take a reference count, and drop the lock, around copyout.  (A sketch
  of this dance also follows the log message.)

  => Otherwise, pools could be partially initialized or freed while
     we're still trying to read from them -- and in the worst case,
     we might see a corrupted view of the tailq.

  => If we kept the lock around copyout, this could deadlock in memory
     allocation.

  => If we didn't take a reference count while releasing the lock, the
     pool could be destroyed while we're trying to traverse the list,
     sending us into oblivion instead of the next element.


To generate a diff of this commit:
cvs rdiff -u -r1.280 -r1.281 src/sys/kern/subr_pool.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/kern/subr_pool.c
diff -u src/sys/kern/subr_pool.c:1.280 src/sys/kern/subr_pool.c:1.281
--- src/sys/kern/subr_pool.c:1.280	Fri Dec 24 00:13:53 2021
+++ src/sys/kern/subr_pool.c	Sun Feb 27 14:16:43 2022
@@ -1,4 +1,4 @@
-/*	$NetBSD: subr_pool.c,v 1.280 2021/12/24 00:13:53 riastradh Exp $	*/
+/*	$NetBSD: subr_pool.c,v 1.281 2022/02/27 14:16:43 riastradh Exp $	*/
 
 /*
  * Copyright (c) 1997, 1999, 2000, 2002, 2007, 2008, 2010, 2014, 2015, 2018,
@@ -33,7 +33,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: subr_pool.c,v 1.280 2021/12/24 00:13:53 riastradh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: subr_pool.c,v 1.281 2022/02/27 14:16:43 riastradh Exp $");
 
 #ifdef _KERNEL_OPT
 #include "opt_ddb.h"
@@ -1648,6 +1648,7 @@ pool_reclaim(struct pool *pp)
 {
 	struct pool_item_header *ph, *phnext;
 	struct pool_pagelist pq;
+	struct pool_cache *pc;
 	uint32_t curtime;
 	bool klock;
 	int rv;
@@ -1673,8 +1674,8 @@ pool_reclaim(struct pool *pp)
 		klock = false;
 
 	/* Reclaim items from the pool's cache (if any). */
-	if (pp->pr_cache != NULL)
-		pool_cache_invalidate(pp->pr_cache);
+	if ((pc = atomic_load_consume(&pp->pr_cache)) != NULL)
+		pool_cache_invalidate(pc);
 
 	if (mutex_tryenter(&pp->pr_lock) == 0) {
 		if (klock) {
@@ -1883,7 +1884,7 @@ pool_print1(struct pool *pp, const char 
 	if (skip_empty && pp->pr_nget == 0)
 		return;
 
-	if ((pc = pp->pr_cache) != NULL) {
+	if ((pc = atomic_load_consume(&pp->pr_cache)) != NULL) {
 		(*pr)("POOLCACHE");
 	} else {
 		(*pr)("POOL");
@@ -1895,7 +1896,6 @@ pool_print1(struct pool *pp, const char 
 		    pp->pr_wchan, pp, pp->pr_size, pp->pr_align, pp->pr_npages,
 		    pp->pr_nitems, pp->pr_nout, pp->pr_nget, pp->pr_nput,
 		    pp->pr_npagealloc, pp->pr_npagefree, pp->pr_nidle);
-		
 		return;
 	}
 
@@ -2184,8 +2184,7 @@ pool_cache_bootstrap(pool_cache_t pc, si
 	if (__predict_true(!cold))
 		mutex_exit(&pool_head_lock);
 
-	membar_sync();
-	pp->pr_cache = pc;
+	atomic_store_release(&pp->pr_cache, pc);
 }
 
 /*
@@ -2224,7 +2223,7 @@ pool_cache_bootstrap_destroy(pool_cache_
 
 	/* Disassociate it from the pool. */
 	mutex_enter(&pp->pr_lock);
-	pp->pr_cache = NULL;
+	atomic_store_relaxed(&pp->pr_cache, NULL);
 	mutex_exit(&pp->pr_lock);
 
 	/* Destroy per-CPU data */
@@ -3339,6 +3338,7 @@ pool_whatis(uintptr_t addr, void (*pr)(c
 
 	TAILQ_FOREACH(pp, &pool_head, pr_poollist) {
 		struct pool_item_header *ph;
+		struct pool_cache *pc;
 		uintptr_t item;
 		bool allocated = true;
 		bool incache = false;
@@ -3373,8 +3373,8 @@ pool_whatis(uintptr_t addr, void (*pr)(c
 			allocated = pool_allocated(pp, ph, addr);
 		}
 found:
-		if (allocated && pp->pr_cache) {
-			pool_cache_t pc = pp->pr_cache;
+		if (allocated &&
+		    (pc = atomic_load_consume(&pp->pr_cache)) != NULL) {
 			struct pool_cache_group *pcg;
 			int i;
 
@@ -3437,9 +3437,11 @@ pool_sysctl(SYSCTLFN_ARGS)
 	memset(&data, 0, sizeof(data));
 	error = 0;
 	written = 0;
+	mutex_enter(&pool_head_lock);
 	TAILQ_FOREACH(pp, &pool_head, pr_poollist) {
 		if (written + sizeof(data) > *oldlenp)
 			break;
+		pp->pr_refcnt++;
 		strlcpy(data.pr_wchan, pp->pr_wchan, sizeof(data.pr_wchan));
 		data.pr_pagesize = pp->pr_alloc->pa_pagesz;
 		data.pr_flags = pp->pr_roflags | pp->pr_flags;
@@ -3469,9 +3471,8 @@ pool_sysctl(SYSCTLFN_ARGS)
 		data.pr_cache_nempty = 0;
 		data.pr_cache_ncontended = 0;
 		data.pr_cache_npartial = 0;
-		if (pp->pr_cache) {
+		if ((pc = atomic_load_consume(&pp->pr_cache)) != NULL) {
 			uint32_t nfull = 0;
-			pc = pp->pr_cache;
 			data.pr_cache_meta_size = pc->pc_pcgsize;
 			for (i = 0; i < pc->pc_ncpu; ++i) {
 				cc = pc->pc_cpus[i];
@@ -3492,12 +3493,19 @@ pool_sysctl(SYSCTLFN_ARGS)
 		data.pr_cache_nhit_global = data.pr_cache_nmiss_pcpu -
 		    data.pr_cache_nmiss_global;
 
+		if (pp->pr_refcnt == UINT_MAX) /* XXX possible? */
+			continue;
+		mutex_exit(&pool_head_lock);
 		error = sysctl_copyout(l, &data, oldp, sizeof(data));
+		mutex_enter(&pool_head_lock);
+		if (--pp->pr_refcnt == 0)
+			cv_broadcast(&pool_busy);
 		if (error)
 			break;
 		written += sizeof(data);
 		oldp = (char *)oldp + sizeof(data);
 	}
+	mutex_exit(&pool_head_lock);
 
 	*oldlenp = written;
 	return error;
