Module Name:    src
Committed By:   riastradh
Date:           Thu Mar 10 12:21:25 UTC 2022

Modified Files:
        src/sys/kern: kern_lwp.c kern_synch.c

Log Message:
kern: Fix synchronization of clearing LP_RUNNING and lwp_free.

1. membar_sync is not necessary here -- only a store-release is
   required.

2. membar_consumer _before_ loading l->l_pflag is not enough; a
   load-acquire is required.

Actually it's not really clear to me why any barriers are needed, since
the store-release and load-acquire should be implied by releasing and
acquiring the lwp lock (and maybe we could spin with the lock instead
of reading l->l_pflag unlocked).  But maybe there's something subtle
about access to l->l_mutex that's not obvious here.
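
For readers unfamiliar with the pairing this commit establishes, here is a
minimal userland sketch (not NetBSD kernel code) of the same shape: the side
clearing a RUNNING bit does so with release ordering after its last use of the
departing thread's state, and the reaping side spins with acquire loads before
it may touch or free that state.  C11 <stdatomic.h> stands in for the kernel's
atomic_store_release()/atomic_load_acquire(), and the fake_lwp structure and
the switcher/reaper names are purely illustrative, not anything from the tree.

	/*
	 * Userland analogue of the lwp_startup()/mi_switch() vs. lwp_free()
	 * pairing in the diff below.  Build with: cc -std=c11 -pthread.
	 */
	#include <stdatomic.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <pthread.h>

	#define	FAKE_LP_RUNNING	0x01	/* stands in for LP_RUNNING */

	struct fake_lwp {
		atomic_uint	pflag;		/* stands in for l->l_pflag */
		int		scratch;	/* only valid while running */
	};

	static void *
	switcher(void *arg)		/* plays the role of the switching CPU */
	{
		struct fake_lwp *l = arg;

		l->scratch = 42;	/* last touch of the dying thread's state */

		/*
		 * Clear RUNNING with release ordering: everything above
		 * happens-before a reaper that observes the bit clear with
		 * an acquire load.  (The kernel does a plain read and an
		 * atomic_store_release under the lwp lock instead.)
		 */
		atomic_fetch_and_explicit(&l->pflag, ~FAKE_LP_RUNNING,
		    memory_order_release);
		return NULL;
	}

	static void *
	reaper(void *arg)		/* plays the role of lwp_free() */
	{
		struct fake_lwp *l = arg;

		/* Spin with acquire loads until the switcher has let go. */
		while (atomic_load_explicit(&l->pflag, memory_order_acquire) &
		    FAKE_LP_RUNNING)
			;		/* SPINLOCK_BACKOFF_HOOK in the kernel */

		/* Now safe to read (and, in the kernel, free) the state. */
		printf("scratch = %d\n", l->scratch);
		return NULL;
	}

	int
	main(void)
	{
		struct fake_lwp *l = calloc(1, sizeof(*l));
		pthread_t t1, t2;

		atomic_store_explicit(&l->pflag, FAKE_LP_RUNNING,
		    memory_order_relaxed);
		pthread_create(&t2, NULL, reaper, l);
		pthread_create(&t1, NULL, switcher, l);
		pthread_join(t1, NULL);
		pthread_join(t2, NULL);
		free(l);
		return 0;
	}

The old code's membar_consumer() before the load gave no ordering between the
load of the flag and the later frees, which is exactly the gap the acquire
load closes; the release store likewise replaces the stronger-than-needed
membar_sync().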


To generate a diff of this commit:
cvs rdiff -u -r1.246 -r1.247 src/sys/kern/kern_lwp.c
cvs rdiff -u -r1.349 -r1.350 src/sys/kern/kern_synch.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/kern/kern_lwp.c
diff -u src/sys/kern/kern_lwp.c:1.246 src/sys/kern/kern_lwp.c:1.247
--- src/sys/kern/kern_lwp.c:1.246	Wed Dec 22 16:57:28 2021
+++ src/sys/kern/kern_lwp.c	Thu Mar 10 12:21:25 2022
@@ -1,4 +1,4 @@
-/*	$NetBSD: kern_lwp.c,v 1.246 2021/12/22 16:57:28 thorpej Exp $	*/
+/*	$NetBSD: kern_lwp.c,v 1.247 2022/03/10 12:21:25 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2001, 2006, 2007, 2008, 2009, 2019, 2020
@@ -217,7 +217,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_lwp.c,v 1.246 2021/12/22 16:57:28 thorpej Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_lwp.c,v 1.247 2022/03/10 12:21:25 riastradh Exp $");
 
 #include "opt_ddb.h"
 #include "opt_lockdebug.h"
@@ -1017,16 +1017,19 @@ lwp_startup(struct lwp *prev, struct lwp
 	KASSERT(curcpu()->ci_mtx_count == -2);
 
 	/*
-	 * Immediately mark the previous LWP as no longer running and unlock
-	 * (to keep lock wait times short as possible).  If a zombie, don't
-	 * touch after clearing LP_RUNNING as it could be reaped by another
-	 * CPU.  Issue a memory barrier to ensure this.
+	 * Immediately mark the previous LWP as no longer running and
+	 * unlock (to keep lock wait times short as possible).  If a
+	 * zombie, don't touch after clearing LP_RUNNING as it could be
+	 * reaped by another CPU.  Use atomic_store_release to ensure
+	 * this -- matches atomic_load_acquire in lwp_free.
 	 */
 	lock = prev->l_mutex;
 	if (__predict_false(prev->l_stat == LSZOMB)) {
-		membar_sync();
+		atomic_store_release(&prev->l_pflag,
+		    prev->l_pflag & ~LP_RUNNING);
+	} else {
+		prev->l_pflag &= ~LP_RUNNING;
 	}
-	prev->l_pflag &= ~LP_RUNNING;
 	mutex_spin_exit(lock);
 
 	/* Correct spin mutex count after mi_switch(). */
@@ -1246,9 +1249,12 @@ lwp_free(struct lwp *l, bool recycle, bo
 	/*
 	 * In the unlikely event that the LWP is still on the CPU,
 	 * then spin until it has switched away.
+	 *
+	 * atomic_load_acquire matches atomic_store_release in
+	 * lwp_startup and mi_switch.
 	 */
-	membar_consumer();
-	while (__predict_false((l->l_pflag & LP_RUNNING) != 0)) {
+	while (__predict_false((atomic_load_acquire(&l->l_pflag) & LP_RUNNING)
+		!= 0)) {
 		SPINLOCK_BACKOFF_HOOK;
 	}
 

Index: src/sys/kern/kern_synch.c
diff -u src/sys/kern/kern_synch.c:1.349 src/sys/kern/kern_synch.c:1.350
--- src/sys/kern/kern_synch.c:1.349	Sat May 23 23:42:43 2020
+++ src/sys/kern/kern_synch.c	Thu Mar 10 12:21:25 2022
@@ -1,4 +1,4 @@
-/*	$NetBSD: kern_synch.c,v 1.349 2020/05/23 23:42:43 ad Exp $	*/
+/*	$NetBSD: kern_synch.c,v 1.350 2022/03/10 12:21:25 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 1999, 2000, 2004, 2006, 2007, 2008, 2009, 2019, 2020
@@ -69,7 +69,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_synch.c,v 1.349 2020/05/23 23:42:43 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_synch.c,v 1.350 2022/03/10 12:21:25 riastradh Exp $");
 
 #include "opt_kstack.h"
 #include "opt_dtrace.h"
@@ -783,18 +783,23 @@ mi_switch(lwp_t *l)
 
 		/*
 		 * Immediately mark the previous LWP as no longer running
-		 * and unlock (to keep lock wait times short as possible). 
+		 * and unlock (to keep lock wait times short as possible).
 		 * We'll still be at IPL_SCHED afterwards.  If a zombie,
 		 * don't touch after clearing LP_RUNNING as it could be
 		 * reaped by another CPU.  Issue a memory barrier to ensure
 		 * this.
+		 *
+		 * atomic_store_release matches atomic_load_acquire in
+		 * lwp_free.
 		 */
 		KASSERT((prevlwp->l_pflag & LP_RUNNING) != 0);
 		lock = prevlwp->l_mutex;
 		if (__predict_false(prevlwp->l_stat == LSZOMB)) {
-			membar_sync();
+			atomic_store_release(&prevlwp->l_pflag,
+			    prevlwp->l_pflag & ~LP_RUNNING);
+		} else {
+			prevlwp->l_pflag &= ~LP_RUNNING;
 		}
-		prevlwp->l_pflag &= ~LP_RUNNING;
 		mutex_spin_exit(lock);
 
 		/*
