Module Name:    src
Committed By:   chs
Date:           Mon May 15 01:42:42 UTC 2023

Modified Files:
        src/sys/uvm: uvm_map.c

Log Message:
uvm: avoid a deadlock in uvm_map_clean()

The locking order between map locks and page "busy" locks
is that the page "busy" lock comes first, but uvm_map_clean()
breaks this rule by holding a map locked (as reader) while
waiting for page "busy" locks.

If another thread is in the page-fault path holding a page
"busy" lock while waiting for the map lock (as a reader)
and at the same time a third thread is blocked waiting for
the map lock as a writer (which blocks the page-fault thread),
then these three threads will all deadlock with each other.

Fix this by marking the map "busy" (to block any modifications)
and unlocking the map lock before possibly waiting for any
page "busy" locks.

Martin Pieuchot reported that the same problem existed in OpenBSD
he applied this fix there after several people tested it.

fixes PR 56952


To generate a diff of this commit:
cvs rdiff -u -r1.405 -r1.406 src/sys/uvm/uvm_map.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/uvm/uvm_map.c
diff -u src/sys/uvm/uvm_map.c:1.405 src/sys/uvm/uvm_map.c:1.406
--- src/sys/uvm/uvm_map.c:1.405	Fri Mar 24 07:26:21 2023
+++ src/sys/uvm/uvm_map.c	Mon May 15 01:42:42 2023
@@ -1,4 +1,4 @@
-/*	$NetBSD: uvm_map.c,v 1.405 2023/03/24 07:26:21 skrll Exp $	*/
+/*	$NetBSD: uvm_map.c,v 1.406 2023/05/15 01:42:42 chs Exp $	*/
 
 /*
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -66,7 +66,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_map.c,v 1.405 2023/03/24 07:26:21 skrll Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_map.c,v 1.406 2023/05/15 01:42:42 chs Exp $");
 
 #include "opt_ddb.h"
 #include "opt_pax.h"
@@ -3887,8 +3887,7 @@ uvm_map_pageable_all(struct vm_map *map,
  * => never a need to flush amap layer since the anonymous memory has
  *	no permanent home, but may deactivate pages there
  * => called from sys_msync() and sys_madvise()
- * => caller must not write-lock map (read OK).
- * => we may sleep while cleaning if SYNCIO [with map read-locked]
+ * => caller must not have map locked
  */
 
 int
@@ -3910,10 +3909,10 @@ uvm_map_clean(struct vm_map *map, vaddr_
 	KASSERT((flags & (PGO_FREE|PGO_DEACTIVATE)) !=
 		(PGO_FREE|PGO_DEACTIVATE));
 
-	vm_map_lock_read(map);
+	vm_map_lock(map);
 	VM_MAP_RANGE_CHECK(map, start, end);
-	if (uvm_map_lookup_entry(map, start, &entry) == false) {
-		vm_map_unlock_read(map);
+	if (!uvm_map_lookup_entry(map, start, &entry)) {
+		vm_map_unlock(map);
 		return EFAULT;
 	}
 
@@ -3923,22 +3922,24 @@ uvm_map_clean(struct vm_map *map, vaddr_
 
 	for (current = entry; current->start < end; current = current->next) {
 		if (UVM_ET_ISSUBMAP(current)) {
-			vm_map_unlock_read(map);
+			vm_map_unlock(map);
 			return EINVAL;
 		}
 		if ((flags & PGO_FREE) != 0 && VM_MAPENT_ISWIRED(entry)) {
-			vm_map_unlock_read(map);
+			vm_map_unlock(map);
 			return EBUSY;
 		}
 		if (end <= current->end) {
 			break;
 		}
 		if (current->end != current->next->start) {
-			vm_map_unlock_read(map);
+			vm_map_unlock(map);
 			return EFAULT;
 		}
 	}
 
+	vm_map_busy(map);
+	vm_map_unlock(map);
 	error = 0;
 	for (current = entry; start < end; current = current->next) {
 		amap = current->aref.ar_amap;	/* upper layer */
@@ -4044,8 +4045,8 @@ uvm_map_clean(struct vm_map *map, vaddr_
 		}
 		start += size;
 	}
-	vm_map_unlock_read(map);
-	return (error);
+	vm_map_unbusy(map);
+	return error;
 }
 
 

Reply via email to