Hi,
There is a lock ordering problem with the new tdb_sadb_mtx mutex.
panic: acquiring blockable sleep lock with spinlock or critical section held (kernel_lock) &kernel_lock
Stopped at db_enter+0x4: popl %ebp
TID PID UID PRFLAGS PFLAGS CPU COMMAND
*270299 46739 0 0x3 0 22 ipsecctl
355250 66710 0 0x14000 0x40000200 0 softclock
db_enter() at db_enter+0x4
panic(d0be7299) at panic+0x7a
witness_checkorder(d0e96df0,9,0) at witness_checkorder+0x94e
__mp_lock(d0e96ce8) at __mp_lock+0x4a
_kernel_lock() at _kernel_lock+0x3a
ah_zeroize(dc1bd978) at ah_zeroize+0x4a
tdb_free(dc1bd978) at tdb_free+0x47
pfkeyv2_sa_flush(dc1bd978,d9783e23,0) at pfkeyv2_sa_flush+0x27
tdb_walk(0,d0990270,d9783e23) at tdb_walk+0x79
pfkeyv2_send(d7edd990,d9783e20,10) at pfkeyv2_send+0x411
pfkeyv2_output(d9897700,d7edd990,0,0) at pfkeyv2_output+0x61
pfkeyv2_usrreq(d7edd990,9,d9897700,0,0,d7df61b4) at pfkeyv2_usrreq+0x15a
sosend(d7edd990,0,f5ad9d60,d9897700,0,0) at sosend+0x2e3
soo_write(d7fec9a0,f5ad9d60,0) at soo_write+0x21
Currently the crypto layer needs the kernel lock. As tdb_walk() does
not know whether walker() needs it, grab it in advance, before taking
tdb_sadb_mtx.
ok?
bluhm
Index: netinet/ip_ipsp.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_ipsp.c,v
retrieving revision 1.248
diff -u -p -r1.248 ip_ipsp.c
--- netinet/ip_ipsp.c 25 Oct 2021 18:25:01 -0000 1.248
+++ netinet/ip_ipsp.c 26 Oct 2021 16:34:49 -0000
@@ -540,6 +540,11 @@ tdb_walk(u_int rdomain, int (*walker)(st
int i, rval = 0;
struct tdb *tdbp, *next;
+ /*
+ * The walker may acquire the kernel lock. Grab it here to keep
+ * the lock order.
+ */
+ KERNEL_LOCK();
mtx_enter(&tdb_sadb_mtx);
for (i = 0; i <= tdb_hashmask; i++) {
for (tdbp = tdbh[i]; rval == 0 && tdbp != NULL; tdbp = next) {
@@ -555,6 +560,7 @@ tdb_walk(u_int rdomain, int (*walker)(st
}
}
mtx_leave(&tdb_sadb_mtx);
+ KERNEL_UNLOCK();
return rval;
}