Hi,
As I want a read-only net lock for forwarding, all paths should be
checked for the correct net lock and asserts. I found code in
in_pcbhashlookup() where reading the PCB table results in a write
to optimize the cache.
Porperly protecting PCB hashes is out of scope for parallel forwarding.
Can we get away with this hack? Only update the cache when we are
in TCP oder UDP stack with the write lock. The access from pf is
read-only.
NET_WLOCKED() indicates whether we may change data structures.
Also move the assert from pf to in_pcb where the critical section
is.
bluhm
Index: net/pf.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/pf.c,v
retrieving revision 1.1122
diff -u -p -r1.1122 pf.c
--- net/pf.c 7 Jul 2021 18:38:25 -0000 1.1122
+++ net/pf.c 3 Dec 2021 22:20:32 -0000
@@ -3317,14 +3317,12 @@ pf_socket_lookup(struct pf_pdesc *pd)
sport = pd->hdr.tcp.th_sport;
dport = pd->hdr.tcp.th_dport;
PF_ASSERT_LOCKED();
- NET_ASSERT_LOCKED();
tb = &tcbtable;
break;
case IPPROTO_UDP:
sport = pd->hdr.udp.uh_sport;
dport = pd->hdr.udp.uh_dport;
PF_ASSERT_LOCKED();
- NET_ASSERT_LOCKED();
tb = &udbtable;
break;
default:
Index: netinet/in_pcb.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.c,v
retrieving revision 1.256
diff -u -p -r1.256 in_pcb.c
--- netinet/in_pcb.c 25 Oct 2021 22:20:47 -0000 1.256
+++ netinet/in_pcb.c 3 Dec 2021 22:20:32 -0000
@@ -1069,6 +1069,8 @@ in_pcbhashlookup(struct inpcbtable *tabl
u_int16_t fport = fport_arg, lport = lport_arg;
u_int rdomain;
+ NET_ASSERT_LOCKED();
+
rdomain = rtable_l2(rtable);
head = in_pcbhash(table, rdomain, &faddr, fport, &laddr, lport);
LIST_FOREACH(inp, head, inp_hash) {
@@ -1085,7 +1087,7 @@ in_pcbhashlookup(struct inpcbtable *tabl
* repeated accesses are quicker. This is analogous to
* the historic single-entry PCB cache.
*/
- if (inp != LIST_FIRST(head)) {
+ if (NET_WLOCKED() && inp != LIST_FIRST(head)) {
LIST_REMOVE(inp, inp_hash);
LIST_INSERT_HEAD(head, inp, inp_hash);
}
@@ -1119,6 +1121,8 @@ in_pcblookup_listen(struct inpcbtable *t
u_int16_t lport = lport_arg;
u_int rdomain;
+ NET_ASSERT_LOCKED();
+
key1 = &laddr;
key2 = &zeroin_addr;
#if NPF > 0
@@ -1185,7 +1189,7 @@ in_pcblookup_listen(struct inpcbtable *t
* repeated accesses are quicker. This is analogous to
* the historic single-entry PCB cache.
*/
- if (inp != NULL && inp != LIST_FIRST(head)) {
+ if (NET_WLOCKED() && inp != NULL && inp != LIST_FIRST(head)) {
LIST_REMOVE(inp, inp_hash);
LIST_INSERT_HEAD(head, inp, inp_hash);
}
Index: sys/systm.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/sys/systm.h,v
retrieving revision 1.154
diff -u -p -r1.154 systm.h
--- sys/systm.h 2 Jun 2021 00:39:25 -0000 1.154
+++ sys/systm.h 3 Dec 2021 22:20:32 -0000
@@ -344,6 +344,8 @@ extern struct rwlock netlock;
#define NET_RLOCK_IN_IOCTL() do { rw_enter_read(&netlock); } while
(0)
#define NET_RUNLOCK_IN_IOCTL() do { rw_exit_read(&netlock); } while (0)
+#define NET_WLOCKED() (rw_status(&netlock) == RW_WRITE)
+
#ifdef DIAGNOSTIC
#define NET_ASSERT_UNLOCKED()
\