Each pflow(4) interface has associated socket, referenced as sc->so. We
set this socket in pflowioctl() which is called with both kernel and net
locks held. In the pflow_output_process() task we do sc->so dereference,
which is protected by kernel lock. But the sosend(), called deeper by
pflow_output_process(), has sleep points, introduced by solock(). So
this kernel lock protection doesn't work as expected, and concurrent
pflowioctl() could override this sc->so.
The diff below introduces per pflow(4) instance `sc_lock' rwlock(9) to
protect sc->so. Since the solock() of udp(4) sockets uses netlock as
backend, the `sc_lock' should be taken first. This expands a little
netlock relocking within pflowioctl().
Also, pflow_sendout_mbuf() called by pflow_output_process(), now called
without kernel lock held, so the mp safe counters_pkt(9) used instead
of manual `if_opackets' increment.
Since if_detach() does some ifnet destruction, now it can't be called
before we finish pflow_output_process() task, otherwise we introduce use
after free for interface counters. In other hand, we need to deny
pflowioctl() to reschedule pflow_output_process() task. The `sc_dyind'
flag introduced for that.
Hrvoje, could you test this diff please?
Index: sys/net/if_pflow.c
===================================================================
RCS file: /cvs/src/sys/net/if_pflow.c,v
retrieving revision 1.96
diff -u -p -r1.96 if_pflow.c
--- sys/net/if_pflow.c 12 Aug 2022 16:38:50 -0000 1.96
+++ sys/net/if_pflow.c 4 Nov 2022 18:23:27 -0000
@@ -132,11 +132,11 @@ pflow_output_process(void *arg)
struct mbuf *m;
mq_delist(&sc->sc_outputqueue, &ml);
- KERNEL_LOCK();
+ rw_enter_read(&sc->sc_lock);
while ((m = ml_dequeue(&ml)) != NULL) {
pflow_sendout_mbuf(sc, m);
}
- KERNEL_UNLOCK();
+ rw_exit_read(&sc->sc_lock);
}
int
@@ -146,6 +146,7 @@ pflow_clone_create(struct if_clone *ifc,
struct pflow_softc *pflowif;
pflowif = malloc(sizeof(*pflowif), M_DEVBUF, M_WAITOK|M_ZERO);
+ rw_init(&pflowif->sc_lock, "pflowlk");
MGET(pflowif->send_nam, M_WAIT, MT_SONAME);
pflowif->sc_version = PFLOW_PROTO_DEFAULT;
@@ -254,6 +255,7 @@ pflow_clone_create(struct if_clone *ifc,
mq_init(&pflowif->sc_outputqueue, 8192, IPL_SOFTNET);
pflow_setmtu(pflowif, ETHERMTU);
pflow_init_timeouts(pflowif);
+ if_counters_alloc(ifp);
if_attach(ifp);
if_alloc_sadl(ifp);
@@ -275,6 +277,7 @@ pflow_clone_destroy(struct ifnet *ifp)
error = 0;
NET_LOCK();
+ sc->sc_dying = 1;
SLIST_REMOVE(&pflowif_list, sc, pflow_softc, sc_next);
NET_UNLOCK();
@@ -475,10 +478,17 @@ pflowioctl(struct ifnet *ifp, u_long cmd
struct pflowreq pflowr;
int error;
+ if (sc->sc_dying)
+ return ENXIO;
+
switch (cmd) {
case SIOCSIFADDR:
case SIOCSIFDSTADDR:
case SIOCSIFFLAGS:
+ /* XXXSMP: enforce lock order */
+ NET_UNLOCK();
+ rw_enter_read(&sc->sc_lock);
+ NET_LOCK();
if ((ifp->if_flags & IFF_UP) && sc->so != NULL) {
ifp->if_flags |= IFF_RUNNING;
sc->sc_gcounter=pflowstats.pflow_flows;
@@ -487,6 +497,7 @@ pflowioctl(struct ifnet *ifp, u_long cmd
pflow_sendout_ipfix_tmpl(sc);
} else
ifp->if_flags &= ~IFF_RUNNING;
+ rw_exit_read(&sc->sc_lock);
break;
case SIOCSIFMTU:
if (ifr->ifr_mtu < PFLOW_MINMTU)
@@ -523,10 +534,13 @@ pflowioctl(struct ifnet *ifp, u_long cmd
/* XXXSMP breaks atomicity */
NET_UNLOCK();
+ rw_enter_write(&sc->sc_lock);
error = pflow_set(sc, &pflowr);
NET_LOCK();
- if (error != 0)
+ if (error != 0) {
+ rw_exit_write(&sc->sc_lock);
return (error);
+ }
if ((ifp->if_flags & IFF_UP) && sc->so != NULL) {
ifp->if_flags |= IFF_RUNNING;
@@ -535,6 +549,7 @@ pflowioctl(struct ifnet *ifp, u_long cmd
pflow_sendout_ipfix_tmpl(sc);
} else
ifp->if_flags &= ~IFF_RUNNING;
+ rw_exit_write(&sc->sc_lock);
break;
@@ -1196,8 +1211,8 @@ pflow_sendout_ipfix_tmpl(struct pflow_so
int
pflow_sendout_mbuf(struct pflow_softc *sc, struct mbuf *m)
{
- sc->sc_if.if_opackets++;
- sc->sc_if.if_obytes += m->m_pkthdr.len;
+ counters_pkt(sc->sc_if.if_counters,
+ ifc_opackets, ifc_obytes, m->m_pkthdr.len);
if (sc->so == NULL) {
m_freem(m);
Index: sys/net/if_pflow.h
===================================================================
RCS file: /cvs/src/sys/net/if_pflow.h,v
retrieving revision 1.18
diff -u -p -r1.18 if_pflow.h
--- sys/net/if_pflow.h 12 Aug 2022 16:38:50 -0000 1.18
+++ sys/net/if_pflow.h 4 Nov 2022 18:23:27 -0000
@@ -169,7 +169,16 @@ struct pflow_ipfix_flow6 {
#ifdef _KERNEL
+/*
+ * Locks used to protect struct members and global data
+ * N net lock
+ * p this pflow_softc' `sc_lock'
+ */
+
struct pflow_softc {
+ struct rwlock sc_lock;
+
+ int sc_dying; /* N */
struct ifnet sc_if;
unsigned int sc_count;
@@ -185,7 +194,7 @@ struct pflow_softc {
struct timeout sc_tmo_tmpl;
struct mbuf_queue sc_outputqueue;
struct task sc_outputtask;
- struct socket *so;
+ struct socket *so; /* [p] */
struct mbuf *send_nam;
struct sockaddr *sc_flowsrc;
struct sockaddr *sc_flowdst;