On Fri, Dec 03, 2021 at 08:35:45PM +0100, Alexander Bluhm wrote:
> Note that IPsec still has the workaround to disable multiple queues.
I think we can remove the ipsec_in_use workaround now. The IPsec
path is protected with the kernel lock.
There are some issues left:
- npppd l2tp ipsecflowinfo is not MP safe
- the acquire SA feature is not MP safe
- Hrvoje has seen a panic with sasyncd
If you use one of these, the diff below should trigger crashes faster.
If you use only regular IPsec or forwarding, I hope it is stable.
bluhm
Index: net/if.c
===================================================================
RCS file: /cvs/src/sys/net/if.c,v
retrieving revision 1.644
diff -u -p -r1.644 if.c
--- net/if.c 11 Nov 2021 10:03:10 -0000 1.644
+++ net/if.c 23 Dec 2021 23:11:20 -0000
@@ -108,6 +108,10 @@
#include <netinet6/ip6_var.h>
#endif
+#ifdef IPSEC
+#include <netinet/ip_ipsp.h>
+#endif
+
#ifdef MPLS
#include <netmpls/mpls.h>
#endif
@@ -237,7 +241,7 @@ int ifq_congestion;
int netisr;
-#define NET_TASKQ 1
+#define NET_TASKQ 4
struct taskq *nettqmp[NET_TASKQ];
struct task if_input_task_locked = TASK_INITIALIZER(if_netisr, NULL);
@@ -834,15 +838,10 @@ if_input_process(struct ifnet *ifp, stru
* lists and the socket layer.
*/
- /*
- * XXXSMP IPsec data structures are not ready to be accessed
- * by multiple network threads in parallel. In this case
- * use an exclusive lock.
- */
- NET_LOCK();
+ NET_RLOCK_IN_SOFTNET();
while ((m = ml_dequeue(ml)) != NULL)
(*ifp->if_input)(ifp, m);
- NET_UNLOCK();
+ NET_RUNLOCK_IN_SOFTNET();
}
void
@@ -899,6 +898,12 @@ if_netisr(void *unused)
arpintr();
KERNEL_UNLOCK();
}
+#endif
+ if (n & (1 << NETISR_IP))
+ ipintr();
+#ifdef INET6
+ if (n & (1 << NETISR_IPV6))
+ ip6intr();
#endif
#if NPPP > 0
if (n & (1 << NETISR_PPP)) {
Index: net/if_ethersubr.c
===================================================================
RCS file: /cvs/src/sys/net/if_ethersubr.c,v
retrieving revision 1.276
diff -u -p -r1.276 if_ethersubr.c
--- net/if_ethersubr.c 19 Aug 2021 10:22:00 -0000 1.276
+++ net/if_ethersubr.c 23 Dec 2021 23:11:20 -0000
@@ -222,7 +222,10 @@ ether_resolve(struct ifnet *ifp, struct
switch (af) {
case AF_INET:
+ KERNEL_LOCK();
+ /* XXXSMP there is a MP race in arpresolve() */
error = arpresolve(ifp, rt, m, dst, eh->ether_dhost);
+ KERNEL_UNLOCK();
if (error)
return (error);
eh->ether_type = htons(ETHERTYPE_IP);
@@ -245,7 +248,10 @@ ether_resolve(struct ifnet *ifp, struct
break;
#ifdef INET6
case AF_INET6:
+ KERNEL_LOCK();
+ /* XXXSMP there is a MP race in nd6_resolve() */
error = nd6_resolve(ifp, rt, m, dst, eh->ether_dhost);
+ KERNEL_UNLOCK();
if (error)
return (error);
eh->ether_type = htons(ETHERTYPE_IPV6);
@@ -271,13 +277,19 @@ ether_resolve(struct ifnet *ifp, struct
break;
#ifdef INET6
case AF_INET6:
+ KERNEL_LOCK();
+ /* XXXSMP there is a MP race in nd6_resolve() */
error = nd6_resolve(ifp, rt, m, dst, eh->ether_dhost);
+ KERNEL_UNLOCK();
if (error)
return (error);
break;
#endif
case AF_INET:
+ KERNEL_LOCK();
+ /* XXXSMP there is a MP race in arpresolve() */
error = arpresolve(ifp, rt, m, dst, eh->ether_dhost);
+ KERNEL_UNLOCK();
if (error)
return (error);
break;
@@ -529,12 +541,14 @@ ether_input(struct ifnet *ifp, struct mb
case ETHERTYPE_PPPOE:
if (m->m_flags & (M_MCAST | M_BCAST))
goto dropanyway;
+ KERNEL_LOCK();
#ifdef PIPEX
if (pipex_enable) {
struct pipex_session *session;
if ((session = pipex_pppoe_lookup_session(m)) != NULL) {
pipex_pppoe_input(m, session);
+ KERNEL_UNLOCK();
return;
}
}
@@ -543,6 +557,7 @@ ether_input(struct ifnet *ifp, struct mb
pppoe_disc_input(m);
else
pppoe_data_input(m);
+ KERNEL_UNLOCK();
return;
#endif
#ifdef MPLS
Index: net/ifq.c
===================================================================
RCS file: /cvs/src/sys/net/ifq.c,v
retrieving revision 1.44
diff -u -p -r1.44 ifq.c
--- net/ifq.c 9 Jul 2021 01:22:05 -0000 1.44
+++ net/ifq.c 23 Dec 2021 23:11:20 -0000
@@ -243,7 +243,7 @@ void
ifq_init(struct ifqueue *ifq, struct ifnet *ifp, unsigned int idx)
{
ifq->ifq_if = ifp;
- ifq->ifq_softnet = net_tq(ifp->if_index); /* + idx */
+ ifq->ifq_softnet = net_tq(ifp->if_index + idx);
ifq->ifq_softc = NULL;
mtx_init(&ifq->ifq_mtx, IPL_NET);
@@ -620,7 +620,7 @@ void
ifiq_init(struct ifiqueue *ifiq, struct ifnet *ifp, unsigned int idx)
{
ifiq->ifiq_if = ifp;
- ifiq->ifiq_softnet = net_tq(ifp->if_index); /* + idx */
+ ifiq->ifiq_softnet = net_tq(ifp->if_index + idx);
ifiq->ifiq_softc = NULL;
mtx_init(&ifiq->ifiq_mtx, IPL_NET);
Index: net/netisr.h
===================================================================
RCS file: /cvs/src/sys/net/netisr.h,v
retrieving revision 1.55
diff -u -p -r1.55 netisr.h
--- net/netisr.h 5 Jan 2021 20:43:36 -0000 1.55
+++ net/netisr.h 23 Dec 2021 23:11:20 -0000
@@ -41,8 +41,10 @@
* interrupt used for scheduling the network code to calls
* on the lowest level routine of each protocol.
*/
+#define NETISR_IP 2 /* same as AF_INET */
#define NETISR_PFSYNC 5 /* for pfsync "immediate" tx */
#define NETISR_ARP 18 /* same as AF_LINK */
+#define NETISR_IPV6 24 /* same as AF_INET6 */
#define NETISR_PPP 28 /* for PPP processing */
#define NETISR_BRIDGE 29 /* for bridge processing */
#define NETISR_SWITCH 31 /* for switch dataplane */
@@ -57,6 +59,8 @@ extern int netisr; /* scheduling bits
extern struct task if_input_task_locked;
void arpintr(void);
+void ipintr(void);
+void ip6intr(void);
void pppintr(void);
void bridgeintr(void);
void switchintr(void);
Index: netinet/ip_input.c
===================================================================
RCS file: /cvs/src/sys/netinet/ip_input.c,v
retrieving revision 1.364
diff -u -p -r1.364 ip_input.c
--- netinet/ip_input.c 22 Nov 2021 13:47:10 -0000 1.364
+++ netinet/ip_input.c 23 Dec 2021 23:11:21 -0000
@@ -130,6 +130,8 @@ const struct sysctl_bounded_args ipctl_v
{ IPCTL_ARPDOWN, &arpt_down, 0, INT_MAX },
};
+struct niqueue ipintrq = NIQUEUE_INITIALIZER(IPQ_MAXLEN, NETISR_IP);
+
struct pool ipqent_pool;
struct pool ipq_pool;
@@ -143,6 +145,7 @@ static struct mbuf_queue ipsendraw_mq;
extern struct niqueue arpinq;
int ip_ours(struct mbuf **, int *, int, int);
+int ip_local(struct mbuf **, int *, int, int);
int ip_dooptions(struct mbuf *, struct ifnet *);
int in_ouraddr(struct mbuf *, struct ifnet *, struct rtentry **);
@@ -230,6 +233,43 @@ ip_init(void)
}
/*
+ * Enqueue packet for local delivery. Queuing is used as a boundary
+ * between the network layer (input/forward path) running with shared
+ * NET_RLOCK_IN_SOFTNET() and the transport layer needing it exclusively.
+ */
+int
+ip_ours(struct mbuf **mp, int *offp, int nxt, int af)
+{
+ /* We are already in a IPv4/IPv6 local deliver loop. */
+ if (af != AF_UNSPEC)
+ return ip_local(mp, offp, nxt, af);
+
+ niq_enqueue(&ipintrq, *mp);
+ *mp = NULL;
+ return IPPROTO_DONE;
+}
+
+/*
+ * Dequeue and process locally delivered packets.
+ */
+void
+ipintr(void)
+{
+ struct mbuf *m;
+ int off, nxt;
+
+ while ((m = niq_dequeue(&ipintrq)) != NULL) {
+#ifdef DIAGNOSTIC
+ if ((m->m_flags & M_PKTHDR) == 0)
+ panic("ipintr no HDR");
+#endif
+ off = 0;
+ nxt = ip_local(&m, &off, IPPROTO_IPV4, AF_UNSPEC);
+ KASSERT(nxt == IPPROTO_DONE);
+ }
+}
+
+/*
* IPv4 input routine.
*
* Checksum and byte swap header. Process options. Forward or deliver.
@@ -514,7 +554,7 @@ ip_input_if(struct mbuf **mp, int *offp,
* If fragmented try to reassemble. Pass to next level.
*/
int
-ip_ours(struct mbuf **mp, int *offp, int nxt, int af)
+ip_local(struct mbuf **mp, int *offp, int nxt, int af)
{
struct mbuf *m = *mp;
struct ip *ip = mtod(m, struct ip *);
@@ -522,6 +562,8 @@ ip_ours(struct mbuf **mp, int *offp, int
struct ipqent *ipqe;
int mff, hlen;
+ NET_ASSERT_WLOCKED();
+
hlen = ip->ip_hl << 2;
/*
@@ -1663,7 +1705,8 @@ ip_sysctl(int *name, u_int namelen, void
newlen));
#endif
case IPCTL_IFQUEUE:
- return (EOPNOTSUPP);
+ return (sysctl_niq(name + 1, namelen - 1,
+ oldp, oldlenp, newp, newlen, &ipintrq));
case IPCTL_ARPQUEUE:
return (sysctl_niq(name + 1, namelen - 1,
oldp, oldlenp, newp, newlen, &arpinq));
Index: netinet/ip_var.h
===================================================================
RCS file: /cvs/src/sys/netinet/ip_var.h,v
retrieving revision 1.88
diff -u -p -r1.88 ip_var.h
--- netinet/ip_var.h 30 Mar 2021 08:37:11 -0000 1.88
+++ netinet/ip_var.h 23 Dec 2021 23:11:21 -0000
@@ -248,7 +248,6 @@ void ip_stripoptions(struct mbuf *);
int ip_sysctl(int *, u_int, void *, size_t *, void *, size_t);
void ip_savecontrol(struct inpcb *, struct mbuf **, struct ip *,
struct mbuf *);
-void ipintr(void);
int ip_input_if(struct mbuf **, int *, int, int, struct ifnet *);
int ip_deliver(struct mbuf **, int *, int, int);
void ip_forward(struct mbuf *, struct ifnet *, struct rtentry *, int);
Index: netinet6/ip6_input.c
===================================================================
RCS file: /cvs/src/sys/netinet6/ip6_input.c,v
retrieving revision 1.237
diff -u -p -r1.237 ip6_input.c
--- netinet6/ip6_input.c 3 Jun 2021 04:47:54 -0000 1.237
+++ netinet6/ip6_input.c 23 Dec 2021 23:11:22 -0000
@@ -115,11 +115,14 @@
#include <netinet/ip_carp.h>
#endif
+struct niqueue ip6intrq = NIQUEUE_INITIALIZER(IPQ_MAXLEN, NETISR_IPV6);
+
struct cpumem *ip6counters;
uint8_t ip6_soiikey[IP6_SOIIKEY_LEN];
int ip6_ours(struct mbuf **, int *, int, int);
+int ip6_local(struct mbuf **, int *, int, int);
int ip6_check_rh0hdr(struct mbuf *, int *);
int ip6_hbhchcheck(struct mbuf *, int *, int *, int *);
int ip6_hopopts_input(u_int32_t *, u_int32_t *, struct mbuf **, int *);
@@ -162,6 +165,43 @@ ip6_init(void)
ip6counters = counters_alloc(ip6s_ncounters);
}
+/*
+ * Enqueue packet for local delivery. Queuing is used as a boundary
+ * between the network layer (input/forward path) running with shared
+ * NET_RLOCK_IN_SOFTNET() and the transport layer needing it exclusively.
+ */
+int
+ip6_ours(struct mbuf **mp, int *offp, int nxt, int af)
+{
+ /* We are already in a IPv4/IPv6 local deliver loop. */
+ if (af != AF_UNSPEC)
+ return ip6_local(mp, offp, nxt, af);
+
+ niq_enqueue(&ip6intrq, *mp);
+ *mp = NULL;
+ return IPPROTO_DONE;
+}
+
+/*
+ * Dequeue and process locally delivered packets.
+ */
+void
+ip6intr(void)
+{
+ struct mbuf *m;
+ int off, nxt;
+
+ while ((m = niq_dequeue(&ip6intrq)) != NULL) {
+#ifdef DIAGNOSTIC
+ if ((m->m_flags & M_PKTHDR) == 0)
+ panic("ip6intr no HDR");
+#endif
+ off = 0;
+ nxt = ip6_local(&m, &off, IPPROTO_IPV6, AF_UNSPEC);
+ KASSERT(nxt == IPPROTO_DONE);
+ }
+}
+
void
ipv6_input(struct ifnet *ifp, struct mbuf *m)
{
@@ -544,8 +584,10 @@ ip6_input_if(struct mbuf **mp, int *offp
}
int
-ip6_ours(struct mbuf **mp, int *offp, int nxt, int af)
+ip6_local(struct mbuf **mp, int *offp, int nxt, int af)
{
+ NET_ASSERT_WLOCKED();
+
if (ip6_hbhchcheck(*mp, offp, &nxt, NULL))
return IPPROTO_DONE;
@@ -1470,7 +1512,8 @@ ip6_sysctl(int *name, u_int namelen, voi
NET_UNLOCK();
return (error);
case IPV6CTL_IFQUEUE:
- return (EOPNOTSUPP);
+ return (sysctl_niq(name + 1, namelen - 1,
+ oldp, oldlenp, newp, newlen, &ip6intrq));
case IPV6CTL_SOIIKEY:
return (ip6_sysctl_soiikey(oldp, oldlenp, newp, newlen));
default: