On Mon, Jul 19, 2021 at 08:02:30PM +0300, Vitaliy Makkoveev wrote:
> I mean the case when ip_local() is called by ip_ours(). Unfortunately, I'm
> not familiar with PPTP, but it looks affected because it doesn't use TCP or
> UDP as transport but encapsulates them into IP frames. Sorry for the noise
> if I'm wrong.
>
> +ip_ours(struct mbuf **mp, int *offp, int nxt, int af)
> +{
> + /* We are already in a IPv4/IPv6 local deliver loop. */
> + if (af != AF_UNSPEC)
> + return ip_local(mp, offp, nxt, af);
> +
> + niq_enqueue(&ipintrq, *mp);
> + *mp = NULL;
> + return IPPROTO_DONE;
> +}
The af != AF_UNSPEC case already has the exclusive net lock.
ipv4_input() sets AF_UNSPEC; the other case is for an IP-in-IP header.
The latter is called from ipintr().
I can put a NET_ASSERT_WLOCKED() into ip_local(). I am still running a
full regress with that.
bluhm
Index: net/if.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/if.c,v
retrieving revision 1.642
diff -u -p -r1.642 if.c
--- net/if.c 30 Jun 2021 13:23:33 -0000 1.642
+++ net/if.c 19 Jul 2021 14:51:31 -0000
@@ -109,6 +109,10 @@
#include <netinet6/ip6_var.h>
#endif
+#ifdef IPSEC
+#include <netinet/ip_ipsp.h>
+#endif
+
#ifdef MPLS
#include <netmpls/mpls.h>
#endif
@@ -238,7 +242,7 @@ int ifq_congestion;
int netisr;
-#define NET_TASKQ 1
+#define NET_TASKQ 4
struct taskq *nettqmp[NET_TASKQ];
struct task if_input_task_locked = TASK_INITIALIZER(if_netisr, NULL);
@@ -815,6 +819,7 @@ void
if_input_process(struct ifnet *ifp, struct mbuf_list *ml)
{
struct mbuf *m;
+ int exclusive_lock = 0;
if (ml_empty(ml))
return;
@@ -834,10 +839,25 @@ if_input_process(struct ifnet *ifp, stru
* to PF globals, pipex globals, unicast and multicast addresses
* lists and the socket layer.
*/
- NET_LOCK();
+
+ /*
+ * XXXSMP IPsec data structures are not ready to be
+ * accessed by multiple Network threads in parallel.
+ */
+ if (ipsec_in_use)
+ exclusive_lock = 1;
+ if (exclusive_lock)
+ NET_LOCK();
+ else
+ NET_RLOCK_IN_SOFTNET();
+
while ((m = ml_dequeue(ml)) != NULL)
(*ifp->if_input)(ifp, m);
- NET_UNLOCK();
+
+ if (exclusive_lock)
+ NET_UNLOCK();
+ else
+ NET_RUNLOCK_IN_SOFTNET();
}
void
@@ -895,6 +915,12 @@ if_netisr(void *unused)
KERNEL_UNLOCK();
}
#endif
+ if (n & (1 << NETISR_IP))
+ ipintr();
+#ifdef INET6
+ if (n & (1 << NETISR_IPV6))
+ ip6intr();
+#endif
#if NPPP > 0
if (n & (1 << NETISR_PPP)) {
KERNEL_LOCK();
@@ -3311,17 +3337,14 @@ unhandled_af(int af)
panic("unhandled af %d", af);
}
-/*
- * XXXSMP This tunable is here to work around the fact that IPsec
- * globals aren't ready to be accessed by multiple threads in
- * parallel.
- */
-int nettaskqs = NET_TASKQ;
-
struct taskq *
net_tq(unsigned int ifindex)
{
struct taskq *t = NULL;
+ static int nettaskqs;
+
+ if (nettaskqs == 0)
+ nettaskqs = min(NET_TASKQ, ncpus);
t = nettqmp[ifindex % nettaskqs];
Index: net/if_ethersubr.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_ethersubr.c,v
retrieving revision 1.275
diff -u -p -r1.275 if_ethersubr.c
--- net/if_ethersubr.c 7 Jul 2021 20:19:01 -0000 1.275
+++ net/if_ethersubr.c 19 Jul 2021 14:32:48 -0000
@@ -222,7 +222,10 @@ ether_resolve(struct ifnet *ifp, struct
switch (af) {
case AF_INET:
+ KERNEL_LOCK();
+ /* XXXSMP there is a MP race in arpresolve() */
error = arpresolve(ifp, rt, m, dst, eh->ether_dhost);
+ KERNEL_UNLOCK();
if (error)
return (error);
eh->ether_type = htons(ETHERTYPE_IP);
@@ -245,7 +248,10 @@ ether_resolve(struct ifnet *ifp, struct
break;
#ifdef INET6
case AF_INET6:
+ KERNEL_LOCK();
+ /* XXXSMP there is a MP race in nd6_resolve() */
error = nd6_resolve(ifp, rt, m, dst, eh->ether_dhost);
+ KERNEL_UNLOCK();
if (error)
return (error);
eh->ether_type = htons(ETHERTYPE_IPV6);
Index: net/ifq.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/ifq.c,v
retrieving revision 1.44
diff -u -p -r1.44 ifq.c
--- net/ifq.c 9 Jul 2021 01:22:05 -0000 1.44
+++ net/ifq.c 19 Jul 2021 14:32:48 -0000
@@ -243,7 +243,7 @@ void
ifq_init(struct ifqueue *ifq, struct ifnet *ifp, unsigned int idx)
{
ifq->ifq_if = ifp;
- ifq->ifq_softnet = net_tq(ifp->if_index); /* + idx */
+ ifq->ifq_softnet = net_tq(ifp->if_index + idx);
ifq->ifq_softc = NULL;
mtx_init(&ifq->ifq_mtx, IPL_NET);
@@ -620,7 +620,7 @@ void
ifiq_init(struct ifiqueue *ifiq, struct ifnet *ifp, unsigned int idx)
{
ifiq->ifiq_if = ifp;
- ifiq->ifiq_softnet = net_tq(ifp->if_index); /* + idx */
+ ifiq->ifiq_softnet = net_tq(ifp->if_index + idx);
ifiq->ifiq_softc = NULL;
mtx_init(&ifiq->ifiq_mtx, IPL_NET);
Index: net/netisr.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/netisr.h,v
retrieving revision 1.55
diff -u -p -r1.55 netisr.h
--- net/netisr.h 5 Jan 2021 20:43:36 -0000 1.55
+++ net/netisr.h 19 Jul 2021 14:32:48 -0000
@@ -41,8 +41,10 @@
* interrupt used for scheduling the network code to calls
* on the lowest level routine of each protocol.
*/
+#define NETISR_IP 2 /* same as AF_INET */
#define NETISR_PFSYNC 5 /* for pfsync "immediate" tx */
#define NETISR_ARP 18 /* same as AF_LINK */
+#define NETISR_IPV6 24 /* same as AF_INET6 */
#define NETISR_PPP 28 /* for PPP processing */
#define NETISR_BRIDGE 29 /* for bridge processing */
#define NETISR_SWITCH 31 /* for switch dataplane */
@@ -57,6 +59,8 @@ extern int netisr; /* scheduling bits
extern struct task if_input_task_locked;
void arpintr(void);
+void ipintr(void);
+void ip6intr(void);
void pppintr(void);
void bridgeintr(void);
void switchintr(void);
Index: net/pfkeyv2.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/pfkeyv2.c,v
retrieving revision 1.218
diff -u -p -r1.218 pfkeyv2.c
--- net/pfkeyv2.c 14 Jul 2021 22:39:26 -0000 1.218
+++ net/pfkeyv2.c 19 Jul 2021 14:48:34 -0000
@@ -2019,14 +2019,6 @@ pfkeyv2_send(struct socket *so, void *me
}
TAILQ_INSERT_HEAD(&ipsec_policy_head, ipo, ipo_list);
ipsec_in_use++;
- /*
- * XXXSMP IPsec data structures are not ready to be
- * accessed by multiple Network threads in parallel,
- * so force all packets to be processed by the first
- * one.
- */
- extern int nettaskqs;
- nettaskqs = 1;
} else {
ipo->ipo_last_searched = ipo->ipo_flags = 0;
}
Index: netinet/ip_input.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_input.c,v
retrieving revision 1.363
diff -u -p -r1.363 ip_input.c
--- netinet/ip_input.c 21 Jun 2021 22:09:14 -0000 1.363
+++ netinet/ip_input.c 19 Jul 2021 20:32:15 -0000
@@ -130,6 +130,8 @@ const struct sysctl_bounded_args ipctl_v
{ IPCTL_ARPDOWN, &arpt_down, 0, INT_MAX },
};
+struct niqueue ipintrq = NIQUEUE_INITIALIZER(IPQ_MAXLEN, NETISR_IP);
+
struct pool ipqent_pool;
struct pool ipq_pool;
@@ -143,6 +145,7 @@ static struct mbuf_queue ipsendraw_mq;
extern struct niqueue arpinq;
int ip_ours(struct mbuf **, int *, int, int);
+int ip_local(struct mbuf **, int *, int, int);
int ip_dooptions(struct mbuf *, struct ifnet *);
int in_ouraddr(struct mbuf *, struct ifnet *, struct rtentry **);
@@ -230,6 +233,43 @@ ip_init(void)
}
/*
+ * Enqueue packet for local delivery. Queuing is used as a boundary
+ * between the network layer (input/forward path) running with shared
+ * NET_RLOCK_IN_SOFTNET() and the transport layer needing it exclusively.
+ */
+int
+ip_ours(struct mbuf **mp, int *offp, int nxt, int af)
+{
+ /* We are already in a IPv4/IPv6 local deliver loop. */
+ if (af != AF_UNSPEC)
+ return ip_local(mp, offp, nxt, af);
+
+ niq_enqueue(&ipintrq, *mp);
+ *mp = NULL;
+ return IPPROTO_DONE;
+}
+
+/*
+ * Dequeue and process locally delivered packets.
+ */
+void
+ipintr(void)
+{
+ struct mbuf *m;
+ int off, nxt;
+
+ while ((m = niq_dequeue(&ipintrq)) != NULL) {
+#ifdef DIAGNOSTIC
+ if ((m->m_flags & M_PKTHDR) == 0)
+ panic("ipintr no HDR");
+#endif
+ off = 0;
+ nxt = ip_local(&m, &off, IPPROTO_IPV4, AF_UNSPEC);
+ KASSERT(nxt == IPPROTO_DONE);
+ }
+}
+
+/*
* IPv4 input routine.
*
* Checksum and byte swap header. Process options. Forward or deliver.
@@ -514,7 +554,7 @@ ip_input_if(struct mbuf **mp, int *offp,
* If fragmented try to reassemble. Pass to next level.
*/
int
-ip_ours(struct mbuf **mp, int *offp, int nxt, int af)
+ip_local(struct mbuf **mp, int *offp, int nxt, int af)
{
struct mbuf *m = *mp;
struct ip *ip = mtod(m, struct ip *);
@@ -522,6 +562,8 @@ ip_ours(struct mbuf **mp, int *offp, int
struct ipqent *ipqe;
int mff, hlen;
+ NET_ASSERT_WLOCKED();
+
hlen = ip->ip_hl << 2;
/*
@@ -1665,7 +1707,8 @@ ip_sysctl(int *name, u_int namelen, void
newlen));
#endif
case IPCTL_IFQUEUE:
- return (EOPNOTSUPP);
+ return (sysctl_niq(name + 1, namelen - 1,
+ oldp, oldlenp, newp, newlen, &ipintrq));
case IPCTL_ARPQUEUE:
return (sysctl_niq(name + 1, namelen - 1,
oldp, oldlenp, newp, newlen, &arpinq));
Index: netinet/ip_var.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_var.h,v
retrieving revision 1.88
diff -u -p -r1.88 ip_var.h
--- netinet/ip_var.h 30 Mar 2021 08:37:11 -0000 1.88
+++ netinet/ip_var.h 19 Jul 2021 14:32:48 -0000
@@ -248,7 +248,6 @@ void ip_stripoptions(struct mbuf *);
int ip_sysctl(int *, u_int, void *, size_t *, void *, size_t);
void ip_savecontrol(struct inpcb *, struct mbuf **, struct ip *,
struct mbuf *);
-void ipintr(void);
int ip_input_if(struct mbuf **, int *, int, int, struct ifnet *);
int ip_deliver(struct mbuf **, int *, int, int);
void ip_forward(struct mbuf *, struct ifnet *, struct rtentry *, int);
Index: netinet6/ip6_input.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_input.c,v
retrieving revision 1.237
diff -u -p -r1.237 ip6_input.c
--- netinet6/ip6_input.c 3 Jun 2021 04:47:54 -0000 1.237
+++ netinet6/ip6_input.c 19 Jul 2021 20:31:14 -0000
@@ -115,11 +115,14 @@
#include <netinet/ip_carp.h>
#endif
+struct niqueue ip6intrq = NIQUEUE_INITIALIZER(IPQ_MAXLEN, NETISR_IPV6);
+
struct cpumem *ip6counters;
uint8_t ip6_soiikey[IP6_SOIIKEY_LEN];
int ip6_ours(struct mbuf **, int *, int, int);
+int ip6_local(struct mbuf **, int *, int, int);
int ip6_check_rh0hdr(struct mbuf *, int *);
int ip6_hbhchcheck(struct mbuf *, int *, int *, int *);
int ip6_hopopts_input(u_int32_t *, u_int32_t *, struct mbuf **, int *);
@@ -162,6 +165,43 @@ ip6_init(void)
ip6counters = counters_alloc(ip6s_ncounters);
}
+/*
+ * Enqueue packet for local delivery. Queuing is used as a boundary
+ * between the network layer (input/forward path) running with shared
+ * NET_RLOCK_IN_SOFTNET() and the transport layer needing it exclusively.
+ */
+int
+ip6_ours(struct mbuf **mp, int *offp, int nxt, int af)
+{
+ /* We are already in a IPv4/IPv6 local deliver loop. */
+ if (af != AF_UNSPEC)
+ return ip6_local(mp, offp, nxt, af);
+
+ niq_enqueue(&ip6intrq, *mp);
+ *mp = NULL;
+ return IPPROTO_DONE;
+}
+
+/*
+ * Dequeue and process locally delivered packets.
+ */
+void
+ip6intr(void)
+{
+ struct mbuf *m;
+ int off, nxt;
+
+ while ((m = niq_dequeue(&ip6intrq)) != NULL) {
+#ifdef DIAGNOSTIC
+ if ((m->m_flags & M_PKTHDR) == 0)
+ panic("ip6intr no HDR");
+#endif
+ off = 0;
+ nxt = ip6_local(&m, &off, IPPROTO_IPV6, AF_UNSPEC);
+ KASSERT(nxt == IPPROTO_DONE);
+ }
+}
+
void
ipv6_input(struct ifnet *ifp, struct mbuf *m)
{
@@ -544,8 +584,10 @@ ip6_input_if(struct mbuf **mp, int *offp
}
int
-ip6_ours(struct mbuf **mp, int *offp, int nxt, int af)
+ip6_local(struct mbuf **mp, int *offp, int nxt, int af)
{
+ NET_ASSERT_WLOCKED();
+
if (ip6_hbhchcheck(*mp, offp, &nxt, NULL))
return IPPROTO_DONE;
@@ -1470,7 +1512,8 @@ ip6_sysctl(int *name, u_int namelen, voi
NET_UNLOCK();
return (error);
case IPV6CTL_IFQUEUE:
- return (EOPNOTSUPP);
+ return (sysctl_niq(name + 1, namelen - 1,
+ oldp, oldlenp, newp, newlen, &ip6intrq));
case IPV6CTL_SOIIKEY:
return (ip6_sysctl_soiikey(oldp, oldlenp, newp, newlen));
default: