On Thu, Apr 22, 2021 at 08:34:35PM +0200, Hrvoje Popovski wrote:
> r620-1# ppupaavnamn_iinccif::ca :u p
> lpptooo(ooo0llx__lcc_faacfcafccfhhhfeef__eii_fttifet8eme2m_m2__4m
> m3aamgga9iig0ci8cc,__ cc_hhc0eehxcebcc,kk ::k0 :mm ,bbm ub1ufu)fpf
> pp-ll> lcc ppceup
> fkfrerfererneee e e llli:ils sitptsa t m mgomoeod
> ddififafiuifieieldedtd: : t :iri atitetepmem,m a
> ca dodaddddrdre r = 000x
> fxfStopped at ml_dequeue+0x19: movq 0x8(%rax),%rcx
> TID PID UID PRFLAGS PFLAGS CPU COMMAND
> 376415 75094 0 0x14000 0x200 2 softnet
> *419998 20034 0 0x14000 0x200 1K softnet
> ml_dequeue(fffffd83b3332b98) at ml_dequeue+0x19
> arpresolve(ffff800000087048,fffffd83b2b61b68,fffffd80cd2a2500,ffff800000031d90,
> ffff800024868188) at arpresolve+0x36a
> ether_resolve(ffff800000087048,fffffd80cd2a2500,ffff800000031d90,fffffd83b2b61b
> 68,ffff800024868188) at ether_resolve+0x1c1
> ether_output(ffff800000087048,fffffd80cd2a2500,ffff800000031d90,fffffd83b2b61b6
> 8) at ether_output+0x2c
> ip_output(fffffd80cd2a2500,0,ffff800024868400,1,0,0) at ip_output+0xa2e
> ip_forward(fffffd80cd2a2500,ffff800000082048,fffffd83b2b61b68,0) at
> ip_forward+0x261
> ip_input_if(ffff800024868538,ffff800024868544,4,0,ffff800000082048) at
> ip_input_if+0x608
> ipv4_input(ffff800000082048,fffffd80cd2a2500) at ipv4_input+0x39
> if_input_process(ffff800000082048,ffff8000248685b8) at if_input_process+0x6f
> ifiq_process(ffff800000080f00) at ifiq_process+0x69
> taskq_thread(ffff80000002f100) at taskq_thread+0x81
> end trace frame: 0x0, count: 4
Looking into the ARP code, it does not look MP ready. ml_dequeue()
writes the next pointer into the mbuf without holding a lock. I have
replaced the mbuf list with an mbuf queue and made la_hold_total
atomic. Does that fix your panic with ARP?
There are more global variables in ARP to fix. But let's see if
this is an improvement.
bluhm
Index: net/if.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/if.c,v
retrieving revision 1.640
diff -u -p -r1.640 if.c
--- net/if.c 26 Mar 2021 22:41:06 -0000 1.640
+++ net/if.c 23 Apr 2021 14:41:10 -0000
@@ -238,7 +238,7 @@ int ifq_congestion;
int netisr;
-#define NET_TASKQ 1
+#define NET_TASKQ 4
struct taskq *nettqmp[NET_TASKQ];
struct task if_input_task_locked = TASK_INITIALIZER(if_netisr, NULL);
@@ -834,10 +834,10 @@ if_input_process(struct ifnet *ifp, stru
* to PF globals, pipex globals, unicast and multicast addresses
* lists and the socket layer.
*/
- NET_LOCK();
+ NET_RLOCK_IN_SOFTNET();
while ((m = ml_dequeue(ml)) != NULL)
(*ifp->if_input)(ifp, m);
- NET_UNLOCK();
+ NET_RUNLOCK_IN_SOFTNET();
}
void
@@ -895,6 +895,12 @@ if_netisr(void *unused)
KERNEL_UNLOCK();
}
#endif
+ if (n & (1 << NETISR_IP))
+ ipintr();
+#ifdef INET6
+ if (n & (1 << NETISR_IPV6))
+ ip6intr();
+#endif
#if NPPP > 0
if (n & (1 << NETISR_PPP)) {
KERNEL_LOCK();
@@ -3316,12 +3322,15 @@ unhandled_af(int af)
* globals aren't ready to be accessed by multiple threads in
* parallel.
*/
-int nettaskqs = NET_TASKQ;
+int nettaskqs;
struct taskq *
net_tq(unsigned int ifindex)
{
struct taskq *t = NULL;
+
+ if (nettaskqs == 0)
+ nettaskqs = min(NET_TASKQ, ncpus);
t = nettqmp[ifindex % nettaskqs];
Index: net/if_ethersubr.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_ethersubr.c,v
retrieving revision 1.274
diff -u -p -r1.274 if_ethersubr.c
--- net/if_ethersubr.c 7 Mar 2021 06:02:32 -0000 1.274
+++ net/if_ethersubr.c 23 Apr 2021 16:20:01 -0000
@@ -245,7 +245,10 @@ ether_resolve(struct ifnet *ifp, struct
break;
#ifdef INET6
case AF_INET6:
+ KERNEL_LOCK();
+ /* XXXSMP there is a MP race in nd6_resolve() */
error = nd6_resolve(ifp, rt, m, dst, eh->ether_dhost);
+ KERNEL_UNLOCK();
if (error)
return (error);
eh->ether_type = htons(ETHERTYPE_IPV6);
Index: net/ifq.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/ifq.c,v
retrieving revision 1.43
diff -u -p -r1.43 ifq.c
--- net/ifq.c 20 Feb 2021 04:37:26 -0000 1.43
+++ net/ifq.c 23 Apr 2021 14:41:10 -0000
@@ -243,7 +243,7 @@ void
ifq_init(struct ifqueue *ifq, struct ifnet *ifp, unsigned int idx)
{
ifq->ifq_if = ifp;
- ifq->ifq_softnet = net_tq(ifp->if_index); /* + idx */
+ ifq->ifq_softnet = net_tq(ifp->if_index + idx);
ifq->ifq_softc = NULL;
mtx_init(&ifq->ifq_mtx, IPL_NET);
@@ -617,7 +617,7 @@ void
ifiq_init(struct ifiqueue *ifiq, struct ifnet *ifp, unsigned int idx)
{
ifiq->ifiq_if = ifp;
- ifiq->ifiq_softnet = net_tq(ifp->if_index); /* + idx */
+ ifiq->ifiq_softnet = net_tq(ifp->if_index + idx);
ifiq->ifiq_softc = NULL;
mtx_init(&ifiq->ifiq_mtx, IPL_NET);
Index: net/netisr.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/netisr.h,v
retrieving revision 1.55
diff -u -p -r1.55 netisr.h
--- net/netisr.h 5 Jan 2021 20:43:36 -0000 1.55
+++ net/netisr.h 23 Apr 2021 14:41:10 -0000
@@ -41,8 +41,10 @@
* interrupt used for scheduling the network code to calls
* on the lowest level routine of each protocol.
*/
+#define NETISR_IP 2 /* same as AF_INET */
#define NETISR_PFSYNC 5 /* for pfsync "immediate" tx */
#define NETISR_ARP 18 /* same as AF_LINK */
+#define NETISR_IPV6 24 /* same as AF_INET6 */
#define NETISR_PPP 28 /* for PPP processing */
#define NETISR_BRIDGE 29 /* for bridge processing */
#define NETISR_SWITCH 31 /* for switch dataplane */
@@ -57,6 +59,8 @@ extern int netisr; /* scheduling bits
extern struct task if_input_task_locked;
void arpintr(void);
+void ipintr(void);
+void ip6intr(void);
void pppintr(void);
void bridgeintr(void);
void switchintr(void);
Index: netinet/if_ether.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/if_ether.c,v
retrieving revision 1.243
diff -u -p -r1.243 if_ether.c
--- netinet/if_ether.c 24 Jun 2020 22:03:43 -0000 1.243
+++ netinet/if_ether.c 23 Apr 2021 17:03:09 -0000
@@ -67,7 +67,7 @@
struct llinfo_arp {
LIST_ENTRY(llinfo_arp) la_list;
struct rtentry *la_rt; /* backpointer to rtentry */
- struct mbuf_list la_ml; /* packet hold queue */
+ struct mbuf_queue la_mq; /* packet hold queue */
time_t la_refreshed; /* when was refresh sent */
int la_asked; /* number of queries sent */
};
@@ -188,7 +188,7 @@ arp_rtrequest(struct ifnet *ifp, int req
break;
}
- ml_init(&la->la_ml);
+ mq_init(&la->la_mq, LA_HOLD_QUEUE, IPL_SOFTNET);
la->la_rt = rt;
rt->rt_flags |= RTF_LLINFO;
if ((rt->rt_flags & RTF_LOCAL) == 0)
@@ -202,7 +202,7 @@ arp_rtrequest(struct ifnet *ifp, int req
LIST_REMOVE(la, la_list);
rt->rt_llinfo = NULL;
rt->rt_flags &= ~RTF_LLINFO;
- la_hold_total -= ml_purge(&la->la_ml);
+ atomic_sub_int(&la_hold_total, mq_purge(&la->la_mq));
pool_put(&arp_pool, la);
break;
@@ -373,18 +373,11 @@ arpresolve(struct ifnet *ifp, struct rte
* response yet. Insert mbuf in hold queue if below limit
* if above the limit free the queue without queuing the new packet.
*/
- if (la_hold_total < LA_HOLD_TOTAL) {
- struct mbuf *mh;
-
- if (ml_len(&la->la_ml) >= LA_HOLD_QUEUE) {
- mh = ml_dequeue(&la->la_ml);
- la_hold_total--;
- m_freem(mh);
- }
- ml_enqueue(&la->la_ml, m);
- la_hold_total++;
+ if (atomic_inc_int_nv(&la_hold_total) <= LA_HOLD_TOTAL) {
+ if (mq_push(&la->la_mq, m) != 0)
+ atomic_dec_int(&la_hold_total);
} else {
- la_hold_total -= ml_purge(&la->la_ml);
+ atomic_sub_int(&la_hold_total, mq_purge(&la->la_mq) + 1);
m_freem(m);
}
@@ -413,7 +406,8 @@ arpresolve(struct ifnet *ifp, struct rte
rt->rt_expire += arpt_down;
la->la_asked = 0;
la->la_refreshed = 0;
- la_hold_total -= ml_purge(&la->la_ml);
+ atomic_sub_int(&la_hold_total,
+ mq_purge(&la->la_mq));
}
}
}
@@ -599,6 +593,7 @@ arpcache(struct ifnet *ifp, struct ether
struct in_addr *spa = (struct in_addr *)ea->arp_spa;
char addr[INET_ADDRSTRLEN];
struct ifnet *rifp;
+ struct mbuf *m;
unsigned int len;
int changed = 0;
@@ -671,20 +666,16 @@ arpcache(struct ifnet *ifp, struct ether
la->la_asked = 0;
la->la_refreshed = 0;
- while ((len = ml_len(&la->la_ml)) != 0) {
- struct mbuf *mh;
+ while ((m = mq_dequeue(&la->la_mq)) != NULL) {
+ atomic_dec_int(&la_hold_total);
+ len = mq_len(&la->la_mq);
- mh = ml_dequeue(&la->la_ml);
- la_hold_total--;
+ ifp->if_output(ifp, m, rt_key(rt), rt);
- ifp->if_output(ifp, mh, rt_key(rt), rt);
-
- if (ml_len(&la->la_ml) == len) {
+ /* XXXSMP we discard if other CPU enqueues */
+ if (mq_len(&la->la_mq) >= len) {
/* mbuf is back in queue. Discard. */
- while ((mh = ml_dequeue(&la->la_ml)) != NULL) {
- la_hold_total--;
- m_freem(mh);
- }
+ atomic_sub_int(&la_hold_total, mq_purge(&la->la_mq));
break;
}
}
@@ -698,7 +689,7 @@ arpinvalidate(struct rtentry *rt)
struct llinfo_arp *la = (struct llinfo_arp *)rt->rt_llinfo;
struct sockaddr_dl *sdl = satosdl(rt->rt_gateway);
- la_hold_total -= ml_purge(&la->la_ml);
+ atomic_sub_int(&la_hold_total, mq_purge(&la->la_mq));
sdl->sdl_alen = 0;
la->la_asked = 0;
}
Index: netinet/ip_input.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_input.c,v
retrieving revision 1.356
diff -u -p -r1.356 ip_input.c
--- netinet/ip_input.c 30 Mar 2021 08:37:10 -0000 1.356
+++ netinet/ip_input.c 23 Apr 2021 14:41:10 -0000
@@ -131,6 +131,8 @@ const struct sysctl_bounded_args ipctl_v
{ IPCTL_ARPDOWN, &arpt_down, 0, INT_MAX },
};
+struct niqueue ipintrq = NIQUEUE_INITIALIZER(IPQ_MAXLEN, NETISR_IP);
+
struct pool ipqent_pool;
struct pool ipq_pool;
@@ -144,6 +146,7 @@ static struct mbuf_queue ipsendraw_mq;
extern struct niqueue arpinq;
int ip_ours(struct mbuf **, int *, int, int);
+int ip_local(struct mbuf **, int *, int, int);
int ip_dooptions(struct mbuf *, struct ifnet *);
int in_ouraddr(struct mbuf *, struct ifnet *, struct rtentry **);
@@ -230,6 +233,43 @@ ip_init(void)
}
/*
+ * Enqueue packet for local delivery. Queuing is used as a boundary
+ * between the network layer (input/forward path) running with shared
+ * NET_RLOCK_IN_SOFTNET() and the transport layer needing it exclusively.
+ */
+int
+ip_ours(struct mbuf **mp, int *offp, int nxt, int af)
+{
+ /* We are already in an IPv4/IPv6 local deliver loop. */
+ if (af != AF_UNSPEC)
+ return ip_local(mp, offp, nxt, af);
+
+ niq_enqueue(&ipintrq, *mp);
+ *mp = NULL;
+ return IPPROTO_DONE;
+}
+
+/*
+ * Dequeue and process locally delivered packets.
+ */
+void
+ipintr(void)
+{
+ struct mbuf *m;
+ int off, nxt;
+
+ while ((m = niq_dequeue(&ipintrq)) != NULL) {
+#ifdef DIAGNOSTIC
+ if ((m->m_flags & M_PKTHDR) == 0)
+ panic("ipintr no HDR");
+#endif
+ off = 0;
+ nxt = ip_local(&m, &off, IPPROTO_IPV4, AF_UNSPEC);
+ KASSERT(nxt == IPPROTO_DONE);
+ }
+}
+
+/*
* IPv4 input routine.
*
* Checksum and byte swap header. Process options. Forward or deliver.
@@ -488,7 +528,7 @@ ip_input_if(struct mbuf **mp, int *offp,
* If fragmented try to reassemble. Pass to next level.
*/
int
-ip_ours(struct mbuf **mp, int *offp, int nxt, int af)
+ip_local(struct mbuf **mp, int *offp, int nxt, int af)
{
struct mbuf *m = *mp;
struct ip *ip = mtod(m, struct ip *);
@@ -1639,7 +1679,8 @@ ip_sysctl(int *name, u_int namelen, void
newlen));
#endif
case IPCTL_IFQUEUE:
- return (EOPNOTSUPP);
+ return (sysctl_niq(name + 1, namelen - 1,
+ oldp, oldlenp, newp, newlen, &ipintrq));
case IPCTL_ARPQUEUE:
return (sysctl_niq(name + 1, namelen - 1,
oldp, oldlenp, newp, newlen, &arpinq));
Index: netinet/ip_var.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_var.h,v
retrieving revision 1.88
diff -u -p -r1.88 ip_var.h
--- netinet/ip_var.h 30 Mar 2021 08:37:11 -0000 1.88
+++ netinet/ip_var.h 23 Apr 2021 16:39:18 -0000
@@ -248,7 +248,6 @@ void ip_stripoptions(struct mbuf *);
int ip_sysctl(int *, u_int, void *, size_t *, void *, size_t);
void ip_savecontrol(struct inpcb *, struct mbuf **, struct ip *,
struct mbuf *);
-void ipintr(void);
int ip_input_if(struct mbuf **, int *, int, int, struct ifnet *);
int ip_deliver(struct mbuf **, int *, int, int);
void ip_forward(struct mbuf *, struct ifnet *, struct rtentry *, int);
Index: netinet6/ip6_input.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_input.c,v
retrieving revision 1.232
diff -u -p -r1.232 ip6_input.c
--- netinet6/ip6_input.c 10 Mar 2021 10:21:49 -0000 1.232
+++ netinet6/ip6_input.c 23 Apr 2021 14:41:10 -0000
@@ -115,11 +115,14 @@
#include <netinet/ip_carp.h>
#endif
+struct niqueue ip6intrq = NIQUEUE_INITIALIZER(IPQ_MAXLEN, NETISR_IPV6);
+
struct cpumem *ip6counters;
uint8_t ip6_soiikey[IP6_SOIIKEY_LEN];
int ip6_ours(struct mbuf **, int *, int, int);
+int ip6_local(struct mbuf **, int *, int, int);
int ip6_check_rh0hdr(struct mbuf *, int *);
int ip6_hbhchcheck(struct mbuf *, int *, int *, int *);
int ip6_hopopts_input(u_int32_t *, u_int32_t *, struct mbuf **, int *);
@@ -162,6 +165,43 @@ ip6_init(void)
ip6counters = counters_alloc(ip6s_ncounters);
}
+/*
+ * Enqueue packet for local delivery. Queuing is used as a boundary
+ * between the network layer (input/forward path) running with shared
+ * NET_RLOCK_IN_SOFTNET() and the transport layer needing it exclusively.
+ */
+int
+ip6_ours(struct mbuf **mp, int *offp, int nxt, int af)
+{
+ /* We are already in an IPv4/IPv6 local deliver loop. */
+ if (af != AF_UNSPEC)
+ return ip6_local(mp, offp, nxt, af);
+
+ niq_enqueue(&ip6intrq, *mp);
+ *mp = NULL;
+ return IPPROTO_DONE;
+}
+
+/*
+ * Dequeue and process locally delivered packets.
+ */
+void
+ip6intr(void)
+{
+ struct mbuf *m;
+ int off, nxt;
+
+ while ((m = niq_dequeue(&ip6intrq)) != NULL) {
+#ifdef DIAGNOSTIC
+ if ((m->m_flags & M_PKTHDR) == 0)
+ panic("ip6intr no HDR");
+#endif
+ off = 0;
+ nxt = ip6_local(&m, &off, IPPROTO_IPV6, AF_UNSPEC);
+ KASSERT(nxt == IPPROTO_DONE);
+ }
+}
+
void
ipv6_input(struct ifnet *ifp, struct mbuf *m)
{
@@ -526,7 +566,7 @@ ip6_input_if(struct mbuf **mp, int *offp
}
int
-ip6_ours(struct mbuf **mp, int *offp, int nxt, int af)
+ip6_local(struct mbuf **mp, int *offp, int nxt, int af)
{
if (ip6_hbhchcheck(*mp, offp, &nxt, NULL))
return IPPROTO_DONE;
@@ -1452,7 +1492,8 @@ ip6_sysctl(int *name, u_int namelen, voi
NET_UNLOCK();
return (error);
case IPV6CTL_IFQUEUE:
- return (EOPNOTSUPP);
+ return (sysctl_niq(name + 1, namelen - 1,
+ oldp, oldlenp, newp, newlen, &ip6intrq));
case IPV6CTL_SOIIKEY:
return (ip6_sysctl_soiikey(oldp, oldlenp, newp, newlen));
default: