On Fri, Aug 19, 2022 at 10:54:42PM +0200, Alexander Bluhm wrote: > This diff allows udp_input() to run in parallel.
Parts have been committed; below is the diff for -current. With this diff, UDP socket splicing does not work yet as udp_output() is not MP safe. Also, calls from udp_input() into code that runs with only the shared netlock may have unexpected effects. So I doubt that this part will make it into the 7.2 release. Tests are welcome anyway so I know about possible bugs and can fix them soon. bluhm Index: net/if_bridge.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_bridge.c,v retrieving revision 1.364 diff -u -p -r1.364 if_bridge.c --- net/if_bridge.c 7 Aug 2022 00:57:43 -0000 1.364 +++ net/if_bridge.c 6 Sep 2022 19:39:24 -0000 @@ -1590,7 +1590,7 @@ bridge_ipsec(struct ifnet *ifp, struct e off); tdb_unref(tdb); if (prot != IPPROTO_DONE) - ip_deliver(&m, &hlen, prot, af); + ip_deliver(&m, &hlen, prot, af, 0); return (1); } else { tdb_unref(tdb); Index: netinet/in_proto.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_proto.c,v retrieving revision 1.99 diff -u -p -r1.99 in_proto.c --- netinet/in_proto.c 15 Aug 2022 09:11:38 -0000 1.99 +++ netinet/in_proto.c 6 Sep 2022 19:39:24 -0000 @@ -185,7 +185,7 @@ const struct protosw inetsw[] = { .pr_type = SOCK_DGRAM, .pr_domain = &inetdomain, .pr_protocol = IPPROTO_UDP, - .pr_flags = PR_ATOMIC|PR_ADDR|PR_SPLICE, + .pr_flags = PR_ATOMIC|PR_ADDR|PR_SPLICE|PR_MPSAFE, .pr_input = udp_input, .pr_ctlinput = udp_ctlinput, .pr_ctloutput = ip_ctloutput, Index: netinet/ip_input.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_input.c,v retrieving revision 1.381 diff -u -p -r1.381 ip_input.c --- netinet/ip_input.c 29 Aug 2022 14:43:56 -0000 1.381 +++ netinet/ip_input.c 6 Sep 2022 19:39:24 -0000 @@ -230,6 +230,11 @@ ip_init(void) #endif } +struct ip_offnxt { + int ion_off; + int ion_nxt; +}; + /* * Enqueue packet for local delivery. 
Queuing is used as a boundary * between the network layer (input/forward path) running with @@ -246,6 +251,30 @@ ip_ours(struct mbuf **mp, int *offp, int if (af != AF_UNSPEC) return nxt; + nxt = ip_deliver(mp, offp, nxt, AF_INET, 1); + if (nxt == IPPROTO_DONE) + return IPPROTO_DONE; + + /* save values for later, use after dequeue */ + if (*offp != sizeof(struct ip)) { + struct m_tag *mtag; + struct ip_offnxt *ion; + + /* mbuf tags are expensive, but only used for header options */ + mtag = m_tag_get(PACKET_TAG_IP_OFFNXT, sizeof(*ion), + M_NOWAIT); + if (mtag == NULL) { + ipstat_inc(ips_idropped); + m_freemp(mp); + return IPPROTO_DONE; + } + ion = (struct ip_offnxt *)(mtag + 1); + ion->ion_off = *offp; + ion->ion_nxt = nxt; + + m_tag_prepend(*mp, mtag); + } + niq_enqueue(&ipintrq, *mp); *mp = NULL; return IPPROTO_DONE; @@ -261,18 +290,31 @@ ipintr(void) struct mbuf *m; while ((m = niq_dequeue(&ipintrq)) != NULL) { - struct ip *ip; + struct m_tag *mtag; int off, nxt; #ifdef DIAGNOSTIC if ((m->m_flags & M_PKTHDR) == 0) panic("ipintr no HDR"); #endif - ip = mtod(m, struct ip *); - off = ip->ip_hl << 2; - nxt = ip->ip_p; + mtag = m_tag_find(m, PACKET_TAG_IP_OFFNXT, NULL); + if (mtag != NULL) { + struct ip_offnxt *ion; + + ion = (struct ip_offnxt *)(mtag + 1); + off = ion->ion_off; + nxt = ion->ion_nxt; - nxt = ip_deliver(&m, &off, nxt, AF_INET); + m_tag_delete(m, mtag); + } else { + struct ip *ip; + + ip = mtod(m, struct ip *); + off = ip->ip_hl << 2; + nxt = ip->ip_p; + } + + nxt = ip_deliver(&m, &off, nxt, AF_INET, 0); KASSERT(nxt == IPPROTO_DONE); } } @@ -673,7 +715,7 @@ ip_fragcheck(struct mbuf **mp, int *offp #endif int -ip_deliver(struct mbuf **mp, int *offp, int nxt, int af) +ip_deliver(struct mbuf **mp, int *offp, int nxt, int af, int shared) { const struct protosw *psw; int naf = af; @@ -681,14 +723,24 @@ ip_deliver(struct mbuf **mp, int *offp, int nest = 0; #endif /* INET6 */ - NET_ASSERT_LOCKED_EXCLUSIVE(); - /* * Tell launch routine the next header */ 
IPSTAT_INC(delivered); while (nxt != IPPROTO_DONE) { + switch (af) { + case AF_INET: + psw = &inetsw[ip_protox[nxt]]; + break; +#ifdef INET6 + case AF_INET6: + psw = &inet6sw[ip6_protox[nxt]]; + break; +#endif /* INET6 */ + } + if (shared && !ISSET(psw->pr_flags, PR_MPSAFE)) + break; #ifdef INET6 if (af == AF_INET6 && ip6_hdrnestlimit && (++nest > ip6_hdrnestlimit)) { @@ -725,16 +777,6 @@ ip_deliver(struct mbuf **mp, int *offp, case IPPROTO_IPV6: naf = AF_INET6; ip6stat_inc(ip6s_delivered); - break; -#endif /* INET6 */ - } - switch (af) { - case AF_INET: - psw = &inetsw[ip_protox[nxt]]; - break; -#ifdef INET6 - case AF_INET6: - psw = &inet6sw[ip6_protox[nxt]]; break; #endif /* INET6 */ } Index: netinet/ip_var.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_var.h,v retrieving revision 1.104 diff -u -p -r1.104 ip_var.h --- netinet/ip_var.h 3 Sep 2022 22:43:38 -0000 1.104 +++ netinet/ip_var.h 6 Sep 2022 19:39:24 -0000 @@ -249,7 +249,7 @@ int ip_sysctl(int *, u_int, void *, siz void ip_savecontrol(struct inpcb *, struct mbuf **, struct ip *, struct mbuf *); int ip_input_if(struct mbuf **, int *, int, int, struct ifnet *); -int ip_deliver(struct mbuf **, int *, int, int); +int ip_deliver(struct mbuf **, int *, int, int, int); void ip_forward(struct mbuf *, struct ifnet *, struct rtentry *, int); int rip_ctloutput(int, struct socket *, int, int, struct mbuf *); void rip_init(void); Index: netinet6/in6_proto.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/in6_proto.c,v retrieving revision 1.111 diff -u -p -r1.111 in6_proto.c --- netinet6/in6_proto.c 2 Sep 2022 13:12:32 -0000 1.111 +++ netinet6/in6_proto.c 6 Sep 2022 19:39:24 -0000 @@ -136,7 +136,7 @@ const struct protosw inet6sw[] = { .pr_type = SOCK_DGRAM, .pr_domain = &inet6domain, .pr_protocol = IPPROTO_UDP, - .pr_flags = PR_ATOMIC|PR_ADDR|PR_SPLICE, + .pr_flags = 
PR_ATOMIC|PR_ADDR|PR_SPLICE|PR_MPSAFE, .pr_input = udp_input, .pr_ctlinput = udp6_ctlinput, .pr_ctloutput = ip6_ctloutput, Index: netinet6/ip6_input.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_input.c,v retrieving revision 1.254 diff -u -p -r1.254 ip6_input.c --- netinet6/ip6_input.c 21 Aug 2022 14:15:55 -0000 1.254 +++ netinet6/ip6_input.c 6 Sep 2022 19:39:24 -0000 @@ -190,6 +190,10 @@ ip6_ours(struct mbuf **mp, int *offp, in if (af != AF_UNSPEC) return nxt; + nxt = ip_deliver(mp, offp, nxt, AF_INET6, 1); + if (nxt == IPPROTO_DONE) + return IPPROTO_DONE; + /* save values for later, use after dequeue */ if (*offp != sizeof(struct ip6_hdr)) { struct m_tag *mtag; @@ -248,7 +252,7 @@ ip6intr(void) off = sizeof(struct ip6_hdr); nxt = ip6->ip6_nxt; } - nxt = ip_deliver(&m, &off, nxt, AF_INET6); + nxt = ip_deliver(&m, &off, nxt, AF_INET6, 0); KASSERT(nxt == IPPROTO_DONE); } } Index: sys/mbuf.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/sys/mbuf.h,v retrieving revision 1.255 diff -u -p -r1.255 mbuf.h --- sys/mbuf.h 15 Aug 2022 16:15:37 -0000 1.255 +++ sys/mbuf.h 6 Sep 2022 19:39:24 -0000 @@ -471,6 +471,8 @@ struct m_tag *m_tag_next(struct mbuf *, #define PACKET_TAG_IPSEC_IN_DONE 0x0001 /* IPsec applied, in */ #define PACKET_TAG_IPSEC_OUT_DONE 0x0002 /* IPsec applied, out */ #define PACKET_TAG_IPSEC_FLOWINFO 0x0004 /* IPsec flowinfo */ +#define PACKET_TAG_IP_OFFNXT 0x0010 /* IPv4 offset and next proto */ +#define PACKET_TAG_IP6_OFFNXT 0x0020 /* IPv6 offset and next proto */ #define PACKET_TAG_WIREGUARD 0x0040 /* WireGuard data */ #define PACKET_TAG_GRE 0x0080 /* GRE processing done */ #define PACKET_TAG_DLT 0x0100 /* data link layer type */ @@ -479,7 +481,6 @@ struct m_tag *m_tag_next(struct mbuf *, #define PACKET_TAG_SRCROUTE 0x1000 /* IPv4 source routing options */ #define PACKET_TAG_TUNNEL 0x2000 /* Tunnel endpoint address 
*/ #define PACKET_TAG_CARP_BAL_IP 0x4000 /* carp(4) ip balanced marker */ -#define PACKET_TAG_IP6_OFFNXT 0x8000 /* IPv6 offset and next proto */ #define MTAG_BITS \ ("\20\1IPSEC_IN_DONE\2IPSEC_OUT_DONE\3IPSEC_FLOWINFO" \ Index: sys/protosw.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/sys/protosw.h,v retrieving revision 1.55 diff -u -p -r1.55 protosw.h --- sys/protosw.h 5 Sep 2022 14:56:09 -0000 1.55 +++ sys/protosw.h 6 Sep 2022 19:39:24 -0000 @@ -128,6 +128,7 @@ struct protosw { #define PR_ABRTACPTDIS 0x20 /* abort on accept(2) to disconnected socket */ #define PR_SPLICE 0x40 /* socket splicing is possible */ +#define PR_MPSAFE 0x80 /* input runs with shared netlock */ /* * The arguments to usrreq are: