On Fri, Aug 19, 2022 at 10:54:42PM +0200, Alexander Bluhm wrote:
> This diff allows to run udp_input() in parallel.

Parts have been committed; below is the diff for -current.

With this diff, UDP socket splicing does not work yet, as udp_output()
is not MP safe.  Also, calls from udp_input() to anywhere with shared
netlock may have unexpected effects.  So I doubt that this part
will make it into the 7.2 release.

Tests are welcome anyway so I know about possible bugs and can fix
them soon.

bluhm

Index: net/if_bridge.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_bridge.c,v
retrieving revision 1.364
diff -u -p -r1.364 if_bridge.c
--- net/if_bridge.c     7 Aug 2022 00:57:43 -0000       1.364
+++ net/if_bridge.c     6 Sep 2022 19:39:24 -0000
@@ -1590,7 +1590,7 @@ bridge_ipsec(struct ifnet *ifp, struct e
                            off);
                        tdb_unref(tdb);
                        if (prot != IPPROTO_DONE)
-                               ip_deliver(&m, &hlen, prot, af);
+                               ip_deliver(&m, &hlen, prot, af, 0);
                        return (1);
                } else {
                        tdb_unref(tdb);
Index: netinet/in_proto.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_proto.c,v
retrieving revision 1.99
diff -u -p -r1.99 in_proto.c
--- netinet/in_proto.c  15 Aug 2022 09:11:38 -0000      1.99
+++ netinet/in_proto.c  6 Sep 2022 19:39:24 -0000
@@ -185,7 +185,7 @@ const struct protosw inetsw[] = {
   .pr_type     = SOCK_DGRAM,
   .pr_domain   = &inetdomain,
   .pr_protocol = IPPROTO_UDP,
-  .pr_flags    = PR_ATOMIC|PR_ADDR|PR_SPLICE,
+  .pr_flags    = PR_ATOMIC|PR_ADDR|PR_SPLICE|PR_MPSAFE,
   .pr_input    = udp_input,
   .pr_ctlinput = udp_ctlinput,
   .pr_ctloutput        = ip_ctloutput,
Index: netinet/ip_input.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_input.c,v
retrieving revision 1.381
diff -u -p -r1.381 ip_input.c
--- netinet/ip_input.c  29 Aug 2022 14:43:56 -0000      1.381
+++ netinet/ip_input.c  6 Sep 2022 19:39:24 -0000
@@ -230,6 +230,11 @@ ip_init(void)
 #endif
 }
 
+struct ip_offnxt {
+       int     ion_off;
+       int     ion_nxt;
+};
+
 /*
  * Enqueue packet for local delivery.  Queuing is used as a boundary
  * between the network layer (input/forward path) running with
@@ -246,6 +251,30 @@ ip_ours(struct mbuf **mp, int *offp, int
        if (af != AF_UNSPEC)
                return nxt;
 
+       nxt = ip_deliver(mp, offp, nxt, AF_INET, 1);
+       if (nxt == IPPROTO_DONE)
+               return IPPROTO_DONE;
+
+        /* save values for later, use after dequeue */
+       if (*offp != sizeof(struct ip)) {
+               struct m_tag *mtag;
+               struct ip_offnxt *ion;
+
+               /* mbuf tags are expensive, but only used for header options */
+               mtag = m_tag_get(PACKET_TAG_IP_OFFNXT, sizeof(*ion),
+                   M_NOWAIT);
+               if (mtag == NULL) {
+                       ipstat_inc(ips_idropped);
+                       m_freemp(mp);
+                       return IPPROTO_DONE;
+               }
+               ion = (struct ip_offnxt *)(mtag + 1);
+               ion->ion_off = *offp;
+               ion->ion_nxt = nxt;
+
+               m_tag_prepend(*mp, mtag);
+       }
+
        niq_enqueue(&ipintrq, *mp);
        *mp = NULL;
        return IPPROTO_DONE;
@@ -261,18 +290,31 @@ ipintr(void)
        struct mbuf *m;
 
        while ((m = niq_dequeue(&ipintrq)) != NULL) {
-               struct ip *ip;
+               struct m_tag *mtag;
                int off, nxt;
 
 #ifdef DIAGNOSTIC
                if ((m->m_flags & M_PKTHDR) == 0)
                        panic("ipintr no HDR");
 #endif
-               ip = mtod(m, struct ip *);
-               off = ip->ip_hl << 2;
-               nxt = ip->ip_p;
+               mtag = m_tag_find(m, PACKET_TAG_IP_OFFNXT, NULL);
+               if (mtag != NULL) {
+                       struct ip_offnxt *ion;
+
+                       ion = (struct ip_offnxt *)(mtag + 1);
+                       off = ion->ion_off;
+                       nxt = ion->ion_nxt;
 
-               nxt = ip_deliver(&m, &off, nxt, AF_INET);
+                       m_tag_delete(m, mtag);
+               } else {
+                       struct ip *ip;
+
+                       ip = mtod(m, struct ip *);
+                       off = ip->ip_hl << 2;
+                       nxt = ip->ip_p;
+               }
+
+               nxt = ip_deliver(&m, &off, nxt, AF_INET, 0);
                KASSERT(nxt == IPPROTO_DONE);
        }
 }
@@ -673,7 +715,7 @@ ip_fragcheck(struct mbuf **mp, int *offp
 #endif
 
 int
-ip_deliver(struct mbuf **mp, int *offp, int nxt, int af)
+ip_deliver(struct mbuf **mp, int *offp, int nxt, int af, int shared)
 {
        const struct protosw *psw;
        int naf = af;
@@ -681,14 +723,24 @@ ip_deliver(struct mbuf **mp, int *offp, 
        int nest = 0;
 #endif /* INET6 */
 
-       NET_ASSERT_LOCKED_EXCLUSIVE();
-
        /*
         * Tell launch routine the next header
         */
        IPSTAT_INC(delivered);
 
        while (nxt != IPPROTO_DONE) {
+               switch (af) {
+               case AF_INET:
+                       psw = &inetsw[ip_protox[nxt]];
+                       break;
+#ifdef INET6
+               case AF_INET6:
+                       psw = &inet6sw[ip6_protox[nxt]];
+                       break;
+#endif /* INET6 */
+               }
+               if (shared && !ISSET(psw->pr_flags, PR_MPSAFE))
+                       break;
 #ifdef INET6
                if (af == AF_INET6 &&
                    ip6_hdrnestlimit && (++nest > ip6_hdrnestlimit)) {
@@ -725,16 +777,6 @@ ip_deliver(struct mbuf **mp, int *offp, 
                case IPPROTO_IPV6:
                        naf = AF_INET6;
                        ip6stat_inc(ip6s_delivered);
-                       break;
-#endif /* INET6 */
-               }
-               switch (af) {
-               case AF_INET:
-                       psw = &inetsw[ip_protox[nxt]];
-                       break;
-#ifdef INET6
-               case AF_INET6:
-                       psw = &inet6sw[ip6_protox[nxt]];
                        break;
 #endif /* INET6 */
                }
Index: netinet/ip_var.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_var.h,v
retrieving revision 1.104
diff -u -p -r1.104 ip_var.h
--- netinet/ip_var.h    3 Sep 2022 22:43:38 -0000       1.104
+++ netinet/ip_var.h    6 Sep 2022 19:39:24 -0000
@@ -249,7 +249,7 @@ int  ip_sysctl(int *, u_int, void *, siz
 void    ip_savecontrol(struct inpcb *, struct mbuf **, struct ip *,
            struct mbuf *);
 int     ip_input_if(struct mbuf **, int *, int, int, struct ifnet *);
-int     ip_deliver(struct mbuf **, int *, int, int);
+int     ip_deliver(struct mbuf **, int *, int, int, int);
 void    ip_forward(struct mbuf *, struct ifnet *, struct rtentry *, int);
 int     rip_ctloutput(int, struct socket *, int, int, struct mbuf *);
 void    rip_init(void);
Index: netinet6/in6_proto.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/in6_proto.c,v
retrieving revision 1.111
diff -u -p -r1.111 in6_proto.c
--- netinet6/in6_proto.c        2 Sep 2022 13:12:32 -0000       1.111
+++ netinet6/in6_proto.c        6 Sep 2022 19:39:24 -0000
@@ -136,7 +136,7 @@ const struct protosw inet6sw[] = {
   .pr_type     = SOCK_DGRAM,
   .pr_domain   = &inet6domain,
   .pr_protocol = IPPROTO_UDP,
-  .pr_flags    = PR_ATOMIC|PR_ADDR|PR_SPLICE,
+  .pr_flags    = PR_ATOMIC|PR_ADDR|PR_SPLICE|PR_MPSAFE,
   .pr_input    = udp_input,
   .pr_ctlinput = udp6_ctlinput,
   .pr_ctloutput        = ip6_ctloutput,
Index: netinet6/ip6_input.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_input.c,v
retrieving revision 1.254
diff -u -p -r1.254 ip6_input.c
--- netinet6/ip6_input.c        21 Aug 2022 14:15:55 -0000      1.254
+++ netinet6/ip6_input.c        6 Sep 2022 19:39:24 -0000
@@ -190,6 +190,10 @@ ip6_ours(struct mbuf **mp, int *offp, in
        if (af != AF_UNSPEC)
                return nxt;
 
+       nxt = ip_deliver(mp, offp, nxt, AF_INET6, 1);
+       if (nxt == IPPROTO_DONE)
+               return IPPROTO_DONE;
+
        /* save values for later, use after dequeue */
        if (*offp != sizeof(struct ip6_hdr)) {
                struct m_tag *mtag;
@@ -248,7 +252,7 @@ ip6intr(void)
                        off = sizeof(struct ip6_hdr);
                        nxt = ip6->ip6_nxt;
                }
-               nxt = ip_deliver(&m, &off, nxt, AF_INET6);
+               nxt = ip_deliver(&m, &off, nxt, AF_INET6, 0);
                KASSERT(nxt == IPPROTO_DONE);
        }
 }
Index: sys/mbuf.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/sys/mbuf.h,v
retrieving revision 1.255
diff -u -p -r1.255 mbuf.h
--- sys/mbuf.h  15 Aug 2022 16:15:37 -0000      1.255
+++ sys/mbuf.h  6 Sep 2022 19:39:24 -0000
@@ -471,6 +471,8 @@ struct m_tag *m_tag_next(struct mbuf *, 
 #define PACKET_TAG_IPSEC_IN_DONE       0x0001  /* IPsec applied, in */
 #define PACKET_TAG_IPSEC_OUT_DONE      0x0002  /* IPsec applied, out */
 #define PACKET_TAG_IPSEC_FLOWINFO      0x0004  /* IPsec flowinfo */
+#define PACKET_TAG_IP_OFFNXT           0x0010  /* IPv4 offset and next proto */
+#define PACKET_TAG_IP6_OFFNXT          0x0020  /* IPv6 offset and next proto */
 #define PACKET_TAG_WIREGUARD           0x0040  /* WireGuard data */
 #define PACKET_TAG_GRE                 0x0080  /* GRE processing done */
 #define PACKET_TAG_DLT                 0x0100 /* data link layer type */
@@ -479,7 +481,6 @@ struct m_tag *m_tag_next(struct mbuf *, 
 #define PACKET_TAG_SRCROUTE            0x1000 /* IPv4 source routing options */
 #define PACKET_TAG_TUNNEL              0x2000  /* Tunnel endpoint address */
 #define PACKET_TAG_CARP_BAL_IP         0x4000  /* carp(4) ip balanced marker */
-#define PACKET_TAG_IP6_OFFNXT          0x8000  /* IPv6 offset and next proto */
 
 #define MTAG_BITS \
     ("\20\1IPSEC_IN_DONE\2IPSEC_OUT_DONE\3IPSEC_FLOWINFO" \
Index: sys/protosw.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/sys/protosw.h,v
retrieving revision 1.55
diff -u -p -r1.55 protosw.h
--- sys/protosw.h       5 Sep 2022 14:56:09 -0000       1.55
+++ sys/protosw.h       6 Sep 2022 19:39:24 -0000
@@ -128,6 +128,7 @@ struct protosw {
 #define        PR_ABRTACPTDIS  0x20            /* abort on accept(2) to disconnected
                                           socket */
 #define        PR_SPLICE       0x40            /* socket splicing is possible */
+#define        PR_MPSAFE       0x80            /* input runs with shared netlock */
 
 /*
  * The arguments to usrreq are:

Reply via email to