On Fri, Aug 19, 2022 at 10:54:42PM +0200, Alexander Bluhm wrote: > This diff allows udp_input() to run in parallel. It consists of > three major parts. > > - Use the PR_MPSAFE flag to run the protocol deliver loop with shared > netlock. Queue the packet and switch to the deliver loop with exclusive > netlock if a protocol is not MP safe. > > - Use a rwlock to protect the inp_notify field. As ip_output() > may be called in in_pcbnotifyall() and may sleep in pflock, we > need a sleeping lock. > > - Use a mutex at the inpcb to protect the recv socket buffer. > > Before committing I will split the diff into parts. Just showing what > I have now.
Parts of it are committed. Rebased to -current. Now I also use the shared net lock in soreceive(). This diff is mainly for testing, not to commit yet. bluhm Index: kern/uipc_socket.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/kern/uipc_socket.c,v retrieving revision 1.283 diff -u -p -r1.283 uipc_socket.c --- kern/uipc_socket.c 15 Aug 2022 09:11:38 -0000 1.283 +++ kern/uipc_socket.c 21 Aug 2022 15:09:30 -0000 @@ -823,10 +823,10 @@ bad: if (mp) *mp = NULL; - solock(so); + solock_shared(so); restart: if ((error = sblock(so, &so->so_rcv, SBLOCKWAIT(flags))) != 0) { - sounlock(so); + sounlock_shared(so); return (error); } @@ -894,7 +894,7 @@ restart: sbunlock(so, &so->so_rcv); error = sbwait(so, &so->so_rcv); if (error) { - sounlock(so); + sounlock_shared(so); return (error); } goto restart; @@ -963,11 +963,11 @@ dontblock: sbsync(&so->so_rcv, nextrecord); if (controlp) { if (pr->pr_domain->dom_externalize) { - sounlock(so); + sounlock_shared(so); error = (*pr->pr_domain->dom_externalize) (cm, controllen, flags); - solock(so); + solock_shared(so); } *controlp = cm; } else { @@ -1041,9 +1041,9 @@ dontblock: SBLASTRECORDCHK(&so->so_rcv, "soreceive uiomove"); SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove"); resid = uio->uio_resid; - sounlock(so); + sounlock_shared(so); uio_error = uiomove(mtod(m, caddr_t) + moff, len, uio); - solock(so); + solock_shared(so); if (uio_error) uio->uio_resid = resid - len; } else @@ -1127,7 +1127,7 @@ dontblock: error = sbwait(so, &so->so_rcv); if (error) { sbunlock(so, &so->so_rcv); - sounlock(so); + sounlock_shared(so); return (0); } if ((m = so->so_rcv.sb_mb) != NULL) @@ -1172,7 +1172,7 @@ dontblock: *flagsp |= flags; release: sbunlock(so, &so->so_rcv); - sounlock(so); + sounlock_shared(so); return (error); } Index: kern/uipc_socket2.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/kern/uipc_socket2.c,v retrieving 
revision 1.127 diff -u -p -r1.127 uipc_socket2.c --- kern/uipc_socket2.c 13 Aug 2022 21:01:46 -0000 1.127 +++ kern/uipc_socket2.c 21 Aug 2022 15:09:30 -0000 @@ -360,6 +360,24 @@ solock(struct socket *so) } } +void +solock_shared(struct socket *so) +{ + switch (so->so_proto->pr_domain->dom_family) { + case PF_INET: + case PF_INET6: + if (so->so_proto->pr_usrreqs->pru_lock != NULL) { + NET_LOCK_SHARED(); + pru_lock(so); + } else + NET_LOCK(); + break; + default: + rw_enter_write(&so->so_lock); + break; + } +} + int solock_persocket(struct socket *so) { @@ -403,6 +421,24 @@ sounlock(struct socket *so) } void +sounlock_shared(struct socket *so) +{ + switch (so->so_proto->pr_domain->dom_family) { + case PF_INET: + case PF_INET6: + if (so->so_proto->pr_usrreqs->pru_unlock != NULL) { + pru_unlock(so); + NET_UNLOCK_SHARED(); + } else + NET_UNLOCK(); + break; + default: + rw_exit_write(&so->so_lock); + break; + } +} + +void soassertlocked(struct socket *so) { switch (so->so_proto->pr_domain->dom_family) { @@ -425,7 +461,15 @@ sosleep_nsec(struct socket *so, void *id switch (so->so_proto->pr_domain->dom_family) { case PF_INET: case PF_INET6: + if (so->so_proto->pr_usrreqs->pru_unlock != NULL && + rw_status(&netlock) == RW_READ) { + pru_unlock(so); + } ret = rwsleep_nsec(ident, &netlock, prio, wmesg, nsecs); + if (so->so_proto->pr_usrreqs->pru_lock != NULL && + rw_status(&netlock) == RW_READ) { + pru_lock(so); + } break; default: ret = rwsleep_nsec(ident, &so->so_lock, prio, wmesg, nsecs); Index: net/if_bridge.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_bridge.c,v retrieving revision 1.364 diff -u -p -r1.364 if_bridge.c --- net/if_bridge.c 7 Aug 2022 00:57:43 -0000 1.364 +++ net/if_bridge.c 21 Aug 2022 15:06:27 -0000 @@ -1590,7 +1590,7 @@ bridge_ipsec(struct ifnet *ifp, struct e off); tdb_unref(tdb); if (prot != IPPROTO_DONE) - ip_deliver(&m, &hlen, prot, af); + ip_deliver(&m, &hlen, prot, af, 0); 
return (1); } else { tdb_unref(tdb); Index: netinet/in_pcb.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.c,v retrieving revision 1.271 diff -u -p -r1.271 in_pcb.c --- netinet/in_pcb.c 21 Aug 2022 11:44:53 -0000 1.271 +++ netinet/in_pcb.c 21 Aug 2022 15:06:27 -0000 @@ -175,6 +175,7 @@ void in_pcbinit(struct inpcbtable *table, int hashsize) { mtx_init(&table->inpt_mtx, IPL_SOFTNET); + rw_init(&table->inpt_notify, "inpnotify"); TAILQ_INIT(&table->inpt_queue); table->inpt_hashtbl = hashinit(hashsize, M_PCB, M_WAITOK, &table->inpt_mask); @@ -696,8 +697,6 @@ in_pcbnotifyall(struct inpcbtable *table struct in_addr faddr; u_int rdomain; - NET_ASSERT_LOCKED_EXCLUSIVE(); - if (dst->sa_family != AF_INET) return; faddr = satosin(dst)->sin_addr; @@ -708,6 +707,7 @@ in_pcbnotifyall(struct inpcbtable *table SIMPLEQ_INIT(&inpcblist); rdomain = rtable_l2(rtable); + rw_enter_write(&table->inpt_notify); mtx_enter(&table->inpt_mtx); TAILQ_FOREACH(inp, &table->inpt_queue, inp_queue) { #ifdef INET6 @@ -729,6 +729,7 @@ in_pcbnotifyall(struct inpcbtable *table (*notify)(inp, errno); in_pcbunref(inp); } + rw_exit_write(&table->inpt_notify); } /* Index: netinet/in_pcb.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.h,v retrieving revision 1.130 diff -u -p -r1.130 in_pcb.h --- netinet/in_pcb.h 21 Aug 2022 11:44:53 -0000 1.130 +++ netinet/in_pcb.h 21 Aug 2022 15:06:27 -0000 @@ -66,6 +66,7 @@ #include <sys/queue.h> #include <sys/mutex.h> +#include <sys/rwlock.h> #include <sys/refcnt.h> #include <netinet/ip6.h> #include <netinet6/ip6_var.h> @@ -79,6 +80,7 @@ * I immutable after creation * N net lock * t inpt_mtx pcb table mutex + * y inpt_notify pcb table rwlock for notify * p inpcb_mtx pcb mutex */ @@ -103,7 +105,7 @@ struct inpcb { LIST_ENTRY(inpcb) inp_hash; /* [t] local and foreign hash */ LIST_ENTRY(inpcb) inp_lhash; /* [t] local 
port hash */ TAILQ_ENTRY(inpcb) inp_queue; /* [t] inet PCB queue */ - SIMPLEQ_ENTRY(inpcb) inp_notify; /* [N] notify or udp append */ + SIMPLEQ_ENTRY(inpcb) inp_notify; /* [y] notify or udp append */ struct inpcbtable *inp_table; /* [I] inet queue/hash table */ union inpaddru inp_faddru; /* Foreign address. */ union inpaddru inp_laddru; /* Local address. */ @@ -166,6 +168,7 @@ LIST_HEAD(inpcbhead, inpcb); struct inpcbtable { struct mutex inpt_mtx; /* protect queue and hash */ + struct rwlock inpt_notify; /* protect inp_notify list */ TAILQ_HEAD(inpthead, inpcb) inpt_queue; /* [t] inet PCB queue */ struct inpcbhead *inpt_hashtbl; /* [t] local and foreign hash */ struct inpcbhead *inpt_lhashtbl; /* [t] local port hash */ Index: netinet/in_proto.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_proto.c,v retrieving revision 1.99 diff -u -p -r1.99 in_proto.c --- netinet/in_proto.c 15 Aug 2022 09:11:38 -0000 1.99 +++ netinet/in_proto.c 21 Aug 2022 15:06:27 -0000 @@ -185,7 +185,7 @@ const struct protosw inetsw[] = { .pr_type = SOCK_DGRAM, .pr_domain = &inetdomain, .pr_protocol = IPPROTO_UDP, - .pr_flags = PR_ATOMIC|PR_ADDR|PR_SPLICE, + .pr_flags = PR_ATOMIC|PR_ADDR|PR_SPLICE|PR_MPSAFE, .pr_input = udp_input, .pr_ctlinput = udp_ctlinput, .pr_ctloutput = ip_ctloutput, Index: netinet/ip_input.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_input.c,v retrieving revision 1.380 diff -u -p -r1.380 ip_input.c --- netinet/ip_input.c 21 Aug 2022 14:15:55 -0000 1.380 +++ netinet/ip_input.c 21 Aug 2022 15:06:27 -0000 @@ -230,6 +230,11 @@ ip_init(void) #endif } +struct ip_offnxt { + int ion_off; + int ion_nxt; +}; + /* * Enqueue packet for local delivery. 
Queuing is used as a boundary * between the network layer (input/forward path) running with @@ -246,6 +251,30 @@ ip_ours(struct mbuf **mp, int *offp, int if (af != AF_UNSPEC) return nxt; + nxt = ip_deliver(mp, offp, nxt, AF_INET, 1); + if (nxt == IPPROTO_DONE) + return IPPROTO_DONE; + + /* save values for later, use after dequeue */ + if (*offp != sizeof(struct ip)) { + struct m_tag *mtag; + struct ip_offnxt *ion; + + /* mbuf tags are expensive, but only used for header options */ + mtag = m_tag_get(PACKET_TAG_IP_OFFNXT, sizeof(*ion), + M_NOWAIT); + if (mtag == NULL) { + ipstat_inc(ips_idropped); + m_freemp(mp); + return IPPROTO_DONE; + } + ion = (struct ip_offnxt *)(mtag + 1); + ion->ion_off = *offp; + ion->ion_nxt = nxt; + + m_tag_prepend(*mp, mtag); + } + niq_enqueue(&ipintrq, *mp); *mp = NULL; return IPPROTO_DONE; @@ -261,18 +290,31 @@ ipintr(void) struct mbuf *m; while ((m = niq_dequeue(&ipintrq)) != NULL) { - struct ip *ip; + struct m_tag *mtag; int off, nxt; #ifdef DIAGNOSTIC if ((m->m_flags & M_PKTHDR) == 0) panic("ipintr no HDR"); #endif - ip = mtod(m, struct ip *); - off = ip->ip_hl << 2; - nxt = ip->ip_p; + mtag = m_tag_find(m, PACKET_TAG_IP_OFFNXT, NULL); + if (mtag != NULL) { + struct ip_offnxt *ion; + + ion = (struct ip_offnxt *)(mtag + 1); + off = ion->ion_off; + nxt = ion->ion_nxt; + + m_tag_delete(m, mtag); + } else { + struct ip *ip; - nxt = ip_deliver(&m, &off, nxt, AF_INET); + ip = mtod(m, struct ip *); + off = ip->ip_hl << 2; + nxt = ip->ip_p; + } + + nxt = ip_deliver(&m, &off, nxt, AF_INET, 0); KASSERT(nxt == IPPROTO_DONE); } } @@ -673,7 +715,7 @@ ip_fragcheck(struct mbuf **mp, int *offp #endif int -ip_deliver(struct mbuf **mp, int *offp, int nxt, int af) +ip_deliver(struct mbuf **mp, int *offp, int nxt, int af, int shared) { const struct protosw *psw; int naf = af; @@ -681,26 +723,24 @@ ip_deliver(struct mbuf **mp, int *offp, int nest = 0; #endif /* INET6 */ - NET_ASSERT_LOCKED_EXCLUSIVE(); - - /* pf might have modified stuff, might have to 
chksum */ - switch (af) { - case AF_INET: - in_proto_cksum_out(*mp, NULL); - break; -#ifdef INET6 - case AF_INET6: - in6_proto_cksum_out(*mp, NULL); - break; -#endif /* INET6 */ - } - /* * Tell launch routine the next header */ IPSTAT_INC(delivered); while (nxt != IPPROTO_DONE) { + switch (af) { + case AF_INET: + psw = &inetsw[ip_protox[nxt]]; + break; +#ifdef INET6 + case AF_INET6: + psw = &inet6sw[ip6_protox[nxt]]; + break; +#endif /* INET6 */ + } + if (shared && !ISSET(psw->pr_flags, PR_MPSAFE)) + break; #ifdef INET6 if (af == AF_INET6 && ip6_hdrnestlimit && (++nest > ip6_hdrnestlimit)) { @@ -737,16 +777,6 @@ ip_deliver(struct mbuf **mp, int *offp, case IPPROTO_IPV6: naf = AF_INET6; ip6stat_inc(ip6s_delivered); - break; -#endif /* INET6 */ - } - switch (af) { - case AF_INET: - psw = &inetsw[ip_protox[nxt]]; - break; -#ifdef INET6 - case AF_INET6: - psw = &inet6sw[ip6_protox[nxt]]; break; #endif /* INET6 */ } Index: netinet/ip_var.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_var.h,v retrieving revision 1.98 diff -u -p -r1.98 ip_var.h --- netinet/ip_var.h 20 Aug 2022 23:48:58 -0000 1.98 +++ netinet/ip_var.h 21 Aug 2022 15:06:27 -0000 @@ -249,7 +249,7 @@ int ip_sysctl(int *, u_int, void *, siz void ip_savecontrol(struct inpcb *, struct mbuf **, struct ip *, struct mbuf *); int ip_input_if(struct mbuf **, int *, int, int, struct ifnet *); -int ip_deliver(struct mbuf **, int *, int, int); +int ip_deliver(struct mbuf **, int *, int, int, int); void ip_forward(struct mbuf *, struct ifnet *, struct rtentry *, int); int rip_ctloutput(int, struct socket *, int, int, struct mbuf *); void rip_init(void); Index: netinet/raw_ip.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/raw_ip.c,v retrieving revision 1.131 diff -u -p -r1.131 raw_ip.c --- netinet/raw_ip.c 20 Aug 2022 23:48:58 -0000 1.131 +++ netinet/raw_ip.c 21 Aug 2022 
15:06:27 -0000 @@ -159,8 +159,8 @@ rip_input(struct mbuf **mp, int *offp, i } } #endif - NET_ASSERT_LOCKED_EXCLUSIVE(); SIMPLEQ_INIT(&inpcblist); + rw_enter_write(&rawcbtable.inpt_notify); mtx_enter(&rawcbtable.inpt_mtx); TAILQ_FOREACH(inp, &rawcbtable.inpt_queue, inp_queue) { if (inp->inp_socket->so_state & SS_CANTRCVMORE) @@ -188,6 +188,8 @@ rip_input(struct mbuf **mp, int *offp, i mtx_leave(&rawcbtable.inpt_mtx); if (SIMPLEQ_EMPTY(&inpcblist)) { + rw_exit_write(&rawcbtable.inpt_notify); + if (ip->ip_p != IPPROTO_ICMP) icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PROTOCOL, 0, 0); @@ -198,6 +200,8 @@ rip_input(struct mbuf **mp, int *offp, i counters[ips_noproto]++; counters[ips_delivered]--; counters_leave(&ref, ipcounters); + + return IPPROTO_DONE; } while ((inp = SIMPLEQ_FIRST(&inpcblist)) != NULL) { @@ -223,6 +227,8 @@ rip_input(struct mbuf **mp, int *offp, i } in_pcbunref(inp); } + rw_exit_write(&rawcbtable.inpt_notify); + return IPPROTO_DONE; } Index: netinet/udp_usrreq.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/udp_usrreq.c,v retrieving revision 1.283 diff -u -p -r1.283 udp_usrreq.c --- netinet/udp_usrreq.c 20 Aug 2022 23:48:58 -0000 1.283 +++ netinet/udp_usrreq.c 21 Aug 2022 15:23:21 -0000 @@ -122,10 +122,15 @@ u_int udp_sendspace = 9216; /* really m u_int udp_recvspace = 40 * (1024 + sizeof(struct sockaddr_in)); /* 40 1K datagrams */ +void udp_lock(struct socket *); +void udp_unlock(struct socket *); + const struct pr_usrreqs udp_usrreqs = { .pru_usrreq = udp_usrreq, .pru_attach = udp_attach, .pru_detach = udp_detach, + .pru_lock = udp_lock, + .pru_unlock = udp_unlock, .pru_bind = udp_bind, }; @@ -371,8 +376,8 @@ udp_input(struct mbuf **mp, int *offp, i * Locate pcb(s) for datagram. * (Algorithm copied from raw_intr().) 
*/ - NET_ASSERT_LOCKED_EXCLUSIVE(); SIMPLEQ_INIT(&inpcblist); + rw_enter_write(&udbtable.inpt_notify); mtx_enter(&udbtable.inpt_mtx); TAILQ_FOREACH(inp, &udbtable.inpt_queue, inp_queue) { if (inp->inp_socket->so_state & SS_CANTRCVMORE) @@ -445,6 +450,7 @@ udp_input(struct mbuf **mp, int *offp, i mtx_leave(&udbtable.inpt_mtx); if (SIMPLEQ_EMPTY(&inpcblist)) { + rw_exit_write(&udbtable.inpt_notify); /* * No matching pcb found; discard datagram. * (No need to send an ICMP Port Unreachable @@ -468,6 +474,8 @@ udp_input(struct mbuf **mp, int *offp, i } in_pcbunref(inp); } + rw_exit_write(&udbtable.inpt_notify); + return IPPROTO_DONE; } /* @@ -648,12 +656,17 @@ udp_sbappend(struct inpcb *inp, struct m } #endif m_adj(m, hlen); + + mtx_enter(&inp->inp_mtx); if (sbappendaddr(so, &so->so_rcv, srcaddr, m, opts) == 0) { + mtx_leave(&inp->inp_mtx); udpstat_inc(udps_fullsock); m_freem(m); m_freem(opts); return; } + mtx_leave(&inp->inp_mtx); + sorwakeup(so); } @@ -1270,6 +1283,24 @@ udp_detach(struct socket *so) in_pcbdetach(inp); return (0); +} + +void +udp_lock(struct socket *so) +{ + struct inpcb *inp = sotoinpcb(so); + + NET_ASSERT_LOCKED(); + mtx_enter(&inp->inp_mtx); +} + +void +udp_unlock(struct socket *so) +{ + struct inpcb *inp = sotoinpcb(so); + + NET_ASSERT_LOCKED(); + mtx_leave(&inp->inp_mtx); } int Index: netinet6/in6_pcb.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/in6_pcb.c,v retrieving revision 1.119 diff -u -p -r1.119 in6_pcb.c --- netinet6/in6_pcb.c 8 Aug 2022 12:06:31 -0000 1.119 +++ netinet6/in6_pcb.c 21 Aug 2022 15:06:27 -0000 @@ -387,8 +387,6 @@ in6_pcbnotify(struct inpcbtable *table, u_int32_t flowinfo; u_int rdomain; - NET_ASSERT_LOCKED_EXCLUSIVE(); - if ((unsigned)cmd >= PRC_NCMDS) return; @@ -430,6 +428,7 @@ in6_pcbnotify(struct inpcbtable *table, SIMPLEQ_INIT(&inpcblist); rdomain = rtable_l2(rtable); + rw_enter_write(&table->inpt_notify); mtx_enter(&table->inpt_mtx); 
TAILQ_FOREACH(inp, &table->inpt_queue, inp_queue) { if ((inp->inp_flags & INP_IPV6) == 0) @@ -513,6 +512,7 @@ in6_pcbnotify(struct inpcbtable *table, (*notify)(inp, errno); in_pcbunref(inp); } + rw_exit_write(&table->inpt_notify); } struct inpcb * Index: netinet6/in6_proto.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/in6_proto.c,v retrieving revision 1.110 diff -u -p -r1.110 in6_proto.c --- netinet6/in6_proto.c 15 Aug 2022 09:11:39 -0000 1.110 +++ netinet6/in6_proto.c 21 Aug 2022 15:06:27 -0000 @@ -136,7 +136,7 @@ const struct protosw inet6sw[] = { .pr_type = SOCK_DGRAM, .pr_domain = &inet6domain, .pr_protocol = IPPROTO_UDP, - .pr_flags = PR_ATOMIC|PR_ADDR|PR_SPLICE, + .pr_flags = PR_ATOMIC|PR_ADDR|PR_SPLICE|PR_MPSAFE, .pr_input = udp_input, .pr_ctlinput = udp6_ctlinput, .pr_ctloutput = ip6_ctloutput, Index: netinet6/ip6_input.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_input.c,v retrieving revision 1.254 diff -u -p -r1.254 ip6_input.c --- netinet6/ip6_input.c 21 Aug 2022 14:15:55 -0000 1.254 +++ netinet6/ip6_input.c 21 Aug 2022 15:06:27 -0000 @@ -190,6 +190,10 @@ ip6_ours(struct mbuf **mp, int *offp, in if (af != AF_UNSPEC) return nxt; + nxt = ip_deliver(mp, offp, nxt, AF_INET6, 1); + if (nxt == IPPROTO_DONE) + return IPPROTO_DONE; + /* save values for later, use after dequeue */ if (*offp != sizeof(struct ip6_hdr)) { struct m_tag *mtag; @@ -248,7 +252,7 @@ ip6intr(void) off = sizeof(struct ip6_hdr); nxt = ip6->ip6_nxt; } - nxt = ip_deliver(&m, &off, nxt, AF_INET6); + nxt = ip_deliver(&m, &off, nxt, AF_INET6, 0); KASSERT(nxt == IPPROTO_DONE); } } Index: netinet6/raw_ip6.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/raw_ip6.c,v retrieving revision 1.151 diff -u -p -r1.151 raw_ip6.c --- netinet6/raw_ip6.c 20 Aug 2022 23:48:58 -0000 
1.151 +++ netinet6/raw_ip6.c 21 Aug 2022 15:06:27 -0000 @@ -171,8 +171,8 @@ rip6_input(struct mbuf **mp, int *offp, } } #endif - NET_ASSERT_LOCKED_EXCLUSIVE(); SIMPLEQ_INIT(&inpcblist); + rw_enter_write(&rawin6pcbtable.inpt_notify); mtx_enter(&rawin6pcbtable.inpt_mtx); TAILQ_FOREACH(in6p, &rawin6pcbtable.inpt_queue, inp_queue) { if (in6p->inp_socket->so_state & SS_CANTRCVMORE) @@ -223,6 +223,8 @@ rip6_input(struct mbuf **mp, int *offp, struct counters_ref ref; uint64_t *counters; + rw_exit_write(&rawin6pcbtable.inpt_notify); + if (proto != IPPROTO_ICMPV6) { rip6stat_inc(rip6s_nosock); if (m->m_flags & M_MCAST) @@ -239,6 +241,8 @@ rip6_input(struct mbuf **mp, int *offp, counters = counters_enter(&ref, ip6counters); counters[ip6s_delivered]--; counters_leave(&ref, ip6counters); + + return IPPROTO_DONE; } while ((in6p = SIMPLEQ_FIRST(&inpcblist)) != NULL) { @@ -266,6 +270,8 @@ rip6_input(struct mbuf **mp, int *offp, } in_pcbunref(in6p); } + rw_exit_write(&rawin6pcbtable.inpt_notify); + return IPPROTO_DONE; } Index: sys/mbuf.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/sys/mbuf.h,v retrieving revision 1.255 diff -u -p -r1.255 mbuf.h --- sys/mbuf.h 15 Aug 2022 16:15:37 -0000 1.255 +++ sys/mbuf.h 21 Aug 2022 15:06:27 -0000 @@ -471,6 +471,8 @@ struct m_tag *m_tag_next(struct mbuf *, #define PACKET_TAG_IPSEC_IN_DONE 0x0001 /* IPsec applied, in */ #define PACKET_TAG_IPSEC_OUT_DONE 0x0002 /* IPsec applied, out */ #define PACKET_TAG_IPSEC_FLOWINFO 0x0004 /* IPsec flowinfo */ +#define PACKET_TAG_IP_OFFNXT 0x0010 /* IPv4 offset and next proto */ +#define PACKET_TAG_IP6_OFFNXT 0x0020 /* IPv6 offset and next proto */ #define PACKET_TAG_WIREGUARD 0x0040 /* WireGuard data */ #define PACKET_TAG_GRE 0x0080 /* GRE processing done */ #define PACKET_TAG_DLT 0x0100 /* data link layer type */ @@ -479,7 +481,6 @@ struct m_tag *m_tag_next(struct mbuf *, #define PACKET_TAG_SRCROUTE 0x1000 /* IPv4 source routing options */ 
#define PACKET_TAG_TUNNEL 0x2000 /* Tunnel endpoint address */ #define PACKET_TAG_CARP_BAL_IP 0x4000 /* carp(4) ip balanced marker */ -#define PACKET_TAG_IP6_OFFNXT 0x8000 /* IPv6 offset and next proto */ #define MTAG_BITS \ ("\20\1IPSEC_IN_DONE\2IPSEC_OUT_DONE\3IPSEC_FLOWINFO" \ Index: sys/protosw.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/sys/protosw.h,v retrieving revision 1.38 diff -u -p -r1.38 protosw.h --- sys/protosw.h 20 Aug 2022 23:48:58 -0000 1.38 +++ sys/protosw.h 21 Aug 2022 15:14:59 -0000 @@ -66,6 +66,8 @@ struct pr_usrreqs { int (*pru_attach)(struct socket *, int); int (*pru_detach)(struct socket *); + void (*pru_lock)(struct socket *); + void (*pru_unlock)(struct socket *); int (*pru_bind)(struct socket *, struct mbuf *, struct proc *); }; @@ -113,6 +115,7 @@ struct protosw { #define PR_ABRTACPTDIS 0x20 /* abort on accept(2) to disconnected socket */ #define PR_SPLICE 0x40 /* socket splicing is possible */ +#define PR_MPSAFE 0x80 /* input runs with shared netlock */ /* * The arguments to usrreq are: @@ -260,6 +263,18 @@ static inline int pru_detach(struct socket *so) { return (*so->so_proto->pr_usrreqs->pru_detach)(so); +} + +static inline void +pru_lock(struct socket *so) +{ + (*so->so_proto->pr_usrreqs->pru_lock)(so); +} + +static inline void +pru_unlock(struct socket *so) +{ + (*so->so_proto->pr_usrreqs->pru_unlock)(so); } static inline int Index: sys/socketvar.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/sys/socketvar.h,v retrieving revision 1.107 diff -u -p -r1.107 socketvar.h --- sys/socketvar.h 13 Aug 2022 21:01:46 -0000 1.107 +++ sys/socketvar.h 21 Aug 2022 15:09:30 -0000 @@ -349,9 +349,11 @@ int sockargs(struct mbuf **, const void int sosleep_nsec(struct socket *, void *, int, const char *, uint64_t); void solock(struct socket *); +void solock_shared(struct socket *); int solock_persocket(struct socket *); 
void solock_pair(struct socket *, struct socket *); void sounlock(struct socket *); +void sounlock_shared(struct socket *); int sendit(struct proc *, int, struct msghdr *, int, register_t *); int recvit(struct proc *, int, struct msghdr *, caddr_t, register_t *);