On Mon, Nov 04, 2024 at 03:58:38PM +0100, Alexander Bluhm wrote: > On Sun, Oct 27, 2024 at 02:11:22AM +0300, Vitaliy Makkoveev wrote: > > I just checked this diff with 7.6-stable. It perfectly applies to > > sources, compiles and runs. I'm not surprised, because -current is very > > close to release. It seems you did something wrong. > > > > The attached diff was made against 7.6-stable. As you can see it is the > > same. Please be sure your mail client doesn't drop spaces in the > > beginning of line or something else. > > I finally got vxlan over UDP multicast working and did send UDP > multicast packets over it. For now it is only one packet, no > multicore performance test yet. Multicast receive, route, send > over vxlan works, after I have tweaked mvs@ inpcb iterator diff. > > Changes to the previous version: > - PCB Iterator is only checked for UDP sockets. If we implement > it for others, we can adopt. > - tmp = TAILQ_NEXT(inp, inp_queue) was wrong, it must be > tmp = TAILQ_NEXT((struct inpcb *)iter, inp_queue) > - in_pcbunref(inp) automatically does a NULL check. > - When exiting the iterater loop early, in_pcb_iterator_abort() > removes the iterator from the queue. > - Rename tinp to last. This was the name in 4.4BSD before I > splitted the loop into two. > > mvs@, what do you think about this? >
Your diff is ok by me. I forgot to remove `iter' in the early exit case, that's why my diff crashed. in_pcb_iterator_abort() fixes this. We also could save a little stack space by moving `inp_queue' and `inp_table' to the beginning of 'inpcb' structure, but this reordering could be done separately. > I am throwing this on regress right now. And as mentioned before, > I should implement automatic UDP multicast tests over vxlan. With > that, unlocking multicast in general will be easier. > > bluhm > > Index: kern/kern_sysctl.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/kern/kern_sysctl.c,v > diff -u -p -r1.451 kern_sysctl.c > --- kern/kern_sysctl.c 31 Oct 2024 10:06:51 -0000 1.451 > +++ kern/kern_sysctl.c 4 Nov 2024 10:10:34 -0000 > @@ -1689,13 +1689,19 @@ sysctl_file(int *name, u_int namelen, ch > mtx_leave(&tcb6table.inpt_mtx); > #endif > mtx_enter(&udbtable.inpt_mtx); > - TAILQ_FOREACH(inp, &udbtable.inpt_queue, inp_queue) > + TAILQ_FOREACH(inp, &udbtable.inpt_queue, inp_queue) { > + if (in_pcb_is_iterator(inp)) > + continue; > FILLSO(inp->inp_socket); > + } > mtx_leave(&udbtable.inpt_mtx); > #ifdef INET6 > mtx_enter(&udb6table.inpt_mtx); > - TAILQ_FOREACH(inp, &udb6table.inpt_queue, inp_queue) > + TAILQ_FOREACH(inp, &udb6table.inpt_queue, inp_queue) { > + if (in_pcb_is_iterator(inp)) > + continue; > FILLSO(inp->inp_socket); > + } > mtx_leave(&udb6table.inpt_mtx); > #endif > mtx_enter(&rawcbtable.inpt_mtx); > Index: netinet/in_pcb.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.c,v > diff -u -p -r1.303 in_pcb.c > --- netinet/in_pcb.c 12 Jul 2024 19:50:35 -0000 1.303 > +++ netinet/in_pcb.c 4 Nov 2024 13:49:46 -0000 > @@ -644,6 +644,55 @@ in_pcbunref(struct inpcb *inp) > pool_put(&inpcb_pool, inp); > } > > +struct inpcb * > +in_pcb_iterator(struct inpcbtable *table, struct inpcb *inp, > + struct inpcb_iterator *iter) > +{ > + struct inpcb *tmp; > + > + mtx_enter(&table->inpt_mtx); > + > + if (inp) > + tmp = TAILQ_NEXT((struct inpcb *)iter, inp_queue); > + else > + tmp = TAILQ_FIRST(&table->inpt_queue); > + > + while (tmp && tmp->inp_table == NULL) > + tmp = TAILQ_NEXT(tmp, inp_queue); > + > + if (inp) { > + TAILQ_REMOVE(&table->inpt_queue, (struct inpcb *)iter, > + inp_queue); > + } > + if (tmp) { > + TAILQ_INSERT_AFTER(&table->inpt_queue, tmp, > + (struct inpcb *)iter, inp_queue); > + in_pcbref(tmp); > + } > + > + mtx_leave(&table->inpt_mtx); > + > + in_pcbunref(inp); > + > + return tmp; > +} > + > +void > +in_pcb_iterator_abort(struct inpcbtable *table, struct inpcb *inp, > + struct inpcb_iterator *iter) > +{ > + mtx_enter(&table->inpt_mtx); > + > + if (inp) { > + TAILQ_REMOVE(&table->inpt_queue, (struct inpcb *)iter, > + inp_queue); > + } > + > + mtx_leave(&table->inpt_mtx); > + > + in_pcbunref(inp); > +} > + > void > in_setsockaddr(struct inpcb *inp, struct mbuf *nam) > { > @@ -743,6 +792,8 @@ in_pcbnotifyall(struct inpcbtable *table > rw_enter_write(&table->inpt_notify); > mtx_enter(&table->inpt_mtx); > TAILQ_FOREACH(inp, &table->inpt_queue, inp_queue) { > + if (in_pcb_is_iterator(inp)) > + continue; > KASSERT(!ISSET(inp->inp_flags, INP_IPV6)); > > if (inp->inp_faddr.s_addr != dst->sin_addr.s_addr || > @@ -1098,6 +1149,8 @@ in_pcbresize(struct inpcbtable *table, i > table->inpt_size = hashsize; > > TAILQ_FOREACH(inp, &table->inpt_queue, inp_queue) { > + if (in_pcb_is_iterator(inp)) > + continue; > LIST_REMOVE(inp, inp_lhash); > LIST_REMOVE(inp, inp_hash); > in_pcbhash_insert(inp); > Index: netinet/in_pcb.h > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.h,v > diff -u -p -r1.158 in_pcb.h > --- netinet/in_pcb.h 12 Jul 2024 19:50:35 -0000 1.158 > +++ netinet/in_pcb.h 4 Nov 2024 13:49:40 -0000 > @@ -178,6 +178,20 @@ struct inpcb { > > LIST_HEAD(inpcbhead, inpcb); > > +struct inpcb_iterator { > + LIST_ENTRY(inpcb) inp_hash; /* unused */ > + LIST_ENTRY(inpcb) inp_lhash; /* unused */ > + TAILQ_ENTRY(inpcb) inp_queue; /* [t] inet PCB queue */ > + SIMPLEQ_ENTRY(inpcb) inp_notify; /* unused */ > + struct inpcbtable *inp_table; /* [I] always NULL */ > +}; > + > +static inline int > +in_pcb_is_iterator(struct inpcb *inp) > +{ > + return (inp->inp_table == NULL ? 1 : 0); > +} > + > struct inpcbtable { > struct mutex inpt_mtx; /* protect queue and hash */ > struct rwlock inpt_notify; /* protect inp_notify list */ > @@ -302,6 +316,11 @@ struct inpcb * > in_pcbref(struct inpcb *); > void in_pcbunref(struct inpcb *); > void in_pcbdisconnect(struct inpcb *); > +struct inpcb * > + in_pcb_iterator(struct inpcbtable *, struct inpcb *, > + struct inpcb_iterator *); > +void in_pcb_iterator_abort(struct inpcbtable *, struct inpcb *, > + struct inpcb_iterator *); > struct inpcb * > in_pcblookup(struct inpcbtable *, struct in_addr, > u_int, struct in_addr, u_int, u_int); > Index: netinet/udp_usrreq.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/udp_usrreq.c,v > diff -u -p -r1.325 udp_usrreq.c > --- netinet/udp_usrreq.c 3 Nov 2024 14:28:06 -0000 1.325 > +++ netinet/udp_usrreq.c 4 Nov 2024 13:48:51 -0000 > @@ -382,7 +382,8 @@ udp_input(struct mbuf **mp, int *offp, i > } > > if (m->m_flags & (M_BCAST|M_MCAST)) { > - SIMPLEQ_HEAD(, inpcb) inpcblist; > + struct inpcb_iterator iter = {.inp_table = NULL}; > + struct inpcb *last; > struct inpcbtable *table; > > /* > @@ -401,11 +402,6 @@ udp_input(struct mbuf **mp, int *offp, i > * fixing the interface. Maybe 4.5BSD will remedy this?) > */ > > - /* > - * Locate pcb(s) for datagram. > - * (Algorithm copied from raw_intr().) > - */ > - SIMPLEQ_INIT(&inpcblist); > #ifdef INET6 > if (ip6) > table = &udb6table; > @@ -413,9 +409,8 @@ udp_input(struct mbuf **mp, int *offp, i > #endif > table = &udbtable; > > - rw_enter_write(&table->inpt_notify); > - mtx_enter(&table->inpt_mtx); > - TAILQ_FOREACH(inp, &table->inpt_queue, inp_queue) { > + last = inp = NULL; > + while ((inp = in_pcb_iterator(table, inp, &iter)) != NULL) { > if (ip6) > KASSERT(ISSET(inp->inp_flags, INP_IPV6)); > else > @@ -466,8 +461,18 @@ udp_input(struct mbuf **mp, int *offp, i > continue; > } > > - in_pcbref(inp); > - SIMPLEQ_INSERT_TAIL(&inpcblist, inp, inp_notify); > + if (last != NULL) { > + struct mbuf *n; > + > + n = m_copym(m, 0, M_COPYALL, M_NOWAIT); > + if (n != NULL) { > + udp_sbappend(last, n, ip, ip6, iphlen, > + uh, &srcsa.sa, 0); > + } > + in_pcbunref(last); > + } > + > + last = in_pcbref(inp); > > /* > * Don't look for additional matches if this one does > @@ -478,14 +483,13 @@ udp_input(struct mbuf **mp, int *offp, i > * clear these options after setting them. > */ > if ((inp->inp_socket->so_options & (SO_REUSEPORT | > - SO_REUSEADDR)) == 0) > + SO_REUSEADDR)) == 0) { > + in_pcb_iterator_abort(table, inp, &iter); > break; > + } > } > - mtx_leave(&table->inpt_mtx); > - > - if (SIMPLEQ_EMPTY(&inpcblist)) { > - rw_exit_write(&table->inpt_notify); > > + if (last == NULL) { > /* > * No matching pcb found; discard datagram. > * (No need to send an ICMP Port Unreachable > @@ -495,21 +499,8 @@ udp_input(struct mbuf **mp, int *offp, i > goto bad; > } > > - while ((inp = SIMPLEQ_FIRST(&inpcblist)) != NULL) { > - struct mbuf *n; > - > - SIMPLEQ_REMOVE_HEAD(&inpcblist, inp_notify); > - if (SIMPLEQ_EMPTY(&inpcblist)) > - n = m; > - else > - n = m_copym(m, 0, M_COPYALL, M_NOWAIT); > - if (n != NULL) { > - udp_sbappend(inp, n, ip, ip6, iphlen, uh, > - &srcsa.sa, 0); > - } > - in_pcbunref(inp); > - } > - rw_exit_write(&table->inpt_notify); > + udp_sbappend(last, m, ip, ip6, iphlen, uh, &srcsa.sa, 0); > + in_pcbunref(last); > > return IPPROTO_DONE; > } > Index: netinet6/in6_pcb.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/in6_pcb.c,v > diff -u -p -r1.144 in6_pcb.c > --- netinet6/in6_pcb.c 12 Apr 2024 16:07:09 -0000 1.144 > +++ netinet6/in6_pcb.c 3 Nov 2024 17:58:52 -0000 > @@ -479,6 +479,8 @@ in6_pcbnotify(struct inpcbtable *table, > rw_enter_write(&table->inpt_notify); > mtx_enter(&table->inpt_mtx); > TAILQ_FOREACH(inp, &table->inpt_queue, inp_queue) { > + if (in_pcb_is_iterator(inp)) > + continue; > KASSERT(ISSET(inp->inp_flags, INP_IPV6)); > > /* >