On Sun, Oct 27, 2024 at 02:11:22AM +0300, Vitaliy Makkoveev wrote: > I just checked this diff with 7.6-stable. It perfectly applies to > sources, compiles and runs. I'm not surprised, because -current is very > close to release. It seems you did something wrong. > > The attached diff was made against 7.6-stable. As you can see it is the > same. Please be sure your mail client doesn't drop spaces in the > beginning of line or something else.
I finally got vxlan over UDP multicast working and sent UDP multicast packets over it. For now it is only one packet, no multicore performance test yet. Multicast receive, route, send over vxlan works, after I tweaked mvs@'s inpcb iterator diff. Changes to the previous version: - PCB Iterator is only checked for UDP sockets. If we implement it for others, we can adapt. - tmp = TAILQ_NEXT(inp, inp_queue) was wrong, it must be tmp = TAILQ_NEXT((struct inpcb *)iter, inp_queue) - in_pcbunref(inp) automatically does a NULL check. - When exiting the iterator loop early, in_pcb_iterator_abort() removes the iterator from the queue. - Rename tinp to last. This was the name in 4.4BSD before I split the loop into two. mvs@, what do you think about this? I am throwing this on regress right now. And as mentioned before, I should implement automatic UDP multicast tests over vxlan. With that, unlocking multicast in general will be easier. bluhm Index: kern/kern_sysctl.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/kern/kern_sysctl.c,v diff -u -p -r1.451 kern_sysctl.c --- kern/kern_sysctl.c 31 Oct 2024 10:06:51 -0000 1.451 +++ kern/kern_sysctl.c 4 Nov 2024 10:10:34 -0000 @@ -1689,13 +1689,19 @@ sysctl_file(int *name, u_int namelen, ch mtx_leave(&tcb6table.inpt_mtx); #endif mtx_enter(&udbtable.inpt_mtx); - TAILQ_FOREACH(inp, &udbtable.inpt_queue, inp_queue) + TAILQ_FOREACH(inp, &udbtable.inpt_queue, inp_queue) { + if (in_pcb_is_iterator(inp)) + continue; FILLSO(inp->inp_socket); + } mtx_leave(&udbtable.inpt_mtx); #ifdef INET6 mtx_enter(&udb6table.inpt_mtx); - TAILQ_FOREACH(inp, &udb6table.inpt_queue, inp_queue) + TAILQ_FOREACH(inp, &udb6table.inpt_queue, inp_queue) { + if (in_pcb_is_iterator(inp)) + continue; FILLSO(inp->inp_socket); + } mtx_leave(&udb6table.inpt_mtx); #endif mtx_enter(&rawcbtable.inpt_mtx); Index: netinet/in_pcb.c =================================================================== RCS 
file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.c,v diff -u -p -r1.303 in_pcb.c --- netinet/in_pcb.c 12 Jul 2024 19:50:35 -0000 1.303 +++ netinet/in_pcb.c 4 Nov 2024 13:49:46 -0000 @@ -644,6 +644,55 @@ in_pcbunref(struct inpcb *inp) pool_put(&inpcb_pool, inp); } +struct inpcb * +in_pcb_iterator(struct inpcbtable *table, struct inpcb *inp, + struct inpcb_iterator *iter) +{ + struct inpcb *tmp; + + mtx_enter(&table->inpt_mtx); + + if (inp) + tmp = TAILQ_NEXT((struct inpcb *)iter, inp_queue); + else + tmp = TAILQ_FIRST(&table->inpt_queue); + + while (tmp && tmp->inp_table == NULL) + tmp = TAILQ_NEXT(tmp, inp_queue); + + if (inp) { + TAILQ_REMOVE(&table->inpt_queue, (struct inpcb *)iter, + inp_queue); + } + if (tmp) { + TAILQ_INSERT_AFTER(&table->inpt_queue, tmp, + (struct inpcb *)iter, inp_queue); + in_pcbref(tmp); + } + + mtx_leave(&table->inpt_mtx); + + in_pcbunref(inp); + + return tmp; +} + +void +in_pcb_iterator_abort(struct inpcbtable *table, struct inpcb *inp, + struct inpcb_iterator *iter) +{ + mtx_enter(&table->inpt_mtx); + + if (inp) { + TAILQ_REMOVE(&table->inpt_queue, (struct inpcb *)iter, + inp_queue); + } + + mtx_leave(&table->inpt_mtx); + + in_pcbunref(inp); +} + void in_setsockaddr(struct inpcb *inp, struct mbuf *nam) { @@ -743,6 +792,8 @@ in_pcbnotifyall(struct inpcbtable *table rw_enter_write(&table->inpt_notify); mtx_enter(&table->inpt_mtx); TAILQ_FOREACH(inp, &table->inpt_queue, inp_queue) { + if (in_pcb_is_iterator(inp)) + continue; KASSERT(!ISSET(inp->inp_flags, INP_IPV6)); if (inp->inp_faddr.s_addr != dst->sin_addr.s_addr || @@ -1098,6 +1149,8 @@ in_pcbresize(struct inpcbtable *table, i table->inpt_size = hashsize; TAILQ_FOREACH(inp, &table->inpt_queue, inp_queue) { + if (in_pcb_is_iterator(inp)) + continue; LIST_REMOVE(inp, inp_lhash); LIST_REMOVE(inp, inp_hash); in_pcbhash_insert(inp); Index: netinet/in_pcb.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.h,v 
diff -u -p -r1.158 in_pcb.h --- netinet/in_pcb.h 12 Jul 2024 19:50:35 -0000 1.158 +++ netinet/in_pcb.h 4 Nov 2024 13:49:40 -0000 @@ -178,6 +178,20 @@ struct inpcb { LIST_HEAD(inpcbhead, inpcb); +struct inpcb_iterator { + LIST_ENTRY(inpcb) inp_hash; /* unused */ + LIST_ENTRY(inpcb) inp_lhash; /* unused */ + TAILQ_ENTRY(inpcb) inp_queue; /* [t] inet PCB queue */ + SIMPLEQ_ENTRY(inpcb) inp_notify; /* unused */ + struct inpcbtable *inp_table; /* [I] always NULL */ +}; + +static inline int +in_pcb_is_iterator(struct inpcb *inp) +{ + return (inp->inp_table == NULL ? 1 : 0); +} + struct inpcbtable { struct mutex inpt_mtx; /* protect queue and hash */ struct rwlock inpt_notify; /* protect inp_notify list */ @@ -302,6 +316,11 @@ struct inpcb * in_pcbref(struct inpcb *); void in_pcbunref(struct inpcb *); void in_pcbdisconnect(struct inpcb *); +struct inpcb * + in_pcb_iterator(struct inpcbtable *, struct inpcb *, + struct inpcb_iterator *); +void in_pcb_iterator_abort(struct inpcbtable *, struct inpcb *, + struct inpcb_iterator *); struct inpcb * in_pcblookup(struct inpcbtable *, struct in_addr, u_int, struct in_addr, u_int, u_int); Index: netinet/udp_usrreq.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/udp_usrreq.c,v diff -u -p -r1.325 udp_usrreq.c --- netinet/udp_usrreq.c 3 Nov 2024 14:28:06 -0000 1.325 +++ netinet/udp_usrreq.c 4 Nov 2024 13:48:51 -0000 @@ -382,7 +382,8 @@ udp_input(struct mbuf **mp, int *offp, i } if (m->m_flags & (M_BCAST|M_MCAST)) { - SIMPLEQ_HEAD(, inpcb) inpcblist; + struct inpcb_iterator iter = {.inp_table = NULL}; + struct inpcb *last; struct inpcbtable *table; /* @@ -401,11 +402,6 @@ udp_input(struct mbuf **mp, int *offp, i * fixing the interface. Maybe 4.5BSD will remedy this?) */ - /* - * Locate pcb(s) for datagram. - * (Algorithm copied from raw_intr().) 
- */ - SIMPLEQ_INIT(&inpcblist); #ifdef INET6 if (ip6) table = &udb6table; @@ -413,9 +409,8 @@ udp_input(struct mbuf **mp, int *offp, i #endif table = &udbtable; - rw_enter_write(&table->inpt_notify); - mtx_enter(&table->inpt_mtx); - TAILQ_FOREACH(inp, &table->inpt_queue, inp_queue) { + last = inp = NULL; + while ((inp = in_pcb_iterator(table, inp, &iter)) != NULL) { if (ip6) KASSERT(ISSET(inp->inp_flags, INP_IPV6)); else @@ -466,8 +461,18 @@ udp_input(struct mbuf **mp, int *offp, i continue; } - in_pcbref(inp); - SIMPLEQ_INSERT_TAIL(&inpcblist, inp, inp_notify); + if (last != NULL) { + struct mbuf *n; + + n = m_copym(m, 0, M_COPYALL, M_NOWAIT); + if (n != NULL) { + udp_sbappend(last, n, ip, ip6, iphlen, + uh, &srcsa.sa, 0); + } + in_pcbunref(last); + } + + last = in_pcbref(inp); /* * Don't look for additional matches if this one does @@ -478,14 +483,13 @@ udp_input(struct mbuf **mp, int *offp, i * clear these options after setting them. */ if ((inp->inp_socket->so_options & (SO_REUSEPORT | - SO_REUSEADDR)) == 0) + SO_REUSEADDR)) == 0) { + in_pcb_iterator_abort(table, inp, &iter); break; + } } - mtx_leave(&table->inpt_mtx); - - if (SIMPLEQ_EMPTY(&inpcblist)) { - rw_exit_write(&table->inpt_notify); + if (last == NULL) { /* * No matching pcb found; discard datagram. 
* (No need to send an ICMP Port Unreachable @@ -495,21 +499,8 @@ udp_input(struct mbuf **mp, int *offp, i goto bad; } - while ((inp = SIMPLEQ_FIRST(&inpcblist)) != NULL) { - struct mbuf *n; - - SIMPLEQ_REMOVE_HEAD(&inpcblist, inp_notify); - if (SIMPLEQ_EMPTY(&inpcblist)) - n = m; - else - n = m_copym(m, 0, M_COPYALL, M_NOWAIT); - if (n != NULL) { - udp_sbappend(inp, n, ip, ip6, iphlen, uh, - &srcsa.sa, 0); - } - in_pcbunref(inp); - } - rw_exit_write(&table->inpt_notify); + udp_sbappend(last, m, ip, ip6, iphlen, uh, &srcsa.sa, 0); + in_pcbunref(last); return IPPROTO_DONE; } Index: netinet6/in6_pcb.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/in6_pcb.c,v diff -u -p -r1.144 in6_pcb.c --- netinet6/in6_pcb.c 12 Apr 2024 16:07:09 -0000 1.144 +++ netinet6/in6_pcb.c 3 Nov 2024 17:58:52 -0000 @@ -479,6 +479,8 @@ in6_pcbnotify(struct inpcbtable *table, rw_enter_write(&table->inpt_notify); mtx_enter(&table->inpt_mtx); TAILQ_FOREACH(inp, &table->inpt_queue, inp_queue) { + if (in_pcb_is_iterator(inp)) + continue; KASSERT(ISSET(inp->inp_flags, INP_IPV6)); /*