On Mon, Nov 04, 2024 at 03:58:38PM +0100, Alexander Bluhm wrote:
> On Sun, Oct 27, 2024 at 02:11:22AM +0300, Vitaliy Makkoveev wrote:
> > I just checked this diff with 7.6-stable. It perfectly applies to
> > sources, compiles and runs. I'm not surprised, because -current is very
> > close to release. It seems you did something wrong.
> > 
> > The attached diff was made against 7.6-stable. As you can see it is the
> > same. Please be sure your mail client doesn't drop spaces in the
> > beginning of line or something else.
> 
> I finally got vxlan over UDP multicast working and did send UDP
> multicast packets over it.  For now it is only one packet, no
> multicore performance test yet.  Multicast receive, route, send
> over vxlan works, after I have tweaked mvs@ inpcb iterator diff.
> 
> Changes to the previous version:
> - PCB Iterator is only checked for UDP sockets.  If we implement
>   it for others, we can adopt.
> - tmp = TAILQ_NEXT(inp, inp_queue) was wrong, it must be
>   tmp = TAILQ_NEXT((struct inpcb *)iter, inp_queue)
> - in_pcbunref(inp) automatically does a NULL check.
> - When exiting the iterater loop early, in_pcb_iterator_abort()
>   removes the iterator from the queue.
> - Rename tinp to last.  This was the name in 4.4BSD before I
>   splitted the loop into two.
> 
> mvs@, what do you think about this?
> 

Your diff is ok by me. I forgot to remove `iter' in the early exit case,
that's why my diff crashed. in_pcb_iterator_abort() fixes this.

We also could save a little stack space by moving `inp_queue' and
`inp_table' to the beginning of 'inpcb' structure, but this reordering
could be done separately.

> I am throwing this on regress right now.  And as mentioned before,
> I should implement automatic UDP multicast tests over vxlan.  With
> that, unlocking multicast in general will be easier.
> 
> bluhm
> 
> Index: kern/kern_sysctl.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/kern/kern_sysctl.c,v
> diff -u -p -r1.451 kern_sysctl.c
> --- kern/kern_sysctl.c        31 Oct 2024 10:06:51 -0000      1.451
> +++ kern/kern_sysctl.c        4 Nov 2024 10:10:34 -0000
> @@ -1689,13 +1689,19 @@ sysctl_file(int *name, u_int namelen, ch
>                       mtx_leave(&tcb6table.inpt_mtx);
>  #endif
>                       mtx_enter(&udbtable.inpt_mtx);
> -                     TAILQ_FOREACH(inp, &udbtable.inpt_queue, inp_queue)
> +                     TAILQ_FOREACH(inp, &udbtable.inpt_queue, inp_queue) {
> +                             if (in_pcb_is_iterator(inp))
> +                                     continue;
>                               FILLSO(inp->inp_socket);
> +                     }
>                       mtx_leave(&udbtable.inpt_mtx);
>  #ifdef INET6
>                       mtx_enter(&udb6table.inpt_mtx);
> -                     TAILQ_FOREACH(inp, &udb6table.inpt_queue, inp_queue)
> +                     TAILQ_FOREACH(inp, &udb6table.inpt_queue, inp_queue) {
> +                             if (in_pcb_is_iterator(inp))
> +                                     continue;
>                               FILLSO(inp->inp_socket);
> +                     }
>                       mtx_leave(&udb6table.inpt_mtx);
>  #endif
>                       mtx_enter(&rawcbtable.inpt_mtx);
> Index: netinet/in_pcb.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.c,v
> diff -u -p -r1.303 in_pcb.c
> --- netinet/in_pcb.c  12 Jul 2024 19:50:35 -0000      1.303
> +++ netinet/in_pcb.c  4 Nov 2024 13:49:46 -0000
> @@ -644,6 +644,55 @@ in_pcbunref(struct inpcb *inp)
>       pool_put(&inpcb_pool, inp);
>  }
>  
> +struct inpcb *
> +in_pcb_iterator(struct inpcbtable *table, struct inpcb *inp,
> +    struct inpcb_iterator *iter)
> +{
> +     struct inpcb *tmp;
> +
> +     mtx_enter(&table->inpt_mtx);
> +
> +     if (inp)
> +             tmp = TAILQ_NEXT((struct inpcb *)iter, inp_queue);
> +     else
> +             tmp = TAILQ_FIRST(&table->inpt_queue);
> +
> +     while (tmp && tmp->inp_table == NULL)
> +             tmp = TAILQ_NEXT(tmp, inp_queue);
> +
> +     if (inp) {
> +             TAILQ_REMOVE(&table->inpt_queue, (struct inpcb *)iter,
> +                 inp_queue);
> +     }
> +     if (tmp) {
> +             TAILQ_INSERT_AFTER(&table->inpt_queue, tmp,
> +                 (struct inpcb *)iter, inp_queue);
> +             in_pcbref(tmp);
> +     }
> +
> +     mtx_leave(&table->inpt_mtx);
> +
> +     in_pcbunref(inp);
> +     
> +     return tmp;
> +}
> +
> +void
> +in_pcb_iterator_abort(struct inpcbtable *table, struct inpcb *inp,
> +    struct inpcb_iterator *iter)
> +{
> +     mtx_enter(&table->inpt_mtx);
> +
> +     if (inp) {
> +             TAILQ_REMOVE(&table->inpt_queue, (struct inpcb *)iter,
> +                 inp_queue);
> +     }
> +
> +     mtx_leave(&table->inpt_mtx);
> +
> +     in_pcbunref(inp);
> +}
> +
>  void
>  in_setsockaddr(struct inpcb *inp, struct mbuf *nam)
>  {
> @@ -743,6 +792,8 @@ in_pcbnotifyall(struct inpcbtable *table
>       rw_enter_write(&table->inpt_notify);
>       mtx_enter(&table->inpt_mtx);
>       TAILQ_FOREACH(inp, &table->inpt_queue, inp_queue) {
> +             if (in_pcb_is_iterator(inp))
> +                     continue;
>               KASSERT(!ISSET(inp->inp_flags, INP_IPV6));
>  
>               if (inp->inp_faddr.s_addr != dst->sin_addr.s_addr ||
> @@ -1098,6 +1149,8 @@ in_pcbresize(struct inpcbtable *table, i
>       table->inpt_size = hashsize;
>  
>       TAILQ_FOREACH(inp, &table->inpt_queue, inp_queue) {
> +             if (in_pcb_is_iterator(inp))
> +                     continue;
>               LIST_REMOVE(inp, inp_lhash);
>               LIST_REMOVE(inp, inp_hash);
>               in_pcbhash_insert(inp);
> Index: netinet/in_pcb.h
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.h,v
> diff -u -p -r1.158 in_pcb.h
> --- netinet/in_pcb.h  12 Jul 2024 19:50:35 -0000      1.158
> +++ netinet/in_pcb.h  4 Nov 2024 13:49:40 -0000
> @@ -178,6 +178,20 @@ struct inpcb {
>  
>  LIST_HEAD(inpcbhead, inpcb);
>  
> +struct inpcb_iterator {
> +     LIST_ENTRY(inpcb) inp_hash;             /* unused */
> +     LIST_ENTRY(inpcb) inp_lhash;            /* unused */
> +     TAILQ_ENTRY(inpcb) inp_queue;           /* [t] inet PCB queue */
> +     SIMPLEQ_ENTRY(inpcb) inp_notify;        /* unused */
> +     struct    inpcbtable *inp_table;        /* [I] always NULL */
> +};
> +
> +static inline int
> +in_pcb_is_iterator(struct inpcb *inp)
> +{
> +     return (inp->inp_table == NULL ? 1 : 0);
> +}
> +
>  struct inpcbtable {
>       struct mutex inpt_mtx;                  /* protect queue and hash */
>       struct rwlock inpt_notify;              /* protect inp_notify list */
> @@ -302,6 +316,11 @@ struct inpcb *
>        in_pcbref(struct inpcb *);
>  void  in_pcbunref(struct inpcb *);
>  void  in_pcbdisconnect(struct inpcb *);
> +struct inpcb *
> +      in_pcb_iterator(struct inpcbtable *, struct inpcb *,
> +         struct inpcb_iterator *);
> +void  in_pcb_iterator_abort(struct inpcbtable *, struct inpcb *,
> +         struct inpcb_iterator *);
>  struct inpcb *
>        in_pcblookup(struct inpcbtable *, struct in_addr,
>                              u_int, struct in_addr, u_int, u_int);
> Index: netinet/udp_usrreq.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/udp_usrreq.c,v
> diff -u -p -r1.325 udp_usrreq.c
> --- netinet/udp_usrreq.c      3 Nov 2024 14:28:06 -0000       1.325
> +++ netinet/udp_usrreq.c      4 Nov 2024 13:48:51 -0000
> @@ -382,7 +382,8 @@ udp_input(struct mbuf **mp, int *offp, i
>       }
>  
>       if (m->m_flags & (M_BCAST|M_MCAST)) {
> -             SIMPLEQ_HEAD(, inpcb) inpcblist;
> +             struct inpcb_iterator iter = {.inp_table = NULL};
> +             struct inpcb *last;
>               struct inpcbtable *table;
>  
>               /*
> @@ -401,11 +402,6 @@ udp_input(struct mbuf **mp, int *offp, i
>                * fixing the interface.  Maybe 4.5BSD will remedy this?)
>                */
>  
> -             /*
> -              * Locate pcb(s) for datagram.
> -              * (Algorithm copied from raw_intr().)
> -              */
> -             SIMPLEQ_INIT(&inpcblist);
>  #ifdef INET6
>               if (ip6)
>                       table = &udb6table;
> @@ -413,9 +409,8 @@ udp_input(struct mbuf **mp, int *offp, i
>  #endif
>                       table = &udbtable;
>  
> -             rw_enter_write(&table->inpt_notify);
> -             mtx_enter(&table->inpt_mtx);
> -             TAILQ_FOREACH(inp, &table->inpt_queue, inp_queue) {
> +             last = inp = NULL;
> +             while ((inp = in_pcb_iterator(table, inp, &iter)) != NULL) {
>                       if (ip6)
>                               KASSERT(ISSET(inp->inp_flags, INP_IPV6));
>                       else
> @@ -466,8 +461,18 @@ udp_input(struct mbuf **mp, int *offp, i
>                                       continue;
>                       }
>  
> -                     in_pcbref(inp);
> -                     SIMPLEQ_INSERT_TAIL(&inpcblist, inp, inp_notify);
> +                     if (last != NULL) {
> +                             struct mbuf *n;
> +                             
> +                             n = m_copym(m, 0, M_COPYALL, M_NOWAIT);
> +                             if (n != NULL) {
> +                                     udp_sbappend(last, n, ip, ip6, iphlen,
> +                                         uh, &srcsa.sa, 0);
> +                             }
> +                             in_pcbunref(last);
> +                     }
> +
> +                     last = in_pcbref(inp);
>  
>                       /*
>                        * Don't look for additional matches if this one does
> @@ -478,14 +483,13 @@ udp_input(struct mbuf **mp, int *offp, i
>                        * clear these options after setting them.
>                        */
>                       if ((inp->inp_socket->so_options & (SO_REUSEPORT |
> -                         SO_REUSEADDR)) == 0)
> +                         SO_REUSEADDR)) == 0) {
> +                             in_pcb_iterator_abort(table, inp, &iter);
>                               break;
> +                     }
>               }
> -             mtx_leave(&table->inpt_mtx);
> -
> -             if (SIMPLEQ_EMPTY(&inpcblist)) {
> -                     rw_exit_write(&table->inpt_notify);
>  
> +             if (last == NULL) {
>                       /*
>                        * No matching pcb found; discard datagram.
>                        * (No need to send an ICMP Port Unreachable
> @@ -495,21 +499,8 @@ udp_input(struct mbuf **mp, int *offp, i
>                       goto bad;
>               }
>  
> -             while ((inp = SIMPLEQ_FIRST(&inpcblist)) != NULL) {
> -                     struct mbuf *n;
> -
> -                     SIMPLEQ_REMOVE_HEAD(&inpcblist, inp_notify);
> -                     if (SIMPLEQ_EMPTY(&inpcblist))
> -                             n = m;
> -                     else
> -                             n = m_copym(m, 0, M_COPYALL, M_NOWAIT);
> -                     if (n != NULL) {
> -                             udp_sbappend(inp, n, ip, ip6, iphlen, uh,
> -                                 &srcsa.sa, 0);
> -                     }
> -                     in_pcbunref(inp);
> -             }
> -             rw_exit_write(&table->inpt_notify);
> +             udp_sbappend(last, m, ip, ip6, iphlen, uh, &srcsa.sa, 0);
> +             in_pcbunref(last);
>  
>               return IPPROTO_DONE;
>       }
> Index: netinet6/in6_pcb.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/in6_pcb.c,v
> diff -u -p -r1.144 in6_pcb.c
> --- netinet6/in6_pcb.c        12 Apr 2024 16:07:09 -0000      1.144
> +++ netinet6/in6_pcb.c        3 Nov 2024 17:58:52 -0000
> @@ -479,6 +479,8 @@ in6_pcbnotify(struct inpcbtable *table, 
>       rw_enter_write(&table->inpt_notify);
>       mtx_enter(&table->inpt_mtx);
>       TAILQ_FOREACH(inp, &table->inpt_queue, inp_queue) {
> +             if (in_pcb_is_iterator(inp))
> +                     continue;
>               KASSERT(ISSET(inp->inp_flags, INP_IPV6));
>  
>               /*
> 

Reply via email to