On Fri, Oct 25, 2024 at 07:03:57PM +0300, Vitaliy Makkoveev wrote: > On Fri, Oct 25, 2024 at 04:48:25PM +0200, Alexander Bluhm wrote: > > On Fri, Oct 25, 2024 at 11:52:13AM +0300, Vitaliy Makkoveev wrote: > > > On Fri, Oct 25, 2024 at 10:18:42AM +0200, Holger Glaess wrote: > > > > hi > > > > > > > > see below, it is from the first reboot after sysupgrade from 7.5 to 7.6 > > > > > > > > > > > > Holger > > > > > > > > > > > > ===> Adding the _dhcp6leased user > > > > panic: rw_enter: inpnotify locking against myself > > > > Stopped at      db_enter+0x14:  popq    %rbp > > > > ===> Adding the _dhcp6leased user > > > >     TID    PID    UID     PRFLAGS     PFLAGS  CPU  > > > > COMMAND > > > >  239340   5005      0    0x100803     0x2000    0K > > > > grep > > > > *432877  32323      0     0x14000      0x200    1  > > > > softnet0 > > > > db_enter() at db_enter+0x14 > > > > panic(ffffffff8233bb4f) at panic+0xdd > > > > rw_enter_diag(ffffffff828bb170,1) at rw_enter_diag+0x4e > > > > rw_enter(ffffffff828bb170,1) at rw_enter+0x103 > > > > udp_input(ffff800030da6ab8,ffff800030da6ac4,11,2) at udp_input+0x60f > > > > ip_deliver(ffff800030da6ab8,ffff800030da6ac4,11,2,1) at ip_deliver+0xf8 > > > > ip_ours(ffff800030da6ab8,ffff800030da6ac4,ffff800030da6a0c,0) at > > > > ip_ours+0x6f > > > > ip_input_running > > > > rc.sysmergeif(ffff800030da6ab8,ffff800030da6ac4,31,0,ffff8000008ab800) > > > > at > > > > ip_inpu > > > > t_if+0x1f0 > > > > ipv4_input(ffff8000008ab800,fffffd807d870300) at i===> Adding the > > > > _dhcp6leased grouppv4_input+0x38 > > > > ether_input(ffff8000008ab800,fffffd807d870300) at ether_input+0x3df > > > > vxlan_input(ffff800001315680,fffffd807d870300,fffffd8074bcb050,0,fffffd8074bcb0 > > > > 64,1c) at vxlan_input+0x301 > > > > udp_sbappend(fffffd82779de000,fffffd807d870300,fffffd8074bcb050,0,14,fffffd8074 > > > > bcb064,cae52dbc4504571,14) at udp_sbappend+0x7f > > > > 
udp_input(ffff800030da6fa8,ffff800030da6fb4,11,2) at udp_input+0x9c2 > > > > ip_deliver(ffff800030da6fa8,ffff800030da6fb4,11,2,1) at ip_deliver+0xf8 > > > > end trace frame: 0xffff800030da6eb0, count: 0 > > > > https://www.openbsd.org/ddb.html describes the minimum info required in > > > > bug > > > > reports.  Insufficient info makes it difficult to find and fix bugs. > > > > > > > > > > This diff should help. > > > > The inpt_notify lock is there to lock the inp_notify field. > > mvs@, we should not release it while traversing over inpcblist. > > > > Yeah, I missed that. On the other hand, we could use iterators like > pipex_iterator() and avoid locks while doing udp_sbappend().
This is the diff with the iterator. To keep it small I modified only the udp_input() loop. Holger, could you test it and confirm that it helps? I tested it with udp(4) broadcasts, but can't test with vxlan(4). Alexander, all inp_queue loops except the one in in_pcbresize() could be converted in this way, so in_pcbresize() will be the only place where we need to do the in_pcb_is_iterator() check. Also, this iterator simplifies the netlock dances in sysctl_file(), and we don't hold the rwlock(9) while calling the handler. Index: sys/kern/kern_sysctl.c =================================================================== RCS file: /cvs/src/sys/kern/kern_sysctl.c,v diff -u -p -r1.448 kern_sysctl.c --- sys/kern/kern_sysctl.c 30 Sep 2024 12:32:26 -0000 1.448 +++ sys/kern/kern_sysctl.c 25 Oct 2024 20:32:47 -0000 @@ -1673,34 +1673,52 @@ sysctl_file(int *name, u_int namelen, ch NET_LOCK(); mtx_enter(&tcbtable.inpt_mtx); - TAILQ_FOREACH(inp, &tcbtable.inpt_queue, inp_queue) + TAILQ_FOREACH(inp, &tcbtable.inpt_queue, inp_queue) { + if (in_pcb_is_iterator(inp)) + continue; FILLSO(inp->inp_socket); + } mtx_leave(&tcbtable.inpt_mtx); #ifdef INET6 mtx_enter(&tcb6table.inpt_mtx); - TAILQ_FOREACH(inp, &tcb6table.inpt_queue, inp_queue) + TAILQ_FOREACH(inp, &tcb6table.inpt_queue, inp_queue) { + if (in_pcb_is_iterator(inp)) + continue; FILLSO(inp->inp_socket); + } mtx_leave(&tcb6table.inpt_mtx); #endif mtx_enter(&udbtable.inpt_mtx); - TAILQ_FOREACH(inp, &udbtable.inpt_queue, inp_queue) + TAILQ_FOREACH(inp, &udbtable.inpt_queue, inp_queue) { + if (in_pcb_is_iterator(inp)) + continue; FILLSO(inp->inp_socket); + } mtx_leave(&udbtable.inpt_mtx); #ifdef INET6 mtx_enter(&udb6table.inpt_mtx); - TAILQ_FOREACH(inp, &udb6table.inpt_queue, inp_queue) + TAILQ_FOREACH(inp, &udb6table.inpt_queue, inp_queue) { + if (in_pcb_is_iterator(inp)) + continue; FILLSO(inp->inp_socket); + } mtx_leave(&udb6table.inpt_mtx); #endif mtx_enter(&rawcbtable.inpt_mtx); - TAILQ_FOREACH(inp, &rawcbtable.inpt_queue, inp_queue) + TAILQ_FOREACH(inp, 
&rawcbtable.inpt_queue, inp_queue) { + if (in_pcb_is_iterator(inp)) + continue; FILLSO(inp->inp_socket); + } mtx_leave(&rawcbtable.inpt_mtx); #ifdef INET6 mtx_enter(&rawin6pcbtable.inpt_mtx); TAILQ_FOREACH(inp, &rawin6pcbtable.inpt_queue, - inp_queue) + inp_queue) { + if (in_pcb_is_iterator(inp)) + continue; FILLSO(inp->inp_socket); + } mtx_leave(&rawin6pcbtable.inpt_mtx); #endif NET_UNLOCK(); Index: sys/netinet/in_pcb.c =================================================================== RCS file: /cvs/src/sys/netinet/in_pcb.c,v diff -u -p -r1.303 in_pcb.c --- sys/netinet/in_pcb.c 12 Jul 2024 19:50:35 -0000 1.303 +++ sys/netinet/in_pcb.c 25 Oct 2024 20:32:47 -0000 @@ -644,6 +644,39 @@ in_pcbunref(struct inpcb *inp) pool_put(&inpcb_pool, inp); } +struct inpcb * +in_pcb_iterator(struct inpcbtable *table, struct inpcb *inp, + struct inpcb_iterator *iter) +{ + struct inpcb *tmp; + + mtx_enter(&table->inpt_mtx); + + if (inp) + tmp = TAILQ_NEXT(inp, inp_queue); + else + tmp = TAILQ_FIRST(&table->inpt_queue); + + while (tmp && tmp->inp_table == NULL) + tmp = TAILQ_NEXT(tmp, inp_queue); + + if (inp) + TAILQ_REMOVE(&table->inpt_queue, (struct inpcb *)iter, + inp_queue); + if (tmp) { + TAILQ_INSERT_AFTER(&table->inpt_queue, tmp, + (struct inpcb *)iter, inp_queue); + in_pcbref(tmp); + } + + mtx_leave(&table->inpt_mtx); + + if (inp) + in_pcbunref(inp); + + return tmp; +} + void in_setsockaddr(struct inpcb *inp, struct mbuf *nam) { @@ -743,6 +776,8 @@ in_pcbnotifyall(struct inpcbtable *table rw_enter_write(&table->inpt_notify); mtx_enter(&table->inpt_mtx); TAILQ_FOREACH(inp, &table->inpt_queue, inp_queue) { + if (in_pcb_is_iterator(inp)) + continue; KASSERT(!ISSET(inp->inp_flags, INP_IPV6)); if (inp->inp_faddr.s_addr != dst->sin_addr.s_addr || @@ -1098,6 +1133,8 @@ in_pcbresize(struct inpcbtable *table, i table->inpt_size = hashsize; TAILQ_FOREACH(inp, &table->inpt_queue, inp_queue) { + if (in_pcb_is_iterator(inp)) + continue; LIST_REMOVE(inp, inp_lhash); LIST_REMOVE(inp, 
inp_hash); in_pcbhash_insert(inp); Index: sys/netinet/in_pcb.h =================================================================== RCS file: /cvs/src/sys/netinet/in_pcb.h,v diff -u -p -r1.158 in_pcb.h --- sys/netinet/in_pcb.h 12 Jul 2024 19:50:35 -0000 1.158 +++ sys/netinet/in_pcb.h 25 Oct 2024 20:32:47 -0000 @@ -178,6 +178,20 @@ struct inpcb { LIST_HEAD(inpcbhead, inpcb); +struct inpcb_iterator { + LIST_ENTRY(inpcb) inp_hash; /* unused */ + LIST_ENTRY(inpcb) inp_lhash; /* unused */ + TAILQ_ENTRY(inpcb) inp_queue; /* [t] inet PCB queue */ + SIMPLEQ_ENTRY(inpcb) inp_notify; /* unused */ + struct inpcbtable *inp_table; /* [I] always NULL */ +}; + +static inline int +in_pcb_is_iterator(struct inpcb *inp) +{ + return (inp->inp_table == NULL ? 1 : 0); +} + struct inpcbtable { struct mutex inpt_mtx; /* protect queue and hash */ struct rwlock inpt_notify; /* protect inp_notify list */ @@ -302,6 +316,9 @@ struct inpcb * in_pcbref(struct inpcb *); void in_pcbunref(struct inpcb *); void in_pcbdisconnect(struct inpcb *); +struct inpcb * + in_pcb_iterator(struct inpcbtable *, struct inpcb *, + struct inpcb_iterator *); struct inpcb * in_pcblookup(struct inpcbtable *, struct in_addr, u_int, struct in_addr, u_int, u_int); Index: sys/netinet/ip_divert.c =================================================================== RCS file: /cvs/src/sys/netinet/ip_divert.c,v diff -u -p -r1.97 ip_divert.c --- sys/netinet/ip_divert.c 16 Aug 2024 09:20:35 -0000 1.97 +++ sys/netinet/ip_divert.c 25 Oct 2024 20:32:47 -0000 @@ -203,6 +203,8 @@ divert_packet(struct mbuf *m, int dir, u mtx_enter(&divbtable.inpt_mtx); TAILQ_FOREACH(inp, &divbtable.inpt_queue, inp_queue) { + if (in_pcb_is_iterator(inp)) + continue; if (inp->inp_lport != divert_port) continue; in_pcbref(inp); Index: sys/netinet/raw_ip.c =================================================================== RCS file: /cvs/src/sys/netinet/raw_ip.c,v diff -u -p -r1.160 raw_ip.c --- sys/netinet/raw_ip.c 12 Jul 2024 19:50:35 -0000 1.160 +++ 
sys/netinet/raw_ip.c 25 Oct 2024 20:32:47 -0000 @@ -167,6 +167,8 @@ rip_input(struct mbuf **mp, int *offp, i rw_enter_write(&rawcbtable.inpt_notify); mtx_enter(&rawcbtable.inpt_mtx); TAILQ_FOREACH(inp, &rawcbtable.inpt_queue, inp_queue) { + if (in_pcb_is_iterator(inp)) + continue; KASSERT(!ISSET(inp->inp_flags, INP_IPV6)); /* Index: sys/netinet/udp_usrreq.c =================================================================== RCS file: /cvs/src/sys/netinet/udp_usrreq.c,v diff -u -p -r1.324 udp_usrreq.c --- sys/netinet/udp_usrreq.c 6 Aug 2024 20:15:53 -0000 1.324 +++ sys/netinet/udp_usrreq.c 25 Oct 2024 20:32:47 -0000 @@ -381,7 +381,8 @@ udp_input(struct mbuf **mp, int *offp, i } if (m->m_flags & (M_BCAST|M_MCAST)) { - SIMPLEQ_HEAD(, inpcb) inpcblist; + struct inpcb_iterator iter = {.inp_table = NULL}; + struct inpcb *tinp = NULL; struct inpcbtable *table; /* @@ -400,11 +401,6 @@ udp_input(struct mbuf **mp, int *offp, i * fixing the interface. Maybe 4.5BSD will remedy this?) */ - /* - * Locate pcb(s) for datagram. - * (Algorithm copied from raw_intr().) 
- */ - SIMPLEQ_INIT(&inpcblist); #ifdef INET6 if (ip6) table = &udb6table; @@ -412,9 +408,8 @@ udp_input(struct mbuf **mp, int *offp, i #endif table = &udbtable; - rw_enter_write(&table->inpt_notify); - mtx_enter(&table->inpt_mtx); - TAILQ_FOREACH(inp, &table->inpt_queue, inp_queue) { + inp = NULL; + while ((inp = in_pcb_iterator(table, inp, &iter)) != NULL){ if (ip6) KASSERT(ISSET(inp->inp_flags, INP_IPV6)); else @@ -465,8 +460,18 @@ udp_input(struct mbuf **mp, int *offp, i continue; } - in_pcbref(inp); - SIMPLEQ_INSERT_TAIL(&inpcblist, inp, inp_notify); + if (tinp != NULL) { + struct mbuf *n; + + n = m_copym(m, 0, M_COPYALL, M_NOWAIT); + if (n != NULL) { + udp_sbappend(tinp, n, ip, ip6, iphlen, + uh, &srcsa.sa, 0); + } + in_pcbunref(tinp); + } + + tinp = in_pcbref(inp); /* * Don't look for additional matches if this one does @@ -477,14 +482,13 @@ udp_input(struct mbuf **mp, int *offp, i * clear these options after setting them. */ if ((inp->inp_socket->so_options & (SO_REUSEPORT | - SO_REUSEADDR)) == 0) + SO_REUSEADDR)) == 0) { + in_pcbunref(inp); break; + } } - mtx_leave(&table->inpt_mtx); - - if (SIMPLEQ_EMPTY(&inpcblist)) { - rw_exit_write(&table->inpt_notify); + if (tinp == NULL) { /* * No matching pcb found; discard datagram. 
* (No need to send an ICMP Port Unreachable @@ -494,21 +498,8 @@ udp_input(struct mbuf **mp, int *offp, i goto bad; } - while ((inp = SIMPLEQ_FIRST(&inpcblist)) != NULL) { - struct mbuf *n; - - SIMPLEQ_REMOVE_HEAD(&inpcblist, inp_notify); - if (SIMPLEQ_EMPTY(&inpcblist)) - n = m; - else - n = m_copym(m, 0, M_COPYALL, M_NOWAIT); - if (n != NULL) { - udp_sbappend(inp, n, ip, ip6, iphlen, uh, - &srcsa.sa, 0); - } - in_pcbunref(inp); - } - rw_exit_write(&table->inpt_notify); + udp_sbappend(tinp, m, ip, ip6, iphlen, uh, &srcsa.sa, 0); + in_pcbunref(tinp); return IPPROTO_DONE; } Index: sys/netinet6/in6_pcb.c =================================================================== RCS file: /cvs/src/sys/netinet6/in6_pcb.c,v diff -u -p -r1.144 in6_pcb.c --- sys/netinet6/in6_pcb.c 12 Apr 2024 16:07:09 -0000 1.144 +++ sys/netinet6/in6_pcb.c 25 Oct 2024 20:32:47 -0000 @@ -479,6 +479,8 @@ in6_pcbnotify(struct inpcbtable *table, rw_enter_write(&table->inpt_notify); mtx_enter(&table->inpt_mtx); TAILQ_FOREACH(inp, &table->inpt_queue, inp_queue) { + if (in_pcb_is_iterator(inp)) + continue; KASSERT(ISSET(inp->inp_flags, INP_IPV6)); /* Index: sys/netinet6/ip6_divert.c =================================================================== RCS file: /cvs/src/sys/netinet6/ip6_divert.c,v diff -u -p -r1.97 ip6_divert.c --- sys/netinet6/ip6_divert.c 16 Aug 2024 09:20:35 -0000 1.97 +++ sys/netinet6/ip6_divert.c 25 Oct 2024 20:32:47 -0000 @@ -212,6 +212,8 @@ divert6_packet(struct mbuf *m, int dir, mtx_enter(&divb6table.inpt_mtx); TAILQ_FOREACH(inp, &divb6table.inpt_queue, inp_queue) { + if (in_pcb_is_iterator(inp)) + continue; if (inp->inp_lport != divert_port) continue; in_pcbref(inp); Index: sys/netinet6/raw_ip6.c =================================================================== RCS file: /cvs/src/sys/netinet6/raw_ip6.c,v diff -u -p -r1.185 raw_ip6.c --- sys/netinet6/raw_ip6.c 12 Jul 2024 19:50:35 -0000 1.185 +++ sys/netinet6/raw_ip6.c 25 Oct 2024 20:32:47 -0000 @@ -181,6 +181,8 @@ 
rip6_input(struct mbuf **mp, int *offp, rw_enter_write(&rawin6pcbtable.inpt_notify); mtx_enter(&rawin6pcbtable.inpt_mtx); TAILQ_FOREACH(inp, &rawin6pcbtable.inpt_queue, inp_queue) { + if (in_pcb_is_iterator(inp)) + continue; KASSERT(ISSET(inp->inp_flags, INP_IPV6)); /*