On 25.10.24 22:45, Vitaliy Makkoveev wrote:
On Fri, Oct 25, 2024 at 07:03:57PM +0300, Vitaliy Makkoveev wrote:
On Fri, Oct 25, 2024 at 04:48:25PM +0200, Alexander Bluhm wrote:
On Fri, Oct 25, 2024 at 11:52:13AM +0300, Vitaliy Makkoveev wrote:
On Fri, Oct 25, 2024 at 10:18:42AM +0200, Holger Glaess wrote:
hi

see below, this is from the first reboot after sysupgrade from 7.5 to 7.6


Holger


===> Adding the _dhcp6leased user
panic: rw_enter: inpnotify locking against myself
Stopped at      db_enter+0x14:  popq    %rbp
===> Adding the _dhcp6leased user
    TID    PID    UID     PRFLAGS     PFLAGS  CPU  COMMAND
 239340   5005      0    0x100803     0x2000    0K grep
*432877  32323      0     0x14000      0x200    1  softnet0
db_enter() at db_enter+0x14
panic(ffffffff8233bb4f) at panic+0xdd
rw_enter_diag(ffffffff828bb170,1) at rw_enter_diag+0x4e
rw_enter(ffffffff828bb170,1) at rw_enter+0x103
udp_input(ffff800030da6ab8,ffff800030da6ac4,11,2) at udp_input+0x60f
ip_deliver(ffff800030da6ab8,ffff800030da6ac4,11,2,1) at ip_deliver+0xf8
ip_ours(ffff800030da6ab8,ffff800030da6ac4,ffff800030da6a0c,0) at
ip_ours+0x6f
ip_input_running
rc.sysmergeif(ffff800030da6ab8,ffff800030da6ac4,31,0,ffff8000008ab800) at
ip_inpu
t_if+0x1f0
ipv4_input(ffff8000008ab800,fffffd807d870300) at i===> Adding the
_dhcp6leased grouppv4_input+0x38
ether_input(ffff8000008ab800,fffffd807d870300) at ether_input+0x3df
vxlan_input(ffff800001315680,fffffd807d870300,fffffd8074bcb050,0,fffffd8074bcb0
64,1c) at vxlan_input+0x301
udp_sbappend(fffffd82779de000,fffffd807d870300,fffffd8074bcb050,0,14,fffffd8074
bcb064,cae52dbc4504571,14) at udp_sbappend+0x7f
udp_input(ffff800030da6fa8,ffff800030da6fb4,11,2) at udp_input+0x9c2
ip_deliver(ffff800030da6fa8,ffff800030da6fb4,11,2,1) at ip_deliver+0xf8
end trace frame: 0xffff800030da6eb0, count: 0
https://www.openbsd.org/ddb.html describes the minimum info required in bug
reports.  Insufficient info makes it difficult to find and fix bugs.

This diff should help.
The inpt_notify lock is there to lock the inp_notify field.
mvs@, we should not release it while traversing over inpcblist.

Yeah, missed that. On the other hand, we could use iterators like
pipex_iterator() and avoid locks while doing udp_sbappend().
This is the diff with iterator. To keep it small I modified only
udp_input() loop. Holger, could you test it and confirm that it helps? I
tested it with udp(4) broadcasts, but can't test with vxlan(4).

Alexander, all inp_queue loops except the one within in_pcbresize() could
be converted in this way, so in_pcbresize() will be the only place where we
need to do the in_pcb_is_iterator() check. Also this iterator simplifies
netlock dances in sysctl_file(). Also we don't hold rwlock(9) while
calling handler.


hi

what I did, and I hope there is not too much wrong:

cvs -qd anon...@anoncvs.eu.openbsd.org:/cvs checkout -rOPENBSD_7_6 -P src
/usr/src 317>cat vxlan_v2.patch | patch -p0

I reworked all the files where I got a .rej:

/usr/src 319>find . -name "*.rej"
 ./sys/kern/kern_sysctl.c.rej
 ./sys/netinet/in_pcb.c.rej
 ./sys/netinet/in_pcb.h.rej
 ./sys/netinet/ip_divert.c.rej
 ./sys/netinet/raw_ip.c.rej
 ./sys/netinet/udp_usrreq.c.rej
 ./sys/netinet6/in6_pcb.c.rej
 ./sys/netinet6/ip6_divert.c.rej
 ./sys/netinet6/raw_ip6.c.rej


I kept this line in sys/netinet/udp_usrreq.c, close to line 494:
  udpstat_inc(udps_noportbcast);

build new kernel

cd /usr/src/sys/arch/amd64/conf

cp GENERIC.MP VXLAN_PATCHED

config VXLAN_PATCHED

cd ../compile

make

cp VXLAN_PATCHED/obj/bsd /bsd_vxlan_patched


after booting the new kernel I got

reorder_kernel: failed -- see /usr/share/relink/kernel/VXLAN_PATCHED/relink.log
uvm_fault(0xfffffd823c0d26d8, 0x112, 0, 1) -> e
kernel: page fault trap, code=0
Stopped at      udp_input+0x68f:        testb   $0x20,0x112(%rax)
    TID    PID    UID     PRFLAGS     PFLAGS  CPU  COMMAND
*349804  28087    629        0x10          0    1  avahi-daemon
udp_input(ffff800030f54ad8,ffff800030f54ae4,11,2) at udp_input+0x68f
ip_deliver(ffff800030f54ad8,ffff800030f54ae4,11,2,1) at ip_deliver+0xf8
ip_ours(ffff800030f54ad8,ffff800030f54ae4,fb0000e0,0) at ip_ours+0x6f
ip_input_if(ffff800030f54ad8,ffff800030f54ae4,2,0,ffff80000089f000) at ip_input
_if+0x1f0
ipv4_input(ffff80000089f000,fffffd8075065100) at ipv4_input+0x38
if_input_local(ffff80000089f000,fffffd8075065100,2) at if_input_local+0x1dd
ip_output(fffffd8075055a00,0,fffffd823b5c3478,0,ffff8000014090b0,fffffd823b5c35
10,da5908a4d6759ad9) at ip_output+0x566
udp_output(fffffd823b5c3400,fffffd8075055900,fffffd8075054a00,0) at udp_output+
0x42a
sosend(fffffd823a6833f0,fffffd8075054a00,ffff800030f54e30,0,0,80) at sosend+0x3
e2
sendit(ffff800030e9aa48,d,ffff800030f54fa0,0,ffff800030f55050) at sendit+0x395
sys_sendmsg(ffff800030e9aa48,ffff800030f550e0,ffff800030f55050) at sys_sendmsg+
0x14a
syscall(ffff800030f550e0) at syscall+0x620
Xsyscall() at Xsyscall+0x128
end of kernel
end trace frame: 0x7b0dac30db70, count: 2
https://www.openbsd.org/ddb.html describes the minimum info required in bug
reports.  Insufficient info makes it difficult to find and fix bugs.
ddb{1}>

Index: sys/kern/kern_sysctl.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_sysctl.c,v
diff -u -p -r1.448 kern_sysctl.c
--- sys/kern/kern_sysctl.c      30 Sep 2024 12:32:26 -0000      1.448
+++ sys/kern/kern_sysctl.c      25 Oct 2024 20:32:47 -0000
@@ -1673,34 +1673,52 @@ sysctl_file(int *name, u_int namelen, ch
NET_LOCK();
                        mtx_enter(&tcbtable.inpt_mtx);
-                       TAILQ_FOREACH(inp, &tcbtable.inpt_queue, inp_queue)
+                       TAILQ_FOREACH(inp, &tcbtable.inpt_queue, inp_queue) {
+                               if (in_pcb_is_iterator(inp))
+                                       continue;
                                FILLSO(inp->inp_socket);
+                       }
                        mtx_leave(&tcbtable.inpt_mtx);
  #ifdef INET6
                        mtx_enter(&tcb6table.inpt_mtx);
-                       TAILQ_FOREACH(inp, &tcb6table.inpt_queue, inp_queue)
+                       TAILQ_FOREACH(inp, &tcb6table.inpt_queue, inp_queue) {
+                               if (in_pcb_is_iterator(inp))
+                                       continue;
                                FILLSO(inp->inp_socket);
+                       }
                        mtx_leave(&tcb6table.inpt_mtx);
  #endif
                        mtx_enter(&udbtable.inpt_mtx);
-                       TAILQ_FOREACH(inp, &udbtable.inpt_queue, inp_queue)
+                       TAILQ_FOREACH(inp, &udbtable.inpt_queue, inp_queue) {
+                               if (in_pcb_is_iterator(inp))
+                                       continue;
                                FILLSO(inp->inp_socket);
+                       }
                        mtx_leave(&udbtable.inpt_mtx);
  #ifdef INET6
                        mtx_enter(&udb6table.inpt_mtx);
-                       TAILQ_FOREACH(inp, &udb6table.inpt_queue, inp_queue)
+                       TAILQ_FOREACH(inp, &udb6table.inpt_queue, inp_queue) {
+                               if (in_pcb_is_iterator(inp))
+                                       continue;
                                FILLSO(inp->inp_socket);
+                       }
                        mtx_leave(&udb6table.inpt_mtx);
  #endif
                        mtx_enter(&rawcbtable.inpt_mtx);
-                       TAILQ_FOREACH(inp, &rawcbtable.inpt_queue, inp_queue)
+                       TAILQ_FOREACH(inp, &rawcbtable.inpt_queue, inp_queue) {
+                               if (in_pcb_is_iterator(inp))
+                                       continue;
                                FILLSO(inp->inp_socket);
+                       }
                        mtx_leave(&rawcbtable.inpt_mtx);
  #ifdef INET6
                        mtx_enter(&rawin6pcbtable.inpt_mtx);
                        TAILQ_FOREACH(inp, &rawin6pcbtable.inpt_queue,
-                           inp_queue)
+                           inp_queue) {
+                               if (in_pcb_is_iterator(inp))
+                                       continue;
                                FILLSO(inp->inp_socket);
+                       }
                        mtx_leave(&rawin6pcbtable.inpt_mtx);
  #endif
                        NET_UNLOCK();
Index: sys/netinet/in_pcb.c
===================================================================
RCS file: /cvs/src/sys/netinet/in_pcb.c,v
diff -u -p -r1.303 in_pcb.c
--- sys/netinet/in_pcb.c        12 Jul 2024 19:50:35 -0000      1.303
+++ sys/netinet/in_pcb.c        25 Oct 2024 20:32:47 -0000
@@ -644,6 +644,39 @@ in_pcbunref(struct inpcb *inp)
        pool_put(&inpcb_pool, inp);
  }
+struct inpcb *
+in_pcb_iterator(struct inpcbtable *table, struct inpcb *inp,
+    struct inpcb_iterator *iter)
+{
+       struct inpcb *tmp;
+
+       mtx_enter(&table->inpt_mtx);
+
+       if (inp)
+               tmp = TAILQ_NEXT(inp, inp_queue);
+       else
+               tmp = TAILQ_FIRST(&table->inpt_queue);
+
+       while (tmp && tmp->inp_table == NULL)
+               tmp = TAILQ_NEXT(tmp, inp_queue);
+
+       if (inp)
+               TAILQ_REMOVE(&table->inpt_queue, (struct inpcb *)iter,
+                   inp_queue);
+       if (tmp) {
+               TAILQ_INSERT_AFTER(&table->inpt_queue, tmp,
+                   (struct inpcb *)iter, inp_queue);
+               in_pcbref(tmp);
+       }
+
+       mtx_leave(&table->inpt_mtx);
+
+       if (inp)
+               in_pcbunref(inp);
+       
+       return tmp;
+}
+
  void
  in_setsockaddr(struct inpcb *inp, struct mbuf *nam)
  {
@@ -743,6 +776,8 @@ in_pcbnotifyall(struct inpcbtable *table
        rw_enter_write(&table->inpt_notify);
        mtx_enter(&table->inpt_mtx);
        TAILQ_FOREACH(inp, &table->inpt_queue, inp_queue) {
+               if (in_pcb_is_iterator(inp))
+                       continue;
                KASSERT(!ISSET(inp->inp_flags, INP_IPV6));
if (inp->inp_faddr.s_addr != dst->sin_addr.s_addr ||
@@ -1098,6 +1133,8 @@ in_pcbresize(struct inpcbtable *table, i
        table->inpt_size = hashsize;
TAILQ_FOREACH(inp, &table->inpt_queue, inp_queue) {
+               if (in_pcb_is_iterator(inp))
+                       continue;
                LIST_REMOVE(inp, inp_lhash);
                LIST_REMOVE(inp, inp_hash);
                in_pcbhash_insert(inp);
Index: sys/netinet/in_pcb.h
===================================================================
RCS file: /cvs/src/sys/netinet/in_pcb.h,v
diff -u -p -r1.158 in_pcb.h
--- sys/netinet/in_pcb.h        12 Jul 2024 19:50:35 -0000      1.158
+++ sys/netinet/in_pcb.h        25 Oct 2024 20:32:47 -0000
@@ -178,6 +178,20 @@ struct inpcb {
LIST_HEAD(inpcbhead, inpcb);

+struct inpcb_iterator {
+       LIST_ENTRY(inpcb) inp_hash;             /* unused */
+       LIST_ENTRY(inpcb) inp_lhash;            /* unused */
+       TAILQ_ENTRY(inpcb) inp_queue;           /* [t] inet PCB queue */
+       SIMPLEQ_ENTRY(inpcb) inp_notify;        /* unused */
+       struct    inpcbtable *inp_table;        /* [I] always NULL */
+};
+
+static inline int
+in_pcb_is_iterator(struct inpcb *inp)
+{
+       return (inp->inp_table == NULL ? 1 : 0);
+}
+
  struct inpcbtable {
        struct mutex inpt_mtx;                  /* protect queue and hash */
        struct rwlock inpt_notify;              /* protect inp_notify list */
@@ -302,6 +316,9 @@ struct inpcb *
         in_pcbref(struct inpcb *);
  void   in_pcbunref(struct inpcb *);
  void   in_pcbdisconnect(struct inpcb *);
+struct inpcb *
+        in_pcb_iterator(struct inpcbtable *, struct inpcb *,
+           struct inpcb_iterator *);
  struct inpcb *
         in_pcblookup(struct inpcbtable *, struct in_addr,
                               u_int, struct in_addr, u_int, u_int);
Index: sys/netinet/ip_divert.c
===================================================================
RCS file: /cvs/src/sys/netinet/ip_divert.c,v
diff -u -p -r1.97 ip_divert.c
--- sys/netinet/ip_divert.c     16 Aug 2024 09:20:35 -0000      1.97
+++ sys/netinet/ip_divert.c     25 Oct 2024 20:32:47 -0000
@@ -203,6 +203,8 @@ divert_packet(struct mbuf *m, int dir, u
mtx_enter(&divbtable.inpt_mtx);
        TAILQ_FOREACH(inp, &divbtable.inpt_queue, inp_queue) {
+               if (in_pcb_is_iterator(inp))
+                       continue;
                if (inp->inp_lport != divert_port)
                        continue;
                in_pcbref(inp);
Index: sys/netinet/raw_ip.c
===================================================================
RCS file: /cvs/src/sys/netinet/raw_ip.c,v
diff -u -p -r1.160 raw_ip.c
--- sys/netinet/raw_ip.c        12 Jul 2024 19:50:35 -0000      1.160
+++ sys/netinet/raw_ip.c        25 Oct 2024 20:32:47 -0000
@@ -167,6 +167,8 @@ rip_input(struct mbuf **mp, int *offp, i
        rw_enter_write(&rawcbtable.inpt_notify);
        mtx_enter(&rawcbtable.inpt_mtx);
        TAILQ_FOREACH(inp, &rawcbtable.inpt_queue, inp_queue) {
+               if (in_pcb_is_iterator(inp))
+                       continue;
                KASSERT(!ISSET(inp->inp_flags, INP_IPV6));
/*
Index: sys/netinet/udp_usrreq.c
===================================================================
RCS file: /cvs/src/sys/netinet/udp_usrreq.c,v
diff -u -p -r1.324 udp_usrreq.c
--- sys/netinet/udp_usrreq.c    6 Aug 2024 20:15:53 -0000       1.324
+++ sys/netinet/udp_usrreq.c    25 Oct 2024 20:32:47 -0000
@@ -381,7 +381,8 @@ udp_input(struct mbuf **mp, int *offp, i
        }
if (m->m_flags & (M_BCAST|M_MCAST)) {
-               SIMPLEQ_HEAD(, inpcb) inpcblist;
+               struct inpcb_iterator iter = {.inp_table = NULL};
+               struct inpcb *tinp = NULL;
                struct inpcbtable *table;
/*
@@ -400,11 +401,6 @@ udp_input(struct mbuf **mp, int *offp, i
                 * fixing the interface.  Maybe 4.5BSD will remedy this?)
                 */
- /*
-                * Locate pcb(s) for datagram.
-                * (Algorithm copied from raw_intr().)
-                */
-               SIMPLEQ_INIT(&inpcblist);
  #ifdef INET6
                if (ip6)
                        table = &udb6table;
@@ -412,9 +408,8 @@ udp_input(struct mbuf **mp, int *offp, i
  #endif
                        table = &udbtable;
- rw_enter_write(&table->inpt_notify);
-               mtx_enter(&table->inpt_mtx);
-               TAILQ_FOREACH(inp, &table->inpt_queue, inp_queue) {
+               inp = NULL;
+               while ((inp = in_pcb_iterator(table, inp, &iter)) != NULL){
                        if (ip6)
                                KASSERT(ISSET(inp->inp_flags, INP_IPV6));
                        else
@@ -465,8 +460,18 @@ udp_input(struct mbuf **mp, int *offp, i
                                        continue;
                        }
- in_pcbref(inp);
-                       SIMPLEQ_INSERT_TAIL(&inpcblist, inp, inp_notify);
+                       if (tinp != NULL) {
+                               struct mbuf *n;
+                               
+                               n = m_copym(m, 0, M_COPYALL, M_NOWAIT);
+                               if (n != NULL) {
+                                       udp_sbappend(tinp, n, ip, ip6, iphlen,
+                                           uh, &srcsa.sa, 0);
+                               }
+                               in_pcbunref(tinp);
+                       }
+
+                       tinp = in_pcbref(inp);
/*
                         * Don't look for additional matches if this one does
@@ -477,14 +482,13 @@ udp_input(struct mbuf **mp, int *offp, i
                         * clear these options after setting them.
                         */
                        if ((inp->inp_socket->so_options & (SO_REUSEPORT |
-                           SO_REUSEADDR)) == 0)
+                           SO_REUSEADDR)) == 0) {
+                               in_pcbunref(inp);
                                break;
+                       }
                }
-               mtx_leave(&table->inpt_mtx);
-
-               if (SIMPLEQ_EMPTY(&inpcblist)) {
-                       rw_exit_write(&table->inpt_notify);
+ if (tinp == NULL) {
                        /*
                         * No matching pcb found; discard datagram.
                         * (No need to send an ICMP Port Unreachable
@@ -494,21 +498,8 @@ udp_input(struct mbuf **mp, int *offp, i
                        goto bad;
                }
- while ((inp = SIMPLEQ_FIRST(&inpcblist)) != NULL) {
-                       struct mbuf *n;
-
-                       SIMPLEQ_REMOVE_HEAD(&inpcblist, inp_notify);
-                       if (SIMPLEQ_EMPTY(&inpcblist))
-                               n = m;
-                       else
-                               n = m_copym(m, 0, M_COPYALL, M_NOWAIT);
-                       if (n != NULL) {
-                               udp_sbappend(inp, n, ip, ip6, iphlen, uh,
-                                   &srcsa.sa, 0);
-                       }
-                       in_pcbunref(inp);
-               }
-               rw_exit_write(&table->inpt_notify);
+               udp_sbappend(tinp, m, ip, ip6, iphlen, uh, &srcsa.sa, 0);
+               in_pcbunref(tinp);
return IPPROTO_DONE;
        }
Index: sys/netinet6/in6_pcb.c
===================================================================
RCS file: /cvs/src/sys/netinet6/in6_pcb.c,v
diff -u -p -r1.144 in6_pcb.c
--- sys/netinet6/in6_pcb.c      12 Apr 2024 16:07:09 -0000      1.144
+++ sys/netinet6/in6_pcb.c      25 Oct 2024 20:32:47 -0000
@@ -479,6 +479,8 @@ in6_pcbnotify(struct inpcbtable *table,
        rw_enter_write(&table->inpt_notify);
        mtx_enter(&table->inpt_mtx);
        TAILQ_FOREACH(inp, &table->inpt_queue, inp_queue) {
+               if (in_pcb_is_iterator(inp))
+                       continue;
                KASSERT(ISSET(inp->inp_flags, INP_IPV6));
/*
Index: sys/netinet6/ip6_divert.c
===================================================================
RCS file: /cvs/src/sys/netinet6/ip6_divert.c,v
diff -u -p -r1.97 ip6_divert.c
--- sys/netinet6/ip6_divert.c   16 Aug 2024 09:20:35 -0000      1.97
+++ sys/netinet6/ip6_divert.c   25 Oct 2024 20:32:47 -0000
@@ -212,6 +212,8 @@ divert6_packet(struct mbuf *m, int dir,
mtx_enter(&divb6table.inpt_mtx);
        TAILQ_FOREACH(inp, &divb6table.inpt_queue, inp_queue) {
+               if (in_pcb_is_iterator(inp))
+                       continue;
                if (inp->inp_lport != divert_port)
                        continue;
                in_pcbref(inp);
Index: sys/netinet6/raw_ip6.c
===================================================================
RCS file: /cvs/src/sys/netinet6/raw_ip6.c,v
diff -u -p -r1.185 raw_ip6.c
--- sys/netinet6/raw_ip6.c      12 Jul 2024 19:50:35 -0000      1.185
+++ sys/netinet6/raw_ip6.c      25 Oct 2024 20:32:47 -0000
@@ -181,6 +181,8 @@ rip6_input(struct mbuf **mp, int *offp,
        rw_enter_write(&rawin6pcbtable.inpt_notify);
        mtx_enter(&rawin6pcbtable.inpt_mtx);
        TAILQ_FOREACH(inp, &rawin6pcbtable.inpt_queue, inp_queue) {
+               if (in_pcb_is_iterator(inp))
+                       continue;
                KASSERT(ISSET(inp->inp_flags, INP_IPV6));
/*

Reply via email to