On Fri, Jul 21, 2023 at 07:38:17PM +0200, Alexander Bluhm wrote: > On Thu, Jul 13, 2023 at 02:22:17AM +0300, Vitaliy Makkoveev wrote: > > This is a part of my standalone sblock() work. I need this movement > > because buffers related SO_SND* and SO_RCV* socket options modification > > should be protected with sblock(). However, standalone sblock() has > > different lock orders with solock() for receive and send buffers. At > > least sblock() for `so_snd' buffer will always be taken before solock() > > in the sosend() path. > > > > The switch() block was split by two. SO_DONTROUTE, SO_SPLICE, SO_SND* > > and SO_RCV* cases do not require to call (*pr_ctloutput)(), so they were > > moved to the first switch() block solock() was pushed into each case > > individually. For SO_SND* and SO_RCV* cases solock() will be replaced by > > sblock() in the future. SO_RTABLE case calls (*pr_ctloutput)(), but do > > this in the special way, so it was placed to the first switch() block > > too. > > > > The second switch() block contains the cases which require to call > > (*pr_ctloutput)(). solock() is taken around this block together with the > > (*pr_ctloutput)() call to keep atomicy. > > I did not see where (*pr_ctloutput)() is actually required. In > this else level == SOL_SOCKET and all pr_ctloutput functions I could > find do nothing for case SOL_SOCKET. > > So I do not see the cases where calling (*pr_ctloutput)() is required > and where not. What did I miss? >
Hmm, you are right. Except for the SO_RTABLE option, nothing requires calling (*pr_ctloutput)(). So we could drop it in the else branch, but keep it for SO_RTABLE only. This simplifies the diff. > I run regress with this and a witness kernel. No fallout. These > witness warnings are alway there when I run regress. > > http://bluhm.genua.de/regress/results/2023-07-21T13%3A08%3A59Z/bsdcons-ot29.txt > Thanks for testing. An updated diff is below. Index: sys/kern/uipc_socket.c =================================================================== RCS file: /cvs/src/sys/kern/uipc_socket.c,v retrieving revision 1.305 diff -u -p -r1.305 uipc_socket.c --- sys/kern/uipc_socket.c 4 Jul 2023 22:28:24 -0000 1.305 +++ sys/kern/uipc_socket.c 22 Jul 2023 14:38:43 -0000 @@ -1789,12 +1789,12 @@ sosetopt(struct socket *so, int level, i { int error = 0; - soassertlocked(so); - if (level != SOL_SOCKET) { if (so->so_proto->pr_ctloutput) { + solock(so); error = (*so->so_proto->pr_ctloutput)(PRCO_SETOPT, so, level, optname, m); + sounlock(so); return (error); } error = ENOPROTOOPT; @@ -1813,9 +1813,16 @@ sosetopt(struct socket *so, int level, i mtod(m, struct linger *)->l_linger < 0 || mtod(m, struct linger *)->l_linger > SHRT_MAX) return (EINVAL); + + solock(so); so->so_linger = mtod(m, struct linger *)->l_linger; - /* FALLTHROUGH */ + if (*mtod(m, int *)) + so->so_options |= optname; + else + so->so_options &= ~optname; + sounlock(so); + break; case SO_BINDANY: case SO_DEBUG: case SO_KEEPALIVE: @@ -1828,12 +1835,15 @@ sosetopt(struct socket *so, int level, i case SO_ZEROIZE: if (m == NULL || m->m_len < sizeof (int)) return (EINVAL); + + solock(so); if (*mtod(m, int *)) so->so_options |= optname; else so->so_options &= ~optname; - break; + sounlock(so); + break; case SO_DONTROUTE: if (m == NULL || m->m_len < sizeof (int)) return (EINVAL); @@ -1853,23 +1863,32 @@ sosetopt(struct socket *so, int level, i cnt = *mtod(m, int *); if ((long)cnt <= 0) cnt = 1; - switch (optname) { + solock(so); + switch 
(optname) { case SO_SNDBUF: - if (so->so_snd.sb_state & SS_CANTSENDMORE) - return (EINVAL); + if (so->so_snd.sb_state & SS_CANTSENDMORE) { + error = EINVAL; + break; + } if (sbcheckreserve(cnt, so->so_snd.sb_wat) || - sbreserve(so, &so->so_snd, cnt)) - return (ENOBUFS); + sbreserve(so, &so->so_snd, cnt)) { + error = ENOBUFS; + break; + } so->so_snd.sb_wat = cnt; break; case SO_RCVBUF: - if (so->so_rcv.sb_state & SS_CANTRCVMORE) - return (EINVAL); + if (so->so_rcv.sb_state & SS_CANTRCVMORE) { + error = EINVAL; + break; + } if (sbcheckreserve(cnt, so->so_rcv.sb_wat) || - sbreserve(so, &so->so_rcv, cnt)) - return (ENOBUFS); + sbreserve(so, &so->so_rcv, cnt)) { + error = ENOBUFS; + break; + } so->so_rcv.sb_wat = cnt; break; @@ -1884,6 +1903,7 @@ sosetopt(struct socket *so, int level, i so->so_rcv.sb_hiwat : cnt; break; } + sounlock(so); break; } @@ -1903,8 +1923,9 @@ sosetopt(struct socket *so, int level, i return (EDOM); if (nsecs == 0) nsecs = INFSLP; - switch (optname) { + solock(so); + switch (optname) { case SO_SNDTIMEO: so->so_snd.sb_timeo_nsecs = nsecs; break; @@ -1912,6 +1933,7 @@ sosetopt(struct socket *so, int level, i so->so_rcv.sb_timeo_nsecs = nsecs; break; } + sounlock(so); break; } @@ -1923,19 +1945,20 @@ sosetopt(struct socket *so, int level, i so->so_proto->pr_domain; level = dom->dom_protosw->pr_protocol; + solock(so); error = (*so->so_proto->pr_ctloutput) (PRCO_SETOPT, so, level, optname, m); - return (error); - } - error = ENOPROTOOPT; + sounlock(so); + } else + error = ENOPROTOOPT; break; - #ifdef SOCKET_SPLICE case SO_SPLICE: + solock(so); if (m == NULL) { error = sosplice(so, -1, 0, NULL); } else if (m->m_len < sizeof(int)) { - return (EINVAL); + error = EINVAL; } else if (m->m_len < sizeof(struct splice)) { error = sosplice(so, *mtod(m, int *), 0, NULL); } else { @@ -1944,16 +1967,13 @@ sosetopt(struct socket *so, int level, i mtod(m, struct splice *)->sp_max, &mtod(m, struct splice *)->sp_idle); } + sounlock(so); break; #endif /* SOCKET_SPLICE 
*/ default: error = ENOPROTOOPT; break; - } - if (error == 0 && so->so_proto->pr_ctloutput) { - (*so->so_proto->pr_ctloutput)(PRCO_SETOPT, so, - level, optname, m); } } Index: sys/kern/uipc_syscalls.c =================================================================== RCS file: /cvs/src/sys/kern/uipc_syscalls.c,v retrieving revision 1.212 diff -u -p -r1.212 uipc_syscalls.c --- sys/kern/uipc_syscalls.c 10 Feb 2023 14:34:17 -0000 1.212 +++ sys/kern/uipc_syscalls.c 22 Jul 2023 14:38:43 -0000 @@ -1232,9 +1232,7 @@ sys_setsockopt(struct proc *p, void *v, m->m_len = SCARG(uap, valsize); } so = fp->f_data; - solock(so); error = sosetopt(so, SCARG(uap, level), SCARG(uap, name), m); - sounlock(so); bad: m_freem(m); FRELE(fp, p); Index: sys/net/bfd.c =================================================================== RCS file: /cvs/src/sys/net/bfd.c,v retrieving revision 1.79 diff -u -p -r1.79 bfd.c --- sys/net/bfd.c 12 Jul 2023 16:10:45 -0000 1.79 +++ sys/net/bfd.c 22 Jul 2023 14:38:43 -0000 @@ -452,9 +452,7 @@ bfd_listener(struct bfd_config *bfd, uns mopt->m_len = sizeof(int); ip = mtod(mopt, int *); *ip = MAXTTL; - solock(so); error = sosetopt(so, IPPROTO_IP, IP_MINTTL, mopt); - sounlock(so); m_freem(mopt); if (error) { printf("%s: sosetopt error %d\n", @@ -531,9 +529,7 @@ bfd_sender(struct bfd_config *bfd, unsig mopt->m_len = sizeof(int); ip = mtod(mopt, int *); *ip = IP_PORTRANGE_HIGH; - solock(so); error = sosetopt(so, IPPROTO_IP, IP_PORTRANGE, mopt); - sounlock(so); m_freem(mopt); if (error) { printf("%s: sosetopt error %d\n", @@ -545,9 +541,7 @@ bfd_sender(struct bfd_config *bfd, unsig mopt->m_len = sizeof(int); ip = mtod(mopt, int *); *ip = MAXTTL; - solock(so); error = sosetopt(so, IPPROTO_IP, IP_TTL, mopt); - sounlock(so); m_freem(mopt); if (error) { printf("%s: sosetopt error %d\n", @@ -559,9 +553,7 @@ bfd_sender(struct bfd_config *bfd, unsig mopt->m_len = sizeof(int); ip = mtod(mopt, int *); *ip = IPTOS_PREC_INTERNETCONTROL; - solock(so); error = sosetopt(so, 
IPPROTO_IP, IP_TOS, mopt); - sounlock(so); m_freem(mopt); if (error) { printf("%s: sosetopt error %d\n", Index: sys/net/if_vxlan.c =================================================================== RCS file: /cvs/src/sys/net/if_vxlan.c,v retrieving revision 1.92 diff -u -p -r1.92 if_vxlan.c --- sys/net/if_vxlan.c 13 Apr 2023 02:19:05 -0000 1.92 +++ sys/net/if_vxlan.c 22 Jul 2023 14:38:43 -0000 @@ -934,9 +934,9 @@ vxlan_tep_add_addr(struct vxlan_softc *s goto free; solock(so); - sotoinpcb(so)->inp_upcall = vxlan_input; sotoinpcb(so)->inp_upcall_arg = vt; + sounlock(so); m_inithdr(&m); m.m_len = sizeof(vt->vt_rdomain); @@ -973,12 +973,12 @@ vxlan_tep_add_addr(struct vxlan_softc *s unhandled_af(vt->vt_af); } + solock(so); error = sobind(so, &m, curproc); + sounlock(so); if (error != 0) goto close; - sounlock(so); - rw_assert_wrlock(&vxlan_lock); TAILQ_INSERT_TAIL(&vxlan_teps, vt, vt_entry); @@ -987,7 +987,6 @@ vxlan_tep_add_addr(struct vxlan_softc *s return (0); close: - sounlock(so); soclose(so, MSG_DONTWAIT); free: free(vt, M_DEVBUF, sizeof(*vt)); Index: sys/net/if_wg.c =================================================================== RCS file: /cvs/src/sys/net/if_wg.c,v retrieving revision 1.28 diff -u -p -r1.28 if_wg.c --- sys/net/if_wg.c 1 Jun 2023 18:57:53 -0000 1.28 +++ sys/net/if_wg.c 22 Jul 2023 14:38:43 -0000 @@ -720,14 +720,16 @@ wg_socket_open(struct socket **so, int a solock(*so); sotoinpcb(*so)->inp_upcall = wg_input; sotoinpcb(*so)->inp_upcall_arg = upcall_arg; + sounlock(*so); if ((ret = sosetopt(*so, SOL_SOCKET, SO_RTABLE, &mrtable)) == 0) { + solock(*so); if ((ret = sobind(*so, &mhostnam, curproc)) == 0) { *port = sotoinpcb(*so)->inp_lport; *rtable = sotoinpcb(*so)->inp_rtableid; } + sounlock(*so); } - sounlock(*so); if (ret != 0) wg_socket_close(so); Index: sys/nfs/krpc_subr.c =================================================================== RCS file: /cvs/src/sys/nfs/krpc_subr.c,v retrieving revision 1.37 diff -u -p -r1.37 krpc_subr.c --- 
sys/nfs/krpc_subr.c 6 Jun 2022 14:45:41 -0000 1.37 +++ sys/nfs/krpc_subr.c 22 Jul 2023 14:38:43 -0000 @@ -239,9 +239,7 @@ krpc_call(struct sockaddr_in *sa, u_int tv.tv_usec = 0; memcpy(mtod(m, struct timeval *), &tv, sizeof tv); m->m_len = sizeof(tv); - solock(so); error = sosetopt(so, SOL_SOCKET, SO_RCVTIMEO, m); - sounlock(so); m_freem(m); if (error) goto out; @@ -255,9 +253,7 @@ krpc_call(struct sockaddr_in *sa, u_int on = mtod(m, int32_t *); m->m_len = sizeof(*on); *on = 1; - solock(so); error = sosetopt(so, SOL_SOCKET, SO_BROADCAST, m); - sounlock(so); m_freem(m); if (error) goto out; @@ -272,9 +268,7 @@ krpc_call(struct sockaddr_in *sa, u_int mopt->m_len = sizeof(int); ip = mtod(mopt, int *); *ip = IP_PORTRANGE_LOW; - solock(so); error = sosetopt(so, IPPROTO_IP, IP_PORTRANGE, mopt); - sounlock(so); m_freem(mopt); if (error) goto out; @@ -299,9 +293,7 @@ krpc_call(struct sockaddr_in *sa, u_int mopt->m_len = sizeof(int); ip = mtod(mopt, int *); *ip = IP_PORTRANGE_DEFAULT; - solock(so); error = sosetopt(so, IPPROTO_IP, IP_PORTRANGE, mopt); - sounlock(so); m_freem(mopt); if (error) goto out; Index: sys/nfs/nfs_socket.c =================================================================== RCS file: /cvs/src/sys/nfs/nfs_socket.c,v retrieving revision 1.143 diff -u -p -r1.143 nfs_socket.c --- sys/nfs/nfs_socket.c 13 Aug 2022 21:01:46 -0000 1.143 +++ sys/nfs/nfs_socket.c 22 Jul 2023 14:38:43 -0000 @@ -258,7 +258,6 @@ nfs_connect(struct nfsmount *nmp, struct MGET(nam, M_WAIT, MT_SONAME); so = nmp->nm_so; - solock(so); nmp->nm_soflags = so->so_proto->pr_flags; /* @@ -282,7 +281,9 @@ nfs_connect(struct nfsmount *nmp, struct sin->sin_family = AF_INET; sin->sin_addr.s_addr = INADDR_ANY; sin->sin_port = htons(0); + solock(so); error = sobind(so, nam, &proc0); + sounlock(so); if (error) goto bad; @@ -294,6 +295,7 @@ nfs_connect(struct nfsmount *nmp, struct goto bad; } + solock(so); /* * Protocols that do not require connections may be optionally left * unconnected for servers 
that reply from a port other than NFS_PORT. @@ -301,12 +303,12 @@ nfs_connect(struct nfsmount *nmp, struct if (nmp->nm_flag & NFSMNT_NOCONN) { if (nmp->nm_soflags & PR_CONNREQUIRED) { error = ENOTCONN; - goto bad; + goto bad_locked; } } else { error = soconnect(so, nmp->nm_nam); if (error) - goto bad; + goto bad_locked; /* * Wait for the connection to complete. Cribbed from the @@ -320,13 +322,13 @@ nfs_connect(struct nfsmount *nmp, struct so->so_error == 0 && rep && (error = nfs_sigintr(nmp, rep, rep->r_procp)) != 0){ so->so_state &= ~SS_ISCONNECTING; - goto bad; + goto bad_locked; } } if (so->so_error) { error = so->so_error; so->so_error = 0; - goto bad; + goto bad_locked; } } /* @@ -338,6 +340,7 @@ nfs_connect(struct nfsmount *nmp, struct so->so_snd.sb_timeo_nsecs = SEC_TO_NSEC(5); else so->so_snd.sb_timeo_nsecs = INFSLP; + sounlock(so); if (nmp->nm_sotype == SOCK_DGRAM) { sndreserve = nmp->nm_wsize + NFS_MAXPKTHDR; rcvreserve = (max(nmp->nm_rsize, nmp->nm_readdirsize) + @@ -360,9 +363,10 @@ nfs_connect(struct nfsmount *nmp, struct } else { panic("%s: nm_sotype %d", __func__, nmp->nm_sotype); } + solock(so); error = soreserve(so, sndreserve, rcvreserve); if (error) - goto bad; + goto bad_locked; so->so_rcv.sb_flags |= SB_NOINTR; so->so_snd.sb_flags |= SB_NOINTR; sounlock(so); @@ -377,8 +381,9 @@ nfs_connect(struct nfsmount *nmp, struct nmp->nm_timeouts = 0; return (0); -bad: +bad_locked: sounlock(so); +bad: m_freem(mopt); m_freem(nam); Index: sys/nfs/nfs_syscalls.c =================================================================== RCS file: /cvs/src/sys/nfs/nfs_syscalls.c,v retrieving revision 1.118 diff -u -p -r1.118 nfs_syscalls.c --- sys/nfs/nfs_syscalls.c 6 Jun 2022 14:45:41 -0000 1.118 +++ sys/nfs/nfs_syscalls.c 22 Jul 2023 14:38:43 -0000 @@ -249,8 +249,8 @@ nfssvc_addsock(struct file *fp, struct m siz = NFS_MAXPACKET; solock(so); error = soreserve(so, siz, siz); + sounlock(so); if (error) { - sounlock(so); m_freem(mynam); return (error); } @@ -275,6 
+275,7 @@ nfssvc_addsock(struct file *fp, struct m sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m); m_freem(m); } + solock(so); so->so_rcv.sb_flags &= ~SB_NOINTR; so->so_rcv.sb_timeo_nsecs = INFSLP; so->so_snd.sb_flags &= ~SB_NOINTR;