On Fri, Jul 21, 2023 at 07:38:17PM +0200, Alexander Bluhm wrote:
> On Thu, Jul 13, 2023 at 02:22:17AM +0300, Vitaliy Makkoveev wrote:
> > This is a part of my standalone sblock() work. I need this movement
> > because buffers related SO_SND* and SO_RCV* socket options modification
> > should be protected with sblock(). However, standalone sblock() has
> > different lock orders with solock() for receive and send buffers. At
> > least sblock() for `so_snd' buffer will always be taken before solock()
> > in the sosend() path.
> > 
> > The switch() block was split by two. SO_DONTROUTE, SO_SPLICE, SO_SND*
> > and SO_RCV* cases do not require to call (*pr_ctloutput)(), so they were
> > moved to the first switch() block solock() was pushed into each case
> > individually. For SO_SND* and SO_RCV* cases solock() will be replaced by
> > sblock() in the future. SO_RTABLE case calls (*pr_ctloutput)(), but do
> > this in the special way, so it was placed to the first switch() block
> > too.
> > 
> > The second switch() block contains the cases which require to call
> > (*pr_ctloutput)(). solock() is taken around this block together with the
> > (*pr_ctloutput)() call to keep atomicy.
> 
> I did not see where (*pr_ctloutput)() is actually required.  In
> this else level == SOL_SOCKET and all pr_ctloutput functions I could
> find do nothing for case SOL_SOCKET.
> 
> So I do not see the cases where calling (*pr_ctloutput)() is required
> and where not.  What did I miss?
> 

Hmm, you are right. Except for the SO_RTABLE option, nothing requires
calling (*pr_ctloutput)(). So we could drop the call in the else branch
and keep it for SO_RTABLE only. This simplifies the diff.

> I run regress with this and a witness kernel.  No fallout.  These
> witness warnings are alway there when I run regress.
> 
> http://bluhm.genua.de/regress/results/2023-07-21T13%3A08%3A59Z/bsdcons-ot29.txt
> 

Thanks for testing. The updated diff is below.

Index: sys/kern/uipc_socket.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_socket.c,v
retrieving revision 1.305
diff -u -p -r1.305 uipc_socket.c
--- sys/kern/uipc_socket.c      4 Jul 2023 22:28:24 -0000       1.305
+++ sys/kern/uipc_socket.c      22 Jul 2023 14:38:43 -0000
@@ -1789,12 +1789,12 @@ sosetopt(struct socket *so, int level, i
 {
        int error = 0;
 
-       soassertlocked(so);
-
        if (level != SOL_SOCKET) {
                if (so->so_proto->pr_ctloutput) {
+                       solock(so);
                        error = (*so->so_proto->pr_ctloutput)(PRCO_SETOPT, so,
                            level, optname, m);
+                       sounlock(so);
                        return (error);
                }
                error = ENOPROTOOPT;
@@ -1813,9 +1813,16 @@ sosetopt(struct socket *so, int level, i
                            mtod(m, struct linger *)->l_linger < 0 ||
                            mtod(m, struct linger *)->l_linger > SHRT_MAX)
                                return (EINVAL);
+
+                       solock(so);
                        so->so_linger = mtod(m, struct linger *)->l_linger;
-                       /* FALLTHROUGH */
+                       if (*mtod(m, int *))
+                               so->so_options |= optname;
+                       else
+                               so->so_options &= ~optname;
+                       sounlock(so);
 
+                       break;
                case SO_BINDANY:
                case SO_DEBUG:
                case SO_KEEPALIVE:
@@ -1828,12 +1835,15 @@ sosetopt(struct socket *so, int level, i
                case SO_ZEROIZE:
                        if (m == NULL || m->m_len < sizeof (int))
                                return (EINVAL);
+
+                       solock(so);
                        if (*mtod(m, int *))
                                so->so_options |= optname;
                        else
                                so->so_options &= ~optname;
-                       break;
+                       sounlock(so);
 
+                       break;
                case SO_DONTROUTE:
                        if (m == NULL || m->m_len < sizeof (int))
                                return (EINVAL);
@@ -1853,23 +1863,32 @@ sosetopt(struct socket *so, int level, i
                        cnt = *mtod(m, int *);
                        if ((long)cnt <= 0)
                                cnt = 1;
-                       switch (optname) {
 
+                       solock(so);
+                       switch (optname) {
                        case SO_SNDBUF:
-                               if (so->so_snd.sb_state & SS_CANTSENDMORE)
-                                       return (EINVAL);
+                               if (so->so_snd.sb_state & SS_CANTSENDMORE) {
+                                       error = EINVAL;
+                                       break;
+                               }
                                if (sbcheckreserve(cnt, so->so_snd.sb_wat) ||
-                                   sbreserve(so, &so->so_snd, cnt))
-                                       return (ENOBUFS);
+                                   sbreserve(so, &so->so_snd, cnt)) {
+                                       error = ENOBUFS;
+                                       break;
+                               }
                                so->so_snd.sb_wat = cnt;
                                break;
 
                        case SO_RCVBUF:
-                               if (so->so_rcv.sb_state & SS_CANTRCVMORE)
-                                       return (EINVAL);
+                               if (so->so_rcv.sb_state & SS_CANTRCVMORE) {
+                                       error = EINVAL;
+                                       break;
+                               }
                                if (sbcheckreserve(cnt, so->so_rcv.sb_wat) ||
-                                   sbreserve(so, &so->so_rcv, cnt))
-                                       return (ENOBUFS);
+                                   sbreserve(so, &so->so_rcv, cnt)) {
+                                       error = ENOBUFS;
+                                       break;
+                               }
                                so->so_rcv.sb_wat = cnt;
                                break;
 
@@ -1884,6 +1903,7 @@ sosetopt(struct socket *so, int level, i
                                    so->so_rcv.sb_hiwat : cnt;
                                break;
                        }
+                       sounlock(so);
                        break;
                    }
 
@@ -1903,8 +1923,9 @@ sosetopt(struct socket *so, int level, i
                                return (EDOM);
                        if (nsecs == 0)
                                nsecs = INFSLP;
-                       switch (optname) {
 
+                       solock(so);
+                       switch (optname) {
                        case SO_SNDTIMEO:
                                so->so_snd.sb_timeo_nsecs = nsecs;
                                break;
@@ -1912,6 +1933,7 @@ sosetopt(struct socket *so, int level, i
                                so->so_rcv.sb_timeo_nsecs = nsecs;
                                break;
                        }
+                       sounlock(so);
                        break;
                    }
 
@@ -1923,19 +1945,20 @@ sosetopt(struct socket *so, int level, i
                                    so->so_proto->pr_domain;
 
                                level = dom->dom_protosw->pr_protocol;
+                               solock(so);
                                error = (*so->so_proto->pr_ctloutput)
                                    (PRCO_SETOPT, so, level, optname, m);
-                               return (error);
-                       }
-                       error = ENOPROTOOPT;
+                               sounlock(so);
+                       } else
+                               error = ENOPROTOOPT;
                        break;
-
 #ifdef SOCKET_SPLICE
                case SO_SPLICE:
+                       solock(so);
                        if (m == NULL) {
                                error = sosplice(so, -1, 0, NULL);
                        } else if (m->m_len < sizeof(int)) {
-                               return (EINVAL);
+                               error = EINVAL;
                        } else if (m->m_len < sizeof(struct splice)) {
                                error = sosplice(so, *mtod(m, int *), 0, NULL);
                        } else {
@@ -1944,16 +1967,13 @@ sosetopt(struct socket *so, int level, i
                                    mtod(m, struct splice *)->sp_max,
                                   &mtod(m, struct splice *)->sp_idle);
                        }
+                       sounlock(so);
                        break;
 #endif /* SOCKET_SPLICE */
 
                default:
                        error = ENOPROTOOPT;
                        break;
-               }
-               if (error == 0 && so->so_proto->pr_ctloutput) {
-                       (*so->so_proto->pr_ctloutput)(PRCO_SETOPT, so,
-                           level, optname, m);
                }
        }
 
Index: sys/kern/uipc_syscalls.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_syscalls.c,v
retrieving revision 1.212
diff -u -p -r1.212 uipc_syscalls.c
--- sys/kern/uipc_syscalls.c    10 Feb 2023 14:34:17 -0000      1.212
+++ sys/kern/uipc_syscalls.c    22 Jul 2023 14:38:43 -0000
@@ -1232,9 +1232,7 @@ sys_setsockopt(struct proc *p, void *v, 
                m->m_len = SCARG(uap, valsize);
        }
        so = fp->f_data;
-       solock(so);
        error = sosetopt(so, SCARG(uap, level), SCARG(uap, name), m);
-       sounlock(so);
 bad:
        m_freem(m);
        FRELE(fp, p);
Index: sys/net/bfd.c
===================================================================
RCS file: /cvs/src/sys/net/bfd.c,v
retrieving revision 1.79
diff -u -p -r1.79 bfd.c
--- sys/net/bfd.c       12 Jul 2023 16:10:45 -0000      1.79
+++ sys/net/bfd.c       22 Jul 2023 14:38:43 -0000
@@ -452,9 +452,7 @@ bfd_listener(struct bfd_config *bfd, uns
        mopt->m_len = sizeof(int);
        ip = mtod(mopt, int *);
        *ip = MAXTTL;
-       solock(so);
        error = sosetopt(so, IPPROTO_IP, IP_MINTTL, mopt);
-       sounlock(so);
        m_freem(mopt);
        if (error) {
                printf("%s: sosetopt error %d\n",
@@ -531,9 +529,7 @@ bfd_sender(struct bfd_config *bfd, unsig
        mopt->m_len = sizeof(int);
        ip = mtod(mopt, int *);
        *ip = IP_PORTRANGE_HIGH;
-       solock(so);
        error = sosetopt(so, IPPROTO_IP, IP_PORTRANGE, mopt);
-       sounlock(so);
        m_freem(mopt);
        if (error) {
                printf("%s: sosetopt error %d\n",
@@ -545,9 +541,7 @@ bfd_sender(struct bfd_config *bfd, unsig
        mopt->m_len = sizeof(int);
        ip = mtod(mopt, int *);
        *ip = MAXTTL;
-       solock(so);
        error = sosetopt(so, IPPROTO_IP, IP_TTL, mopt);
-       sounlock(so);
        m_freem(mopt);
        if (error) {
                printf("%s: sosetopt error %d\n",
@@ -559,9 +553,7 @@ bfd_sender(struct bfd_config *bfd, unsig
        mopt->m_len = sizeof(int);
        ip = mtod(mopt, int *);
        *ip = IPTOS_PREC_INTERNETCONTROL;
-       solock(so);
        error = sosetopt(so, IPPROTO_IP, IP_TOS, mopt);
-       sounlock(so);
        m_freem(mopt);
        if (error) {
                printf("%s: sosetopt error %d\n",
Index: sys/net/if_vxlan.c
===================================================================
RCS file: /cvs/src/sys/net/if_vxlan.c,v
retrieving revision 1.92
diff -u -p -r1.92 if_vxlan.c
--- sys/net/if_vxlan.c  13 Apr 2023 02:19:05 -0000      1.92
+++ sys/net/if_vxlan.c  22 Jul 2023 14:38:43 -0000
@@ -934,9 +934,9 @@ vxlan_tep_add_addr(struct vxlan_softc *s
                goto free;
 
        solock(so);
-
        sotoinpcb(so)->inp_upcall = vxlan_input;
        sotoinpcb(so)->inp_upcall_arg = vt;
+       sounlock(so);
 
        m_inithdr(&m);
        m.m_len = sizeof(vt->vt_rdomain);
@@ -973,12 +973,12 @@ vxlan_tep_add_addr(struct vxlan_softc *s
                unhandled_af(vt->vt_af);
        }
 
+       solock(so);
        error = sobind(so, &m, curproc);
+       sounlock(so);
        if (error != 0)
                goto close;
 
-       sounlock(so);
-
        rw_assert_wrlock(&vxlan_lock);
        TAILQ_INSERT_TAIL(&vxlan_teps, vt, vt_entry);
 
@@ -987,7 +987,6 @@ vxlan_tep_add_addr(struct vxlan_softc *s
        return (0);
 
 close:
-       sounlock(so);
        soclose(so, MSG_DONTWAIT);
 free:
        free(vt, M_DEVBUF, sizeof(*vt));
Index: sys/net/if_wg.c
===================================================================
RCS file: /cvs/src/sys/net/if_wg.c,v
retrieving revision 1.28
diff -u -p -r1.28 if_wg.c
--- sys/net/if_wg.c     1 Jun 2023 18:57:53 -0000       1.28
+++ sys/net/if_wg.c     22 Jul 2023 14:38:43 -0000
@@ -720,14 +720,16 @@ wg_socket_open(struct socket **so, int a
        solock(*so);
        sotoinpcb(*so)->inp_upcall = wg_input;
        sotoinpcb(*so)->inp_upcall_arg = upcall_arg;
+       sounlock(*so);
 
        if ((ret = sosetopt(*so, SOL_SOCKET, SO_RTABLE, &mrtable)) == 0) {
+               solock(*so);
                if ((ret = sobind(*so, &mhostnam, curproc)) == 0) {
                        *port = sotoinpcb(*so)->inp_lport;
                        *rtable = sotoinpcb(*so)->inp_rtableid;
                }
+               sounlock(*so);
        }
-       sounlock(*so);
 
        if (ret != 0)
                wg_socket_close(so);
Index: sys/nfs/krpc_subr.c
===================================================================
RCS file: /cvs/src/sys/nfs/krpc_subr.c,v
retrieving revision 1.37
diff -u -p -r1.37 krpc_subr.c
--- sys/nfs/krpc_subr.c 6 Jun 2022 14:45:41 -0000       1.37
+++ sys/nfs/krpc_subr.c 22 Jul 2023 14:38:43 -0000
@@ -239,9 +239,7 @@ krpc_call(struct sockaddr_in *sa, u_int 
        tv.tv_usec = 0;
        memcpy(mtod(m, struct timeval *), &tv, sizeof tv);
        m->m_len = sizeof(tv);
-       solock(so);
        error = sosetopt(so, SOL_SOCKET, SO_RCVTIMEO, m);
-       sounlock(so);
        m_freem(m);
        if (error)
                goto out;
@@ -255,9 +253,7 @@ krpc_call(struct sockaddr_in *sa, u_int 
                on = mtod(m, int32_t *);
                m->m_len = sizeof(*on);
                *on = 1;
-               solock(so);
                error = sosetopt(so, SOL_SOCKET, SO_BROADCAST, m);
-               sounlock(so);
                m_freem(m);
                if (error)
                        goto out;
@@ -272,9 +268,7 @@ krpc_call(struct sockaddr_in *sa, u_int 
        mopt->m_len = sizeof(int);
        ip = mtod(mopt, int *);
        *ip = IP_PORTRANGE_LOW;
-       solock(so);
        error = sosetopt(so, IPPROTO_IP, IP_PORTRANGE, mopt);
-       sounlock(so);
        m_freem(mopt);
        if (error)
                goto out;
@@ -299,9 +293,7 @@ krpc_call(struct sockaddr_in *sa, u_int 
        mopt->m_len = sizeof(int);
        ip = mtod(mopt, int *);
        *ip = IP_PORTRANGE_DEFAULT;
-       solock(so);
        error = sosetopt(so, IPPROTO_IP, IP_PORTRANGE, mopt);
-       sounlock(so);
        m_freem(mopt);
        if (error)
                goto out;
Index: sys/nfs/nfs_socket.c
===================================================================
RCS file: /cvs/src/sys/nfs/nfs_socket.c,v
retrieving revision 1.143
diff -u -p -r1.143 nfs_socket.c
--- sys/nfs/nfs_socket.c        13 Aug 2022 21:01:46 -0000      1.143
+++ sys/nfs/nfs_socket.c        22 Jul 2023 14:38:43 -0000
@@ -258,7 +258,6 @@ nfs_connect(struct nfsmount *nmp, struct
                MGET(nam, M_WAIT, MT_SONAME);
 
        so = nmp->nm_so;
-       solock(so);
        nmp->nm_soflags = so->so_proto->pr_flags;
 
        /*
@@ -282,7 +281,9 @@ nfs_connect(struct nfsmount *nmp, struct
                sin->sin_family = AF_INET;
                sin->sin_addr.s_addr = INADDR_ANY;
                sin->sin_port = htons(0);
+               solock(so);
                error = sobind(so, nam, &proc0);
+               sounlock(so);
                if (error)
                        goto bad;
 
@@ -294,6 +295,7 @@ nfs_connect(struct nfsmount *nmp, struct
                        goto bad;
        }
 
+       solock(so);
        /*
         * Protocols that do not require connections may be optionally left
         * unconnected for servers that reply from a port other than NFS_PORT.
@@ -301,12 +303,12 @@ nfs_connect(struct nfsmount *nmp, struct
        if (nmp->nm_flag & NFSMNT_NOCONN) {
                if (nmp->nm_soflags & PR_CONNREQUIRED) {
                        error = ENOTCONN;
-                       goto bad;
+                       goto bad_locked;
                }
        } else {
                error = soconnect(so, nmp->nm_nam);
                if (error)
-                       goto bad;
+                       goto bad_locked;
 
                /*
                 * Wait for the connection to complete. Cribbed from the
@@ -320,13 +322,13 @@ nfs_connect(struct nfsmount *nmp, struct
                            so->so_error == 0 && rep &&
                            (error = nfs_sigintr(nmp, rep, rep->r_procp)) != 0){
                                so->so_state &= ~SS_ISCONNECTING;
-                               goto bad;
+                               goto bad_locked;
                        }
                }
                if (so->so_error) {
                        error = so->so_error;
                        so->so_error = 0;
-                       goto bad;
+                       goto bad_locked;
                }
        }
        /*
@@ -338,6 +340,7 @@ nfs_connect(struct nfsmount *nmp, struct
                so->so_snd.sb_timeo_nsecs = SEC_TO_NSEC(5);
        else
                so->so_snd.sb_timeo_nsecs = INFSLP;
+       sounlock(so);
        if (nmp->nm_sotype == SOCK_DGRAM) {
                sndreserve = nmp->nm_wsize + NFS_MAXPKTHDR;
                rcvreserve = (max(nmp->nm_rsize, nmp->nm_readdirsize) +
@@ -360,9 +363,10 @@ nfs_connect(struct nfsmount *nmp, struct
        } else {
                panic("%s: nm_sotype %d", __func__, nmp->nm_sotype);
        }
+       solock(so);
        error = soreserve(so, sndreserve, rcvreserve);
        if (error)
-               goto bad;
+               goto bad_locked;
        so->so_rcv.sb_flags |= SB_NOINTR;
        so->so_snd.sb_flags |= SB_NOINTR;
        sounlock(so);
@@ -377,8 +381,9 @@ nfs_connect(struct nfsmount *nmp, struct
        nmp->nm_timeouts = 0;
        return (0);
 
-bad:
+bad_locked:
        sounlock(so);
+bad:
 
        m_freem(mopt);
        m_freem(nam);
Index: sys/nfs/nfs_syscalls.c
===================================================================
RCS file: /cvs/src/sys/nfs/nfs_syscalls.c,v
retrieving revision 1.118
diff -u -p -r1.118 nfs_syscalls.c
--- sys/nfs/nfs_syscalls.c      6 Jun 2022 14:45:41 -0000       1.118
+++ sys/nfs/nfs_syscalls.c      22 Jul 2023 14:38:43 -0000
@@ -249,8 +249,8 @@ nfssvc_addsock(struct file *fp, struct m
                siz = NFS_MAXPACKET;
        solock(so);
        error = soreserve(so, siz, siz); 
+       sounlock(so);
        if (error) {
-               sounlock(so);
                m_freem(mynam);
                return (error);
        }
@@ -275,6 +275,7 @@ nfssvc_addsock(struct file *fp, struct m
                sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m);
                m_freem(m);
        }
+       solock(so);
        so->so_rcv.sb_flags &= ~SB_NOINTR;
        so->so_rcv.sb_timeo_nsecs = INFSLP;
        so->so_snd.sb_flags &= ~SB_NOINTR;

Reply via email to