The branch main has been updated by markj:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=bd4a39cc93d9faf8b5c000855d5aa90df592dd49

commit bd4a39cc93d9faf8b5c000855d5aa90df592dd49
Author:     Mark Johnston <ma...@freebsd.org>
AuthorDate: 2021-09-07 18:49:53 +0000
Commit:     Mark Johnston <ma...@freebsd.org>
CommitDate: 2021-09-07 21:11:43 +0000

    socket: Properly interlock when transitioning to a listening socket
    
    Currently, most protocols implement pru_listen with something like the
    following:
    
            SOCK_LOCK(so);
            error = solisten_proto_check(so);
            if (error) {
                    SOCK_UNLOCK(so);
                    return (error);
            }
            solisten_proto(so);
            SOCK_UNLOCK(so);
    
    solisten_proto_check() fails if the socket is connected or connecting.
    However, the socket lock is not used during I/O, so this pattern is
    racy.
    
    The change modifies solisten_proto_check() to additionally acquire
    socket buffer locks, and the calling thread holds them until
    solisten_proto() or solisten_proto_abort() is called.  Now that the
    socket buffer locks are preserved across a listen(2), this change allows
    socket I/O paths to properly interlock with listen(2).
    
    This fixes a large number of syzbot reports, only one is listed below
    and the rest will be dup'ed to it.
    
    Reported by:    syzbot+9fece8a63c0e27273...@syzkaller.appspotmail.com
    Reviewed by:    tuexen, gallatin
    MFC after:      1 month
    Sponsored by:   The FreeBSD Foundation
    Differential Revision:  https://reviews.freebsd.org/D31659
---
 sys/kern/uipc_socket.c                             | 58 ++++++++++++++++++----
 sys/kern/uipc_usrreq.c                             | 28 +++++++----
 sys/netgraph/bluetooth/socket/ng_btsocket_l2cap.c  |  3 ++
 sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c |  5 +-
 sys/netinet/sctp_usrreq.c                          | 41 ++++++++-------
 sys/netinet/tcp_usrreq.c                           | 40 ++++++++++++---
 sys/sys/socketvar.h                                |  1 +
 7 files changed, 130 insertions(+), 46 deletions(-)

diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c
index 5932c28aca2f..b26d0591cbdd 100644
--- a/sys/kern/uipc_socket.c
+++ b/sys/kern/uipc_socket.c
@@ -418,12 +418,14 @@ soalloc(struct vnet *vnet)
         * a feature to change class of an existing lock, so we use DUPOK.
         */
        mtx_init(&so->so_lock, "socket", NULL, MTX_DEF | MTX_DUPOK);
+       so->so_snd.sb_mtx = &so->so_snd_mtx;
+       so->so_rcv.sb_mtx = &so->so_rcv_mtx;
        SOCKBUF_LOCK_INIT(&so->so_snd, "so_snd");
        SOCKBUF_LOCK_INIT(&so->so_rcv, "so_rcv");
        so->so_rcv.sb_sel = &so->so_rdsel;
        so->so_snd.sb_sel = &so->so_wrsel;
-       sx_init(&so->so_snd.sb_sx, "so_snd_sx");
-       sx_init(&so->so_rcv.sb_sx, "so_rcv_sx");
+       sx_init(&so->so_snd_sx, "so_snd_sx");
+       sx_init(&so->so_rcv_sx, "so_rcv_sx");
        TAILQ_INIT(&so->so_snd.sb_aiojobq);
        TAILQ_INIT(&so->so_rcv.sb_aiojobq);
        TASK_INIT(&so->so_snd.sb_aiotask, 0, soaio_snd, so);
@@ -487,8 +489,8 @@ sodealloc(struct socket *so)
                if (so->so_snd.sb_hiwat)
                        (void)chgsbsize(so->so_cred->cr_uidinfo,
                            &so->so_snd.sb_hiwat, 0, RLIM_INFINITY);
-               sx_destroy(&so->so_snd.sb_sx);
-               sx_destroy(&so->so_rcv.sb_sx);
+               sx_destroy(&so->so_snd_sx);
+               sx_destroy(&so->so_rcv_sx);
                SOCKBUF_LOCK_DESTROY(&so->so_snd);
                SOCKBUF_LOCK_DESTROY(&so->so_rcv);
        }
@@ -899,18 +901,48 @@ solisten(struct socket *so, int backlog, struct thread 
*td)
        return (error);
 }
 
+/*
+ * Prepare for a call to solisten_proto().  Acquire all socket buffer locks in
+ * order to interlock with socket I/O.
+ */
 int
 solisten_proto_check(struct socket *so)
 {
-
        SOCK_LOCK_ASSERT(so);
 
-       if (so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING |
-           SS_ISDISCONNECTING))
+       if ((so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING |
+           SS_ISDISCONNECTING)) != 0)
                return (EINVAL);
+
+       /*
+        * Sleeping is not permitted here, so simply fail if userspace is
+        * attempting to transmit or receive on the socket.  This kind of
+        * transient failure is not ideal, but it should occur only if userspace
+        * is misusing the socket interfaces.
+        */
+       if (!sx_try_xlock(&so->so_snd_sx))
+               return (EAGAIN);
+       if (!sx_try_xlock(&so->so_rcv_sx)) {
+               sx_xunlock(&so->so_snd_sx);
+               return (EAGAIN);
+       }
+       mtx_lock(&so->so_snd_mtx);
+       mtx_lock(&so->so_rcv_mtx);
        return (0);
 }
 
+/*
+ * Undo the setup done by solisten_proto_check().
+ */
+void
+solisten_proto_abort(struct socket *so)
+{
+       mtx_unlock(&so->so_snd_mtx);
+       mtx_unlock(&so->so_rcv_mtx);
+       sx_xunlock(&so->so_snd_sx);
+       sx_xunlock(&so->so_rcv_sx);
+}
+
 void
 solisten_proto(struct socket *so, int backlog)
 {
@@ -920,6 +952,9 @@ solisten_proto(struct socket *so, int backlog)
        sbintime_t sbrcv_timeo, sbsnd_timeo;
 
        SOCK_LOCK_ASSERT(so);
+       KASSERT((so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING |
+           SS_ISDISCONNECTING)) == 0,
+           ("%s: bad socket state %p", __func__, so));
 
        if (SOLISTENING(so))
                goto listening;
@@ -938,10 +973,6 @@ solisten_proto(struct socket *so, int backlog)
 
        sbdestroy(&so->so_snd, so);
        sbdestroy(&so->so_rcv, so);
-       sx_destroy(&so->so_snd.sb_sx);
-       sx_destroy(&so->so_rcv.sb_sx);
-       SOCKBUF_LOCK_DESTROY(&so->so_snd);
-       SOCKBUF_LOCK_DESTROY(&so->so_rcv);
 
 #ifdef INVARIANTS
        bzero(&so->so_rcv,
@@ -974,6 +1005,11 @@ listening:
        if (backlog < 0 || backlog > somaxconn)
                backlog = somaxconn;
        so->sol_qlimit = backlog;
+
+       mtx_unlock(&so->so_snd_mtx);
+       mtx_unlock(&so->so_rcv_mtx);
+       sx_xunlock(&so->so_snd_sx);
+       sx_xunlock(&so->so_rcv_sx);
 }
 
 /*
diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c
index c736f35b5ee0..5add930bfa8e 100644
--- a/sys/kern/uipc_usrreq.c
+++ b/sys/kern/uipc_usrreq.c
@@ -890,13 +890,17 @@ uipc_listen(struct socket *so, int backlog, struct thread 
*td)
        if (so->so_type != SOCK_STREAM && so->so_type != SOCK_SEQPACKET)
                return (EOPNOTSUPP);
 
+       /*
+        * Synchronize with concurrent connection attempts.
+        */
+       error = 0;
        unp = sotounpcb(so);
-       KASSERT(unp != NULL, ("uipc_listen: unp == NULL"));
-
        UNP_PCB_LOCK(unp);
-       if (unp->unp_vnode == NULL) {
-               /* Already connected or not bound to an address. */
-               error = unp->unp_conn != NULL ? EINVAL : EDESTADDRREQ;
+       if (unp->unp_conn != NULL || (unp->unp_flags & UNP_CONNECTING) != 0)
+               error = EINVAL;
+       else if (unp->unp_vnode == NULL)
+               error = EDESTADDRREQ;
+       if (error != 0) {
                UNP_PCB_UNLOCK(unp);
                return (error);
        }
@@ -1523,6 +1527,7 @@ unp_connectat(int fd, struct socket *so, struct sockaddr 
*nam,
        bcopy(soun->sun_path, buf, len);
        buf[len] = 0;
 
+       error = 0;
        unp = sotounpcb(so);
        UNP_PCB_LOCK(unp);
        for (;;) {
@@ -1538,13 +1543,16 @@ unp_connectat(int fd, struct socket *so, struct 
sockaddr *nam,
                 * lock the peer socket, to ensure that unp_conn cannot
                 * transition between two valid sockets while locks are dropped.
                 */
-               if (unp->unp_conn != NULL) {
-                       UNP_PCB_UNLOCK(unp);
-                       return (EISCONN);
+               if (SOLISTENING(so))
+                       error = EOPNOTSUPP;
+               else if (unp->unp_conn != NULL)
+                       error = EISCONN;
+               else if ((unp->unp_flags & UNP_CONNECTING) != 0) {
+                       error = EALREADY;
                }
-               if ((unp->unp_flags & UNP_CONNECTING) != 0) {
+               if (error != 0) {
                        UNP_PCB_UNLOCK(unp);
-                       return (EALREADY);
+                       return (error);
                }
                if (unp->unp_pairbusy > 0) {
                        unp->unp_flags |= UNP_WAITING;
diff --git a/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap.c 
b/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap.c
index cd620fe3aef9..18d7a89b7a2f 100644
--- a/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap.c
+++ b/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap.c
@@ -2506,14 +2506,17 @@ ng_btsocket_l2cap_listen(struct socket *so, int 
backlog, struct thread *td)
        if (error != 0)
                goto out;
        if (pcb == NULL) {
+               solisten_proto_abort(so);
                error = EINVAL;
                goto out;
        }
        if (ng_btsocket_l2cap_node == NULL) {
+               solisten_proto_abort(so);
                error = EINVAL;
                goto out;
        }
        if (pcb->psm == 0) {
+               solisten_proto_abort(so);
                error = EADDRNOTAVAIL;
                goto out;
        }
diff --git a/sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c 
b/sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c
index c0704bce55fa..5b7bbeb45407 100644
--- a/sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c
+++ b/sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c
@@ -894,6 +894,7 @@ ng_btsocket_rfcomm_listen(struct socket *so, int backlog, 
struct thread *td)
                 * from socreate()
                 */
                if (l2so == NULL) {
+                       solisten_proto_abort(so);
                        error = socreate_error;
                        goto out;
                }
@@ -907,8 +908,10 @@ ng_btsocket_rfcomm_listen(struct socket *so, int backlog, 
struct thread *td)
                 */
                error = ng_btsocket_rfcomm_session_create(&s, l2so,
                                        NG_HCI_BDADDR_ANY, NULL, td);
-               if (error != 0)
+               if (error != 0) {
+                       solisten_proto_abort(so);
                        goto out;
+               }
                l2so = NULL;
        }
        SOCK_LOCK(so);
diff --git a/sys/netinet/sctp_usrreq.c b/sys/netinet/sctp_usrreq.c
index 62d6996ab60d..76d73a11304a 100644
--- a/sys/netinet/sctp_usrreq.c
+++ b/sys/netinet/sctp_usrreq.c
@@ -7201,7 +7201,8 @@ sctp_listen(struct socket *so, int backlog, struct thread 
*p)
                        }
                }
        }
-       SCTP_INP_RLOCK(inp);
+       SCTP_INP_INFO_WLOCK();
+       SCTP_INP_WLOCK(inp);
 #ifdef SCTP_LOCK_LOGGING
        if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOCK_LOGGING_ENABLE) {
                sctp_log_lock(inp, (struct sctp_tcb *)NULL, SCTP_LOG_LOCK_SOCK);
@@ -7209,10 +7210,9 @@ sctp_listen(struct socket *so, int backlog, struct 
thread *p)
 #endif
        SOCK_LOCK(so);
        error = solisten_proto_check(so);
-       SOCK_UNLOCK(so);
        if (error) {
-               SCTP_INP_RUNLOCK(inp);
-               return (error);
+               SOCK_UNLOCK(so);
+               goto out;
        }
        if ((sctp_is_feature_on(inp, SCTP_PCB_FLAGS_PORTREUSE)) &&
            (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) {
@@ -7223,39 +7223,44 @@ sctp_listen(struct socket *so, int backlog, struct 
thread *p)
                 * move the guy that was listener to the TCP Pool.
                 */
                if (sctp_swap_inpcb_for_listen(inp)) {
-                       SCTP_INP_RUNLOCK(inp);
-                       SCTP_LTRACE_ERR_RET(inp, NULL, NULL, 
SCTP_FROM_SCTP_USRREQ, EADDRINUSE);
-                       return (EADDRINUSE);
+                       SOCK_UNLOCK(so);
+                       solisten_proto_abort(so);
+                       error = EADDRINUSE;
+                       SCTP_LTRACE_ERR_RET(inp, NULL, NULL, 
SCTP_FROM_SCTP_USRREQ, error);
+                       goto out;
                }
        }
 
        if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) &&
            (inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED)) {
-               /* We are already connected AND the TCP model */
-               SCTP_INP_RUNLOCK(inp);
-               SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, 
EADDRINUSE);
-               return (EADDRINUSE);
+               SOCK_UNLOCK(so);
+               solisten_proto_abort(so);
+               error = EADDRINUSE;
+               SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, 
error);
+               goto out;
        }
-       SCTP_INP_RUNLOCK(inp);
        if (inp->sctp_flags & SCTP_PCB_FLAGS_UNBOUND) {
-               /* We must do a bind. */
-               if ((error = sctp_inpcb_bind(so, NULL, NULL, p))) {
+               if ((error = sctp_inpcb_bind_locked(inp, NULL, NULL, p))) {
+                       SOCK_UNLOCK(so);
+                       solisten_proto_abort(so);
                        /* bind error, probably perm */
-                       return (error);
+                       goto out;
                }
        }
-       SCTP_INP_WLOCK(inp);
        if ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) == 0) {
-               SOCK_LOCK(so);
                solisten_proto(so, backlog);
-               SOCK_UNLOCK(so);
+       } else {
+               solisten_proto_abort(so);
        }
+       SOCK_UNLOCK(so);
        if (backlog > 0) {
                inp->sctp_flags |= SCTP_PCB_FLAGS_ACCEPTING;
        } else {
                inp->sctp_flags &= ~SCTP_PCB_FLAGS_ACCEPTING;
        }
+out:
        SCTP_INP_WUNLOCK(inp);
+       SCTP_INP_INFO_WUNLOCK();
        return (error);
 }
 
diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c
index bcd7d18d9d62..3a1608cc106a 100644
--- a/sys/netinet/tcp_usrreq.c
+++ b/sys/netinet/tcp_usrreq.c
@@ -457,10 +457,15 @@ tcp_usr_listen(struct socket *so, int backlog, struct 
thread *td)
        TCPDEBUG1();
        SOCK_LOCK(so);
        error = solisten_proto_check(so);
-       INP_HASH_WLOCK(&V_tcbinfo);
-       if (error == 0 && inp->inp_lport == 0)
-               error = in_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);
-       INP_HASH_WUNLOCK(&V_tcbinfo);
+       if (error != 0) {
+               SOCK_UNLOCK(so);
+               goto out;
+       }
+       if (inp->inp_lport == 0) {
+               INP_HASH_WLOCK(&V_tcbinfo);
+               error = in_pcbbind(inp, NULL, td->td_ucred);
+               INP_HASH_WUNLOCK(&V_tcbinfo);
+       }
        if (error == 0) {
                tcp_state_change(tp, TCPS_LISTEN);
                solisten_proto(so, backlog);
@@ -468,6 +473,8 @@ tcp_usr_listen(struct socket *so, int backlog, struct 
thread *td)
                if ((so->so_options & SO_NO_OFFLOAD) == 0)
                        tcp_offload_listen_start(tp);
 #endif
+       } else {
+               solisten_proto_abort(so);
        }
        SOCK_UNLOCK(so);
 
@@ -504,12 +511,16 @@ tcp6_usr_listen(struct socket *so, int backlog, struct 
thread *td)
        TCPDEBUG1();
        SOCK_LOCK(so);
        error = solisten_proto_check(so);
+       if (error != 0) {
+               SOCK_UNLOCK(so);
+               goto out;
+       }
        INP_HASH_WLOCK(&V_tcbinfo);
-       if (error == 0 && inp->inp_lport == 0) {
+       if (inp->inp_lport == 0) {
                inp->inp_vflag &= ~INP_IPV4;
                if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0)
                        inp->inp_vflag |= INP_IPV4;
-               error = in6_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);
+               error = in6_pcbbind(inp, NULL, td->td_ucred);
        }
        INP_HASH_WUNLOCK(&V_tcbinfo);
        if (error == 0) {
@@ -519,6 +530,8 @@ tcp6_usr_listen(struct socket *so, int backlog, struct 
thread *td)
                if ((so->so_options & SO_NO_OFFLOAD) == 0)
                        tcp_offload_listen_start(tp);
 #endif
+       } else {
+               solisten_proto_abort(so);
        }
        SOCK_UNLOCK(so);
 
@@ -581,6 +594,10 @@ tcp_usr_connect(struct socket *so, struct sockaddr *nam, 
struct thread *td)
                error = ECONNREFUSED;
                goto out;
        }
+       if (SOLISTENING(so)) {
+               error = EOPNOTSUPP;
+               goto out;
+       }
        tp = intotcpcb(inp);
        TCPDEBUG1();
        NET_EPOCH_ENTER(et);
@@ -643,6 +660,10 @@ tcp6_usr_connect(struct socket *so, struct sockaddr *nam, 
struct thread *td)
                error = ECONNREFUSED;
                goto out;
        }
+       if (SOLISTENING(so)) {
+               error = EINVAL;
+               goto out;
+       }
        tp = intotcpcb(inp);
        TCPDEBUG1();
 #ifdef INET
@@ -1021,6 +1042,10 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf 
*m,
 
        TCPDEBUG1();
        if (nam != NULL && tp->t_state < TCPS_SYN_SENT) {
+               if (tp->t_state == TCPS_LISTEN) {
+                       error = EINVAL;
+                       goto out;
+               }
                switch (nam->sa_family) {
 #ifdef INET
                case AF_INET:
@@ -1119,6 +1144,9 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
                sbappendstream(&so->so_snd, m, flags);
                m = NULL;
                if (nam && tp->t_state < TCPS_SYN_SENT) {
+                       KASSERT(tp->t_state == TCPS_CLOSED,
+                           ("%s: tp %p is listening", __func__, tp));
+
                        /*
                         * Do implied connect if not yet connected,
                         * initialize window to default value, and
diff --git a/sys/sys/socketvar.h b/sys/sys/socketvar.h
index 69e182dfa9a5..69dd1706e366 100644
--- a/sys/sys/socketvar.h
+++ b/sys/sys/socketvar.h
@@ -453,6 +453,7 @@ void        sofree(struct socket *so);
 void   sohasoutofband(struct socket *so);
 int    solisten(struct socket *so, int backlog, struct thread *td);
 void   solisten_proto(struct socket *so, int backlog);
+void   solisten_proto_abort(struct socket *so);
 int    solisten_proto_check(struct socket *so);
 int    solisten_dequeue(struct socket *, struct socket **, int);
 struct socket *
_______________________________________________
dev-commits-src-main@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/dev-commits-src-main
To unsubscribe, send any mail to "dev-commits-src-main-unsubscr...@freebsd.org"

Reply via email to