The branch main has been updated by rscheff:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=032bf749fd44ac5ff20aab2c3d8e3c05491778ea

commit 032bf749fd44ac5ff20aab2c3d8e3c05491778ea
Author:     Richard Scheffenegger <rsch...@freebsd.org>
AuthorDate: 2021-05-21 09:00:53 +0000
Commit:     Richard Scheffenegger <rsch...@freebsd.org>
CommitDate: 2021-05-21 09:07:51 +0000

    [tcp] Keep socket buffer locked until upcall
    
    r367492 would unlock the socket buffer before eventually calling the upcall.
    This leads to problematic interaction with NFS kernel server/client 
components
    (MP threads) accessing the socket buffer with potentially not correctly 
updated
    state.
    
    Reported by: rmacklem
    Reviewed By: tuexen, #transport
    Tested by: rmacklem, otis
    MFC after: 2 weeks
    Sponsored By: NetApp, Inc.
    Differential Revision: https://reviews.freebsd.org/D29690
---
 sys/netinet/tcp_input.c                  | 38 ++++++++++++++------------------
 sys/netinet/tcp_reass.c                  |  2 --
 sys/netinet/tcp_stacks/bbr.c             | 20 ++++++++---------
 sys/netinet/tcp_stacks/rack.c            | 20 ++++++++---------
 sys/netinet/tcp_stacks/rack_bbr_common.c |  1 -
 sys/netinet/tcp_var.h                    |  2 +-
 6 files changed, 37 insertions(+), 46 deletions(-)

diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
index 49db8cc63cb3..18ef52959c15 100644
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -1509,18 +1509,17 @@ tcp_handle_wakeup(struct tcpcb *tp, struct socket *so)
         * the TIME_WAIT state before coming here, we need
         * to check if the socket is still connected.
         */
-       if ((so->so_state & SS_ISCONNECTED) == 0)
+       if (tp == NULL) {
                return;
+       }
+       if (so == NULL) {
+               return;
+       }
        INP_LOCK_ASSERT(tp->t_inpcb);
        if (tp->t_flags & TF_WAKESOR) {
                tp->t_flags &= ~TF_WAKESOR;
-               SOCKBUF_UNLOCK_ASSERT(&so->so_rcv);
-               sorwakeup(so);
-       }
-       if (tp->t_flags & TF_WAKESOW) {
-               tp->t_flags &= ~TF_WAKESOW;
-               SOCKBUF_UNLOCK_ASSERT(&so->so_snd);
-               sowwakeup(so);
+               SOCKBUF_LOCK_ASSERT(&so->so_rcv);
+               sorwakeup_locked(so);
        }
 }
 
@@ -1898,7 +1897,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct 
socket *so,
                                else if (!tcp_timer_active(tp, TT_PERSIST))
                                        tcp_timer_activate(tp, TT_REXMT,
                                                      tp->t_rxtcur);
-                               tp->t_flags |= TF_WAKESOW;
+                               sowwakeup(so);
                                if (sbavail(&so->so_snd))
                                        (void) tp->t_fb->tfb_tcp_output(tp);
                                goto check_delack;
@@ -1963,8 +1962,8 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct 
socket *so,
                                m_adj(m, drop_hdrlen);  /* delayed header drop 
*/
                                sbappendstream_locked(&so->so_rcv, m, 0);
                        }
-                       SOCKBUF_UNLOCK(&so->so_rcv);
-                       tp->t_flags |= TF_WAKESOR;
+                       /* NB: sorwakeup_locked() does an implicit unlock. */
+                       sorwakeup_locked(so);
                        if (DELAY_ACK(tp, tlen)) {
                                tp->t_flags |= TF_DELACK;
                        } else {
@@ -2502,9 +2501,11 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct 
socket *so,
                 * If segment contains data or ACK, will call tcp_reass()
                 * later; if not, do so now to pass queued data to user.
                 */
-               if (tlen == 0 && (thflags & TH_FIN) == 0)
+               if (tlen == 0 && (thflags & TH_FIN) == 0) {
                        (void) tcp_reass(tp, (struct tcphdr *)0, NULL, 0,
                            (struct mbuf *)0);
+                       tcp_handle_wakeup(tp, so);
+               }
                tp->snd_wl1 = th->th_seq - 1;
                /* FALLTHROUGH */
 
@@ -2959,8 +2960,8 @@ process_ACK:
                                tp->snd_wnd = 0;
                        ourfinisacked = 0;
                }
-               SOCKBUF_UNLOCK(&so->so_snd);
-               tp->t_flags |= TF_WAKESOW;
+               /* NB: sowwakeup_locked() does an implicit unlock. */
+               sowwakeup_locked(so);
                m_freem(mfree);
                /* Detect una wraparound. */
                if (!IN_RECOVERY(tp->t_flags) &&
@@ -3181,7 +3182,6 @@ dodata:                                                   
/* XXX */
                                m_freem(m);
                        else
                                sbappendstream_locked(&so->so_rcv, m, 0);
-                       SOCKBUF_UNLOCK(&so->so_rcv);
                        tp->t_flags |= TF_WAKESOR;
                } else {
                        /*
@@ -3227,6 +3227,7 @@ dodata:                                                   
/* XXX */
                                    save_start + tlen);
                        }
                }
+               tcp_handle_wakeup(tp, so);
 #if 0
                /*
                 * Note the amount of data that peer has sent into
@@ -3250,9 +3251,8 @@ dodata:                                                   
/* XXX */
         */
        if (thflags & TH_FIN) {
                if (TCPS_HAVERCVDFIN(tp->t_state) == 0) {
-                       socantrcvmore(so);
                        /* The socket upcall is handled by socantrcvmore. */
-                       tp->t_flags &= ~TF_WAKESOR;
+                       socantrcvmore(so);
                        /*
                         * If connection is half-synchronized
                         * (ie NEEDSYN flag on) then delay ACK,
@@ -3316,7 +3316,6 @@ check_delack:
                tp->t_flags &= ~TF_DELACK;
                tcp_timer_activate(tp, TT_DELACK, tcp_delacktime);
        }
-       tcp_handle_wakeup(tp, so);
        INP_WUNLOCK(tp->t_inpcb);
        return;
 
@@ -3350,7 +3349,6 @@ dropafterack:
        TCP_PROBE3(debug__input, tp, th, m);
        tp->t_flags |= TF_ACKNOW;
        (void) tp->t_fb->tfb_tcp_output(tp);
-       tcp_handle_wakeup(tp, so);
        INP_WUNLOCK(tp->t_inpcb);
        m_freem(m);
        return;
@@ -3358,7 +3356,6 @@ dropafterack:
 dropwithreset:
        if (tp != NULL) {
                tcp_dropwithreset(m, th, tp, tlen, rstreason);
-               tcp_handle_wakeup(tp, so);
                INP_WUNLOCK(tp->t_inpcb);
        } else
                tcp_dropwithreset(m, th, NULL, tlen, rstreason);
@@ -3375,7 +3372,6 @@ drop:
 #endif
        TCP_PROBE3(debug__input, tp, th, m);
        if (tp != NULL) {
-               tcp_handle_wakeup(tp, so);
                INP_WUNLOCK(tp->t_inpcb);
        }
        m_freem(m);
diff --git a/sys/netinet/tcp_reass.c b/sys/netinet/tcp_reass.c
index 310d7d540507..8e24e7412473 100644
--- a/sys/netinet/tcp_reass.c
+++ b/sys/netinet/tcp_reass.c
@@ -959,7 +959,6 @@ new_entry:
                } else {
                        sbappendstream_locked(&so->so_rcv, m, 0);
                }
-               SOCKBUF_UNLOCK(&so->so_rcv);
                tp->t_flags |= TF_WAKESOR;
                return (flags);
        }
@@ -1108,7 +1107,6 @@ present:
 #ifdef TCP_REASS_LOGGING
        tcp_reass_log_dump(tp);
 #endif
-       SOCKBUF_UNLOCK(&so->so_rcv);
        tp->t_flags |= TF_WAKESOR;
        return (flags);
 }
diff --git a/sys/netinet/tcp_stacks/bbr.c b/sys/netinet/tcp_stacks/bbr.c
index 22762d21c289..dd4cbfd7dc53 100644
--- a/sys/netinet/tcp_stacks/bbr.c
+++ b/sys/netinet/tcp_stacks/bbr.c
@@ -7825,8 +7825,8 @@ bbr_process_ack(struct mbuf *m, struct tcphdr *th, struct 
socket *so,
        acked_amount = min(acked, (int)sbavail(&so->so_snd));
        tp->snd_wnd -= acked_amount;
        mfree = sbcut_locked(&so->so_snd, acked_amount);
-       SOCKBUF_UNLOCK(&so->so_snd);
-       tp->t_flags |= TF_WAKESOW;
+       /* NB: sowwakeup_locked() does an implicit unlock. */
+       sowwakeup_locked(so);
        m_freem(mfree);
        if (SEQ_GT(th->th_ack, tp->snd_una)) {
                bbr_collapse_rtt(tp, bbr, TCP_REXMTVAL(tp));
@@ -8302,7 +8302,6 @@ bbr_process_data(struct mbuf *m, struct tcphdr *th, 
struct socket *so,
                                appended =
 #endif
                                        sbappendstream_locked(&so->so_rcv, m, 
0);
-                       SOCKBUF_UNLOCK(&so->so_rcv);
                        tp->t_flags |= TF_WAKESOR;
 #ifdef NETFLIX_SB_LIMITS
                        if (so->so_rcv.sb_shlim && appended != mcnt)
@@ -8353,6 +8352,7 @@ bbr_process_data(struct mbuf *m, struct tcphdr *th, 
struct socket *so,
                                    save_start + tlen);
                        }
                }
+               tcp_handle_wakeup(tp, so);
        } else {
                m_freem(m);
                thflags &= ~TH_FIN;
@@ -8364,9 +8364,8 @@ bbr_process_data(struct mbuf *m, struct tcphdr *th, 
struct socket *so,
         */
        if (thflags & TH_FIN) {
                if (TCPS_HAVERCVDFIN(tp->t_state) == 0) {
-                       socantrcvmore(so);
                        /* The socket upcall is handled by socantrcvmore. */
-                       tp->t_flags &= ~TF_WAKESOR;
+                       socantrcvmore(so);
                        /*
                         * If connection is half-synchronized (ie NEEDSYN
                         * flag on) then delay ACK, so it may be piggybacked
@@ -8557,8 +8556,8 @@ bbr_do_fastnewdata(struct mbuf *m, struct tcphdr *th, 
struct socket *so,
                        sbappendstream_locked(&so->so_rcv, m, 0);
                ctf_calc_rwin(so, tp);
        }
-       SOCKBUF_UNLOCK(&so->so_rcv);
-       tp->t_flags |= TF_WAKESOR;
+       /* NB: sorwakeup_locked() does an implicit unlock. */
+       sorwakeup_locked(so);
 #ifdef NETFLIX_SB_LIMITS
        if (so->so_rcv.sb_shlim && mcnt != appended)
                counter_fo_release(so->so_rcv.sb_shlim, mcnt - appended);
@@ -8749,7 +8748,7 @@ bbr_fastack(struct mbuf *m, struct tcphdr *th, struct 
socket *so,
                    &tcp_savetcp, 0);
 #endif
        /* Wake up the socket if we have room to write more */
-       tp->t_flags |= TF_WAKESOW;
+       sowwakeup(so);
        if (tp->snd_una == tp->snd_max) {
                /* Nothing left outstanding */
                bbr_log_progress_event(bbr, tp, ticks, PROGRESS_CLEAR, 
__LINE__);
@@ -9157,9 +9156,11 @@ bbr_do_syn_recv(struct mbuf *m, struct tcphdr *th, 
struct socket *so,
         * If segment contains data or ACK, will call tcp_reass() later; if
         * not, do so now to pass queued data to user.
         */
-       if (tlen == 0 && (thflags & TH_FIN) == 0)
+       if (tlen == 0 && (thflags & TH_FIN) == 0) {
                (void)tcp_reass(tp, (struct tcphdr *)0, NULL, 0,
                        (struct mbuf *)0);
+               tcp_handle_wakeup(tp, so);
+       }
        tp->snd_wl1 = th->th_seq - 1;
        if (bbr_process_ack(m, th, so, tp, to, tiwin, tlen, &ourfinisacked, 
thflags, &ret_val)) {
                return (ret_val);
@@ -11711,7 +11712,6 @@ bbr_do_segment(struct mbuf *m, struct tcphdr *th, 
struct socket *so,
        retval = bbr_do_segment_nounlock(m, th, so, tp,
                                         drop_hdrlen, tlen, iptos, 0, &tv);
        if (retval == 0) {
-               tcp_handle_wakeup(tp, so);
                INP_WUNLOCK(tp->t_inpcb);
        }
 }
diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c
index 73b47745cbac..efab90a2e5bd 100644
--- a/sys/netinet/tcp_stacks/rack.c
+++ b/sys/netinet/tcp_stacks/rack.c
@@ -9857,8 +9857,8 @@ rack_process_ack(struct mbuf *m, struct tcphdr *th, 
struct socket *so,
        if (acked_amount && sbavail(&so->so_snd))
                rack_adjust_sendmap(rack, &so->so_snd, tp->snd_una);
        rack_log_wakeup(tp,rack, &so->so_snd, acked, 2);
-       SOCKBUF_UNLOCK(&so->so_snd);
-       tp->t_flags |= TF_WAKESOW;
+       /* NB: sowwakeup_locked() does an implicit unlock. */
+       sowwakeup_locked(so);
        m_freem(mfree);
        if (SEQ_GT(tp->snd_una, tp->snd_recover))
                tp->snd_recover = tp->snd_una;
@@ -10208,7 +10208,6 @@ rack_process_data(struct mbuf *m, struct tcphdr *th, 
struct socket *so,
                                        sbappendstream_locked(&so->so_rcv, m, 
0);
 
                        rack_log_wakeup(tp,rack, &so->so_rcv, tlen, 1);
-                       SOCKBUF_UNLOCK(&so->so_rcv);
                        tp->t_flags |= TF_WAKESOR;
 #ifdef NETFLIX_SB_LIMITS
                        if (so->so_rcv.sb_shlim && appended != mcnt)
@@ -10265,6 +10264,7 @@ rack_process_data(struct mbuf *m, struct tcphdr *th, 
struct socket *so,
                                    save_start + tlen);
                        }
                }
+               tcp_handle_wakeup(tp, so);
        } else {
                m_freem(m);
                thflags &= ~TH_FIN;
@@ -10276,9 +10276,8 @@ rack_process_data(struct mbuf *m, struct tcphdr *th, 
struct socket *so,
         */
        if (thflags & TH_FIN) {
                if (TCPS_HAVERCVDFIN(tp->t_state) == 0) {
-                       socantrcvmore(so);
                        /* The socket upcall is handled by socantrcvmore. */
-                       tp->t_flags &= ~TF_WAKESOR;
+                       socantrcvmore(so);
                        /*
                         * If connection is half-synchronized (ie NEEDSYN
                         * flag on) then delay ACK, so it may be piggybacked
@@ -10471,7 +10470,6 @@ rack_do_fastnewdata(struct mbuf *m, struct tcphdr *th, 
struct socket *so,
                ctf_calc_rwin(so, tp);
        }
        rack_log_wakeup(tp,rack, &so->so_rcv, tlen, 1);
-       SOCKBUF_UNLOCK(&so->so_rcv);
        tp->t_flags |= TF_WAKESOR;
 #ifdef NETFLIX_SB_LIMITS
        if (so->so_rcv.sb_shlim && mcnt != appended)
@@ -10631,8 +10629,7 @@ rack_fastack(struct mbuf *m, struct tcphdr *th, struct 
socket *so,
                rack_adjust_sendmap(rack, &so->so_snd, tp->snd_una);
                /* Wake up the socket if we have room to write more */
                rack_log_wakeup(tp,rack, &so->so_snd, acked, 2);
-               SOCKBUF_UNLOCK(&so->so_snd);
-               tp->t_flags |= TF_WAKESOW;
+               sowwakeup(so);
                m_freem(mfree);
                tp->t_rxtshift = 0;
                RACK_TCPT_RANGESET(tp->t_rxtcur, RACK_REXMTVAL(tp),
@@ -11070,9 +11067,11 @@ rack_do_syn_recv(struct mbuf *m, struct tcphdr *th, 
struct socket *so,
         * If segment contains data or ACK, will call tcp_reass() later; if
         * not, do so now to pass queued data to user.
         */
-       if (tlen == 0 && (thflags & TH_FIN) == 0)
+       if (tlen == 0 && (thflags & TH_FIN) == 0) {
                (void) tcp_reass(tp, (struct tcphdr *)0, NULL, 0,
                    (struct mbuf *)0);
+               tcp_handle_wakeup(tp, so);
+       }
        tp->snd_wl1 = th->th_seq - 1;
        /* For syn-recv we need to possibly update the rtt */
        if ((to->to_flags & TOF_TS) != 0 && to->to_tsecr) {
@@ -13155,8 +13154,7 @@ rack_do_compressed_ack_processing(struct tcpcb *tp, 
struct socket *so, struct mb
                        rack_adjust_sendmap(rack, &so->so_snd, tp->snd_una);
                        /* Wake up the socket if we have room to write more */
                        rack_log_wakeup(tp,rack, &so->so_snd, acked, 2);
-                       SOCKBUF_UNLOCK(&so->so_snd);
-                       tp->t_flags |= TF_WAKESOW;
+                       sowwakeup(so);
                        m_freem(mfree);
                }
                /* update progress */
diff --git a/sys/netinet/tcp_stacks/rack_bbr_common.c 
b/sys/netinet/tcp_stacks/rack_bbr_common.c
index 6475a1a01c26..501d29ac48da 100644
--- a/sys/netinet/tcp_stacks/rack_bbr_common.c
+++ b/sys/netinet/tcp_stacks/rack_bbr_common.c
@@ -533,7 +533,6 @@ ctf_do_queued_segments(struct socket *so, struct tcpcb *tp, 
int have_pkt)
                        /* We lost the tcpcb (maybe a RST came in)? */
                        return(1);
                }
-               tcp_handle_wakeup(tp, so);
        }
        return (0);
 }
diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h
index b80746b1ede4..1742b3b1c514 100644
--- a/sys/netinet/tcp_var.h
+++ b/sys/netinet/tcp_var.h
@@ -408,7 +408,7 @@ TAILQ_HEAD(tcp_funchead, tcp_function);
 #define        TF_FORCEDATA    0x00800000      /* force out a byte */
 #define        TF_TSO          0x01000000      /* TSO enabled on this 
connection */
 #define        TF_TOE          0x02000000      /* this connection is offloaded 
*/
-#define        TF_WAKESOW      0x04000000      /* wake up send socket */
+#define        TF_UNUSED0      0x04000000      /* unused */
 #define        TF_UNUSED1      0x08000000      /* unused */
 #define        TF_LRD          0x10000000      /* Lost Retransmission 
Detection */
 #define        TF_CONGRECOVERY 0x20000000      /* congestion recovery mode */
_______________________________________________
dev-commits-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/dev-commits-src-all
To unsubscribe, send any mail to "dev-commits-src-all-unsubscr...@freebsd.org"

Reply via email to