The branch main has been updated by tuexen:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=2b5de4330ee1e99a7b5aacef0ff99febe5d6fc42

commit 2b5de4330ee1e99a7b5aacef0ff99febe5d6fc42
Author:     Michael Tuexen <tue...@freebsd.org>
AuthorDate: 2025-08-24 18:49:18 +0000
Commit:     Michael Tuexen <tue...@freebsd.org>
CommitDate: 2025-08-24 19:24:17 +0000

    tcp: improve the condition for detecting dup ACKs
    
    Take the condition of RFC 6675 into account.
    While there, remove stale comments.
    
    PR:                     282605
    Reviewed by:            cc (earlier version)
    MFC after:              1 week
    Sponsored by:           Netflix, Inc.
    Differential Revision:  https://reviews.freebsd.org/D51426
---
 sys/netinet/tcp_input.c | 499 +++++++++++++++++++++++-------------------------
 1 file changed, 236 insertions(+), 263 deletions(-)

diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
index 6492495dc583..ec7102223c2d 100644
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -2562,299 +2562,272 @@ tcp_do_segment(struct tcpcb *tp, struct mbuf *m, 
struct tcphdr *th,
                hhook_run_tcp_est_in(tp, th, &to);
 #endif
 
-               if (SEQ_LEQ(th->th_ack, tp->snd_una)) {
-                       maxseg = tcp_maxseg(tp);
-                       if (no_data &&
-                           (tiwin == tp->snd_wnd ||
-                           (tp->t_flags & TF_SACK_PERMIT))) {
+               if (SEQ_LT(th->th_ack, tp->snd_una)) {
+                       /* This is old ACK information, don't process it. */
+                       break;
+               }
+               if (th->th_ack == tp->snd_una) {
+                       /* Check if this is a duplicate ACK. */
+                       if ((tp->t_flags & TF_SACK_PERMIT) &&
+                           V_tcp_do_newsack) {
                                /*
-                                * If this is the first time we've seen a
-                                * FIN from the remote, this is not a
-                                * duplicate and it needs to be processed
-                                * normally.  This happens during a
-                                * simultaneous close.
+                                * If SEG.ACK == SND.UNA, RFC 6675 requires a
+                                * duplicate ACK to selectively acknowledge
+                                * at least one byte, which was not selectively
+                                * acknowledged before.
                                 */
-                               if ((thflags & TH_FIN) &&
-                                   (TCPS_HAVERCVDFIN(tp->t_state) == 0)) {
-                                       tp->t_dupacks = 0;
+                               if (sack_changed == SACK_NOCHANGE) {
                                        break;
                                }
-                               TCPSTAT_INC(tcps_rcvdupack);
-                               /*
-                                * If we have outstanding data (other than
-                                * a window probe), this is a completely
-                                * duplicate ack (ie, window info didn't
-                                * change and FIN isn't set),
-                                * the ack is the biggest we've
-                                * seen and we've seen exactly our rexmt
-                                * threshold of them, assume a packet
-                                * has been dropped and retransmit it.
-                                * Kludge snd_nxt & the congestion
-                                * window so we send only this one
-                                * packet.
-                                *
-                                * We know we're losing at the current
-                                * window size so do congestion avoidance
-                                * (set ssthresh to half the current window
-                                * and pull our congestion window back to
-                                * the new ssthresh).
-                                *
-                                * Dup acks mean that packets have left the
-                                * network (they're now cached at the receiver)
-                                * so bump cwnd by the amount in the receiver
-                                * to keep a constant cwnd packets in the
-                                * network.
-                                *
-                                * When using TCP ECN, notify the peer that
-                                * we reduced the cwnd.
-                                */
+                       } else {
                                /*
-                                * Following 2 kinds of acks should not affect
-                                * dupack counting:
-                                * 1) Old acks
-                                * 2) Acks with SACK but without any new SACK
-                                * information in them. These could result from
-                                * any anomaly in the network like a switch
-                                * duplicating packets or a possible DoS attack.
+                                * If SEG.ACK == SND.UNA, RFC 5681 requires a
+                                * duplicate ACK to have no data on it and to
+                                * not be a window update.
                                 */
-                               if (th->th_ack != tp->snd_una ||
-                                   (tcp_is_sack_recovery(tp, &to) &&
-                                   (sack_changed == SACK_NOCHANGE))) {
+                               if (!no_data || tiwin != tp->snd_wnd) {
                                        break;
-                               } else if (!tcp_timer_active(tp, TT_REXMT)) {
-                                       tp->t_dupacks = 0;
-                               } else if (++tp->t_dupacks > tcprexmtthresh ||
-                                           IN_FASTRECOVERY(tp->t_flags)) {
-                                       cc_ack_received(tp, th, nsegs,
-                                           CC_DUPACK);
-                                       if (V_tcp_do_prr &&
+                               }
+                       }
+                       /*
+                        * If this is the first time we've seen a
+                        * FIN from the remote, this is not a
+                        * duplicate ACK and it needs to be processed
+                        * normally.
+                        * This happens during a simultaneous close.
+                        */
+                       if ((thflags & TH_FIN) &&
+                           (TCPS_HAVERCVDFIN(tp->t_state) == 0)) {
+                               tp->t_dupacks = 0;
+                               break;
+                       }
+                       /* Perform duplicate ACK processing. */
+                       TCPSTAT_INC(tcps_rcvdupack);
+                       maxseg = tcp_maxseg(tp);
+                       if (!tcp_timer_active(tp, TT_REXMT)) {
+                               tp->t_dupacks = 0;
+                       } else if (++tp->t_dupacks > tcprexmtthresh ||
+                                   IN_FASTRECOVERY(tp->t_flags)) {
+                               cc_ack_received(tp, th, nsegs, CC_DUPACK);
+                               if (V_tcp_do_prr &&
+                                   IN_FASTRECOVERY(tp->t_flags) &&
+                                   (tp->t_flags & TF_SACK_PERMIT)) {
+                                       tcp_do_prr_ack(tp, th, &to,
+                                           sack_changed, &maxseg);
+                               } else if (tcp_is_sack_recovery(tp, &to) &&
                                            IN_FASTRECOVERY(tp->t_flags) &&
-                                           (tp->t_flags & TF_SACK_PERMIT)) {
-                                               tcp_do_prr_ack(tp, th, &to,
-                                                   sack_changed, &maxseg);
-                                       } else if (tcp_is_sack_recovery(tp, 
&to) &&
-                                                   
IN_FASTRECOVERY(tp->t_flags) &&
-                                                   (tp->snd_nxt == 
tp->snd_max)) {
-                                               int awnd;
+                                           (tp->snd_nxt == tp->snd_max)) {
+                                       int awnd;
 
-                                               /*
-                                                * Compute the amount of data 
in flight first.
-                                                * We can inject new data into 
the pipe iff
-                                                * we have less than ssthresh
-                                                * worth of data in flight.
-                                                */
-                                               awnd = tcp_compute_pipe(tp);
-                                               if (awnd < tp->snd_ssthresh) {
-                                                       tp->snd_cwnd += 
imax(maxseg,
-                                                           imin(2 * maxseg,
-                                                           
tp->sackhint.delivered_data));
-                                                       if (tp->snd_cwnd > 
tp->snd_ssthresh)
-                                                               tp->snd_cwnd = 
tp->snd_ssthresh;
-                                               }
-                                       } else if (tcp_is_sack_recovery(tp, 
&to) &&
-                                                   
IN_FASTRECOVERY(tp->t_flags) &&
-                                                   SEQ_LT(tp->snd_nxt, 
tp->snd_max)) {
+                                       /*
+                                        * Compute the amount of data in flight 
first.
+                                        * We can inject new data into the pipe 
iff
+                                        * we have less than ssthresh
+                                        * worth of data in flight.
+                                        */
+                                       awnd = tcp_compute_pipe(tp);
+                                       if (awnd < tp->snd_ssthresh) {
                                                tp->snd_cwnd += imax(maxseg,
                                                    imin(2 * maxseg,
                                                    
tp->sackhint.delivered_data));
-                                       } else {
-                                               tp->snd_cwnd += maxseg;
+                                               if (tp->snd_cwnd > 
tp->snd_ssthresh)
+                                                       tp->snd_cwnd = 
tp->snd_ssthresh;
                                        }
-                                       (void) tcp_output(tp);
-                                       goto drop;
-                               } else if (tp->t_dupacks == tcprexmtthresh ||
-                                           (tp->t_flags & TF_SACK_PERMIT &&
-                                            V_tcp_do_newsack &&
-                                            tp->sackhint.sacked_bytes >
-                                            (tcprexmtthresh - 1) * maxseg)) {
+                               } else if (tcp_is_sack_recovery(tp, &to) &&
+                                           IN_FASTRECOVERY(tp->t_flags) &&
+                                           SEQ_LT(tp->snd_nxt, tp->snd_max)) {
+                                       tp->snd_cwnd += imax(maxseg,
+                                           imin(2 * maxseg,
+                                           tp->sackhint.delivered_data));
+                               } else {
+                                       tp->snd_cwnd += maxseg;
+                               }
+                               (void) tcp_output(tp);
+                               goto drop;
+                       } else if (tp->t_dupacks == tcprexmtthresh ||
+                                   (tp->t_flags & TF_SACK_PERMIT &&
+                                    V_tcp_do_newsack &&
+                                    tp->sackhint.sacked_bytes >
+                                    (tcprexmtthresh - 1) * maxseg)) {
 enter_recovery:
-                                       /*
-                                        * Above is the RFC6675 trigger 
condition of
-                                        * more than (dupthresh-1)*maxseg 
sacked data.
-                                        * If the count of holes in the
-                                        * scoreboard is >= dupthresh, we could
-                                        * also enter loss recovery, but don't
-                                        * have that value readily available.
-                                        */
-                                       tp->t_dupacks = tcprexmtthresh;
-                                       tcp_seq onxt = tp->snd_nxt;
+                               /*
+                                * Above is the RFC6675 trigger condition of
+                                * more than (dupthresh-1)*maxseg sacked data.
+                                * If the count of holes in the
+                                * scoreboard is >= dupthresh, we could
+                                * also enter loss recovery, but don't
+                                * have that value readily available.
+                                */
+                               tp->t_dupacks = tcprexmtthresh;
+                               tcp_seq onxt = tp->snd_nxt;
 
-                                       /*
-                                        * If we're doing sack, check to
-                                        * see if we're already in sack
-                                        * recovery. If we're not doing sack,
-                                        * check to see if we're in newreno
-                                        * recovery.
-                                        */
-                                       if (tcp_is_sack_recovery(tp, &to)) {
-                                               if 
(IN_FASTRECOVERY(tp->t_flags)) {
-                                                       tp->t_dupacks = 0;
-                                                       break;
-                                               }
-                                       } else {
-                                               if (SEQ_LEQ(th->th_ack,
-                                                   tp->snd_recover)) {
-                                                       tp->t_dupacks = 0;
-                                                       break;
-                                               }
+                               /*
+                                * If we're doing sack, check to
+                                * see if we're already in sack
+                                * recovery. If we're not doing sack,
+                                * check to see if we're in newreno
+                                * recovery.
+                                */
+                               if (tcp_is_sack_recovery(tp, &to)) {
+                                       if (IN_FASTRECOVERY(tp->t_flags)) {
+                                               tp->t_dupacks = 0;
+                                               break;
                                        }
-                                       /* Congestion signal before ack. */
-                                       cc_cong_signal(tp, th, CC_NDUPACK);
-                                       cc_ack_received(tp, th, nsegs,
-                                           CC_DUPACK);
-                                       tcp_timer_activate(tp, TT_REXMT, 0);
-                                       tp->t_rtttime = 0;
-                                       if (V_tcp_do_prr) {
-                                               /*
-                                                * snd_ssthresh and snd_recover 
are
-                                                * already updated by 
cc_cong_signal.
-                                                */
-                                               if (tcp_is_sack_recovery(tp, 
&to)) {
-                                                       /*
-                                                        * Include Limited 
Transmit
-                                                        * segments here
-                                                        */
-                                                       
tp->sackhint.prr_delivered =
-                                                           imin(tp->snd_max - 
th->th_ack,
-                                                           (tp->snd_limited + 
1) * maxseg);
-                                               } else {
-                                                       
tp->sackhint.prr_delivered =
-                                                           maxseg;
-                                               }
-                                               tp->sackhint.recover_fs = max(1,
-                                                   tp->snd_nxt - tp->snd_una);
+                               } else {
+                                       if (SEQ_LEQ(th->th_ack,
+                                           tp->snd_recover)) {
+                                               tp->t_dupacks = 0;
+                                               break;
                                        }
-                                       tp->snd_limited = 0;
+                               }
+                               /* Congestion signal before ack. */
+                               cc_cong_signal(tp, th, CC_NDUPACK);
+                               cc_ack_received(tp, th, nsegs, CC_DUPACK);
+                               tcp_timer_activate(tp, TT_REXMT, 0);
+                               tp->t_rtttime = 0;
+                               if (V_tcp_do_prr) {
+                                       /*
+                                        * snd_ssthresh and snd_recover are
+                                        * already updated by cc_cong_signal.
+                                        */
                                        if (tcp_is_sack_recovery(tp, &to)) {
-                                               
TCPSTAT_INC(tcps_sack_recovery_episode);
                                                /*
-                                                * When entering LR after RTO 
due to
-                                                * Duplicate ACKs, retransmit 
existing
-                                                * holes from the scoreboard.
+                                                * Include Limited Transmit
+                                                * segments here
                                                 */
-                                               tcp_resend_sackholes(tp);
-                                               /* Avoid inflating cwnd in 
tcp_output */
-                                               tp->snd_nxt = tp->snd_max;
-                                               tp->snd_cwnd = 
tcp_compute_pipe(tp) +
+                                               tp->sackhint.prr_delivered =
+                                                   imin(tp->snd_max - 
th->th_ack,
+                                                   (tp->snd_limited + 1) * 
maxseg);
+                                       } else {
+                                               tp->sackhint.prr_delivered =
                                                    maxseg;
-                                               (void) tcp_output(tp);
-                                               /* Set cwnd to the expected 
flightsize */
-                                               tp->snd_cwnd = tp->snd_ssthresh;
-                                               if (SEQ_GT(th->th_ack, 
tp->snd_una)) {
-                                                       goto resume_partialack;
-                                               }
-                                               goto drop;
                                        }
-                                       tp->snd_nxt = th->th_ack;
-                                       tp->snd_cwnd = maxseg;
-                                       (void) tcp_output(tp);
-                                       KASSERT(tp->snd_limited <= 2,
-                                           ("%s: tp->snd_limited too big",
-                                           __func__));
-                                       tp->snd_cwnd = tp->snd_ssthresh +
-                                            maxseg *
-                                            (tp->t_dupacks - tp->snd_limited);
-                                       if (SEQ_GT(onxt, tp->snd_nxt))
-                                               tp->snd_nxt = onxt;
-                                       goto drop;
-                               } else if (V_tcp_do_rfc3042) {
-                                       /*
-                                        * Process first and second duplicate
-                                        * ACKs. Each indicates a segment
-                                        * leaving the network, creating room
-                                        * for more. Make sure we can send a
-                                        * packet on reception of each duplicate
-                                        * ACK by increasing snd_cwnd by one
-                                        * segment. Restore the original
-                                        * snd_cwnd after packet transmission.
-                                        */
-                                       cc_ack_received(tp, th, nsegs, 
CC_DUPACK);
-                                       uint32_t oldcwnd = tp->snd_cwnd;
-                                       tcp_seq oldsndmax = tp->snd_max;
-                                       u_int sent;
-                                       int avail;
-
-                                       KASSERT(tp->t_dupacks == 1 ||
-                                           tp->t_dupacks == 2,
-                                           ("%s: dupacks not 1 or 2",
-                                           __func__));
-                                       if (tp->t_dupacks == 1)
-                                               tp->snd_limited = 0;
-                                       if ((tp->snd_nxt == tp->snd_max) &&
-                                           (tp->t_rxtshift == 0))
-                                               tp->snd_cwnd =
-                                                   SEQ_SUB(tp->snd_nxt,
-                                                           tp->snd_una) -
-                                                       tcp_sack_adjust(tp);
-                                       tp->snd_cwnd +=
-                                           (tp->t_dupacks - tp->snd_limited) *
-                                           maxseg - tcp_sack_adjust(tp);
+                                       tp->sackhint.recover_fs = max(1,
+                                           tp->snd_nxt - tp->snd_una);
+                               }
+                               tp->snd_limited = 0;
+                               if (tcp_is_sack_recovery(tp, &to)) {
+                                       TCPSTAT_INC(tcps_sack_recovery_episode);
                                        /*
-                                        * Only call tcp_output when there
-                                        * is new data available to be sent
-                                        * or we need to send an ACK.
+                                        * When entering LR after RTO due to
+                                        * Duplicate ACKs, retransmit existing
+                                        * holes from the scoreboard.
                                         */
-                                       SOCK_SENDBUF_LOCK(so);
-                                       avail = sbavail(&so->so_snd);
-                                       SOCK_SENDBUF_UNLOCK(so);
-                                       if (tp->t_flags & TF_ACKNOW ||
-                                           (avail >=
-                                            SEQ_SUB(tp->snd_nxt, 
tp->snd_una))) {
-                                               (void) tcp_output(tp);
-                                       }
-                                       sent = SEQ_SUB(tp->snd_max, oldsndmax);
-                                       if (sent > maxseg) {
-                                               KASSERT((tp->t_dupacks == 2 &&
-                                                   tp->snd_limited == 0) ||
-                                                  (sent == maxseg + 1 &&
-                                                   tp->t_flags & TF_SENTFIN) ||
-                                                  (sent < 2 * maxseg &&
-                                                   tp->t_flags & TF_NODELAY),
-                                                   ("%s: sent too much: %u>%u",
-                                                   __func__, sent, maxseg));
-                                               tp->snd_limited = 2;
-                                       } else if (sent > 0) {
-                                               ++tp->snd_limited;
-                                       }
-                                       tp->snd_cwnd = oldcwnd;
+                                       tcp_resend_sackholes(tp);
+                                       /* Avoid inflating cwnd in tcp_output */
+                                       tp->snd_nxt = tp->snd_max;
+                                       tp->snd_cwnd = tcp_compute_pipe(tp) +
+                                           maxseg;
+                                       (void) tcp_output(tp);
+                                       /* Set cwnd to the expected flightsize 
*/
+                                       tp->snd_cwnd = tp->snd_ssthresh;
                                        goto drop;
                                }
-                       }
-                       break;
-               } else {
-                       /*
-                        * This ack is advancing the left edge, reset the
-                        * counter.
-                        */
-                       tp->t_dupacks = 0;
-                       /*
-                        * If this ack also has new SACK info, increment the
-                        * counter as per rfc6675. The variable
-                        * sack_changed tracks all changes to the SACK
-                        * scoreboard, including when partial ACKs without
-                        * SACK options are received, and clear the scoreboard
-                        * from the left side. Such partial ACKs should not be
-                        * counted as dupacks here.
-                        */
-                       if (tcp_is_sack_recovery(tp, &to) &&
-                           (((tp->t_rxtshift == 0) && (sack_changed != 
SACK_NOCHANGE)) ||
-                            ((tp->t_rxtshift > 0) && (sack_changed == 
SACK_NEWLOSS))) &&
-                           (tp->snd_nxt == tp->snd_max)) {
-                               tp->t_dupacks++;
-                               /* limit overhead by setting maxseg last */
-                               if (!IN_FASTRECOVERY(tp->t_flags) &&
-                                   (tp->sackhint.sacked_bytes >
-                                   ((tcprexmtthresh - 1) *
-                                   (maxseg = tcp_maxseg(tp))))) {
-                                       goto enter_recovery;
+                               tp->snd_nxt = th->th_ack;
+                               tp->snd_cwnd = maxseg;
+                               (void) tcp_output(tp);
+                               KASSERT(tp->snd_limited <= 2,
+                                   ("%s: tp->snd_limited too big",
+                                   __func__));
+                               tp->snd_cwnd = tp->snd_ssthresh +
+                                    maxseg *
+                                    (tp->t_dupacks - tp->snd_limited);
+                               if (SEQ_GT(onxt, tp->snd_nxt))
+                                       tp->snd_nxt = onxt;
+                               goto drop;
+                       } else if (V_tcp_do_rfc3042) {
+                               /*
+                                * Process first and second duplicate
+                                * ACKs. Each indicates a segment
+                                * leaving the network, creating room
+                                * for more. Make sure we can send a
+                                * packet on reception of each duplicate
+                                * ACK by increasing snd_cwnd by one
+                                * segment. Restore the original
+                                * snd_cwnd after packet transmission.
+                                */
+                               cc_ack_received(tp, th, nsegs, CC_DUPACK);
+                               uint32_t oldcwnd = tp->snd_cwnd;
+                               tcp_seq oldsndmax = tp->snd_max;
+                               u_int sent;
+                               int avail;
+
+                               KASSERT(tp->t_dupacks == 1 ||
+                                   tp->t_dupacks == 2,
+                                   ("%s: dupacks not 1 or 2",
+                                   __func__));
+                               if (tp->t_dupacks == 1)
+                                       tp->snd_limited = 0;
+                               if ((tp->snd_nxt == tp->snd_max) &&
+                                   (tp->t_rxtshift == 0))
+                                       tp->snd_cwnd =
+                                           SEQ_SUB(tp->snd_nxt,
+                                                   tp->snd_una) -
+                                               tcp_sack_adjust(tp);
+                               tp->snd_cwnd +=
+                                   (tp->t_dupacks - tp->snd_limited) *
+                                   maxseg - tcp_sack_adjust(tp);
+                               /*
+                                * Only call tcp_output when there
+                                * is new data available to be sent
+                                * or we need to send an ACK.
+                                */
+                               SOCK_SENDBUF_LOCK(so);
+                               avail = sbavail(&so->so_snd);
+                               SOCK_SENDBUF_UNLOCK(so);
+                               if (tp->t_flags & TF_ACKNOW ||
+                                   (avail >=
+                                    SEQ_SUB(tp->snd_nxt, tp->snd_una))) {
+                                       (void) tcp_output(tp);
+                               }
+                               sent = SEQ_SUB(tp->snd_max, oldsndmax);
+                               if (sent > maxseg) {
+                                       KASSERT((tp->t_dupacks == 2 &&
+                                           tp->snd_limited == 0) ||
+                                          (sent == maxseg + 1 &&
+                                           tp->t_flags & TF_SENTFIN) ||
+                                          (sent < 2 * maxseg &&
+                                           tp->t_flags & TF_NODELAY),
+                                           ("%s: sent too much: %u>%u",
+                                           __func__, sent, maxseg));
+                                       tp->snd_limited = 2;
+                               } else if (sent > 0) {
+                                       ++tp->snd_limited;
                                }
+                               tp->snd_cwnd = oldcwnd;
+                               goto drop;
                        }
+                       break;
                }
-
-resume_partialack:
                KASSERT(SEQ_GT(th->th_ack, tp->snd_una),
-                   ("%s: th_ack <= snd_una", __func__));
-
+                   ("%s: SEQ_LEQ(th_ack, snd_una)", __func__));
+               /*
+                * This ack is advancing the left edge, reset the
+                * counter.
+                */
+               tp->t_dupacks = 0;
+               /*
+                * If this ack also has new SACK info, increment the
+                * t_dupacks as per RFC 6675. The variable
+                * sack_changed tracks all changes to the SACK
+                * scoreboard, including when partial ACKs without
+                * SACK options are received, and clear the scoreboard
+                * from the left side. Such partial ACKs should not be
+                * counted as dupacks here.
+                */
+               if (V_tcp_do_newsack &&
+                   tcp_is_sack_recovery(tp, &to) &&
+                   (((tp->t_rxtshift == 0) && (sack_changed != SACK_NOCHANGE)) 
||
+                    ((tp->t_rxtshift > 0) && (sack_changed == SACK_NEWLOSS))) 
&&
+                   (tp->snd_nxt == tp->snd_max)) {
+                       tp->t_dupacks++;
+                       /* limit overhead by setting maxseg last */
+                       if (!IN_FASTRECOVERY(tp->t_flags) &&
+                           (tp->sackhint.sacked_bytes >
+                           (tcprexmtthresh - 1) * (maxseg = tcp_maxseg(tp)))) {
+                               goto enter_recovery;
+                       }
+               }
                /*
                 * If the congestion window was inflated to account
                 * for the other side's cached packets, retract it.

Reply via email to