Author: rscheff
Date: Fri Dec  4 11:29:27 2020
New Revision: 368327
URL: https://svnweb.freebsd.org/changeset/base/368327

Log:
  Add TCP feature Proportional Rate Reduction (PRR) - RFC6937
  
  PRR improves loss recovery and avoids RTOs in a wide range
  of scenarios (ACK thinning) over regular SACK loss recovery.
  
  PRR is controlled by net.inet.tcp.do_prr. Note that the diff below
  initializes tcp_do_prr to 1, so PRR is enabled unless it is set to 0.
  Performance may be impeded by token bucket rate policers at
  the bottleneck; in that case net.inet.tcp.do_prr_conservative = 1
  should be set in addition.
  
  Submitted by: Aris Angelogiannopoulos
  Sponsored by: NetApp, Inc.
  Differential Revision:        https://reviews.freebsd.org/D18892

Modified:
  head/sys/netinet/tcp_input.c
  head/sys/netinet/tcp_var.h

Modified: head/sys/netinet/tcp_input.c
==============================================================================
--- head/sys/netinet/tcp_input.c        Fri Dec  4 04:39:48 2020        
(r368326)
+++ head/sys/netinet/tcp_input.c        Fri Dec  4 11:29:27 2020        
(r368327)
@@ -153,6 +153,16 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, drop_synfin, CTLFL
     &VNET_NAME(drop_synfin), 0,
     "Drop TCP packets with SYN+FIN set");
 
+VNET_DEFINE(int, tcp_do_prr_conservative) = 0;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_prr_conservative, CTLFLAG_VNET | 
CTLFLAG_RW,
+    &VNET_NAME(tcp_do_prr_conservative), 0,
+    "Do conservative Proportional Rate Reduction");
+
+VNET_DEFINE(int, tcp_do_prr) = 1;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_prr, CTLFLAG_VNET | CTLFLAG_RW,
+    &VNET_NAME(tcp_do_prr), 1,
+    "Enable Proportional Rate Reduction per RFC 6937");
+
 VNET_DEFINE(int, tcp_do_newcwv) = 0;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, newcwv, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_do_newcwv), 0,
@@ -2554,7 +2564,55 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, stru
                                     IN_FASTRECOVERY(tp->t_flags)) {
                                        cc_ack_received(tp, th, nsegs,
                                            CC_DUPACK);
-                                       if ((tp->t_flags & TF_SACK_PERMIT) &&
+                                       if (V_tcp_do_prr &&
+                                           IN_FASTRECOVERY(tp->t_flags) &&
+                                           (tp->t_flags & TF_SACK_PERMIT)) {
+                                               long snd_cnt = 0, limit = 0;
+                                               long del_data = 0, pipe = 0;
+                                               /*
+                                                * In a duplicate ACK del_data 
is only the
+                                                * diff_in_sack. If no SACK is 
used del_data
+                                                * will be 0. Pipe is the 
amount of data we
+                                                * estimate to be in the 
network.
+                                                */
+                                               del_data = 
tp->sackhint.delivered_data;
+                                               pipe = (tp->snd_nxt - 
tp->snd_fack) +
+                                                       
tp->sackhint.sack_bytes_rexmit;
+                                               tp->sackhint.prr_delivered += 
del_data;
+                                               if (pipe > tp->snd_ssthresh) {
+                                                       snd_cnt = 
(tp->sackhint.prr_delivered *
+                                                           tp->snd_ssthresh /
+                                                           
tp->sackhint.recover_fs) +
+                                                           1 - 
tp->sackhint.sack_bytes_rexmit;
+                                               } else {
+                                                       if 
(V_tcp_do_prr_conservative)
+                                                               limit = 
tp->sackhint.prr_delivered -
+                                                                       
tp->sackhint.sack_bytes_rexmit;
+                                                       else
+                                                               if 
((tp->sackhint.prr_delivered -
+                                                                   
tp->sackhint.sack_bytes_rexmit) >
+                                                                   del_data)
+                                                                       limit = 
tp->sackhint.prr_delivered -
+                                                                           
tp->sackhint.sack_bytes_rexmit +
+                                                                           
maxseg;
+                                                               else
+                                                                       limit = 
del_data + maxseg;
+                                                       if ((tp->snd_ssthresh - 
pipe) < limit)
+                                                               snd_cnt = 
tp->snd_ssthresh - pipe;
+                                                       else
+                                                               snd_cnt = limit;
+                                               }
+                                               snd_cnt = max((snd_cnt / 
maxseg), 0);
+                                               /*
+                                                * Send snd_cnt new data into 
the network in
+                                                * response to this ACK. If 
there is going
+                                                * to be a SACK retransmission, 
adjust snd_cwnd
+                                                * accordingly.
+                                                */
+                                               tp->snd_cwnd = tp->snd_nxt - 
tp->snd_recover +
+                                                   
tp->sackhint.sack_bytes_rexmit +
+                                                   (snd_cnt * maxseg);
+                                       } else if ((tp->t_flags & 
TF_SACK_PERMIT) &&
                                            IN_FASTRECOVERY(tp->t_flags)) {
                                                int awnd;
 
@@ -2583,13 +2641,14 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, stru
                                        tcp_seq onxt = tp->snd_nxt;
 
                                        /*
-                                        * If we're doing sack, check to
-                                        * see if we're already in sack
+                                        * If we're doing sack, or prr, check
+                                        * to see if we're already in sack
                                         * recovery. If we're not doing sack,
                                         * check to see if we're in newreno
                                         * recovery.
                                         */
-                                       if (tp->t_flags & TF_SACK_PERMIT) {
+                                       if (V_tcp_do_prr ||
+                                           (tp->t_flags & TF_SACK_PERMIT)) {
                                                if 
(IN_FASTRECOVERY(tp->t_flags)) {
                                                        tp->t_dupacks = 0;
                                                        break;
@@ -2607,6 +2666,16 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, stru
                                            CC_DUPACK);
                                        tcp_timer_activate(tp, TT_REXMT, 0);
                                        tp->t_rtttime = 0;
+                                       if (V_tcp_do_prr) {
+                                           /*
+                                            * snd_ssthresh is already updated 
by
+                                            * cc_cong_signal.
+                                            */
+                                           tp->sackhint.prr_delivered = 0;
+                                           tp->sackhint.sack_bytes_rexmit = 0;
+                                           if (!(tp->sackhint.recover_fs = 
tp->snd_nxt - tp->snd_una))
+                                               tp->sackhint.recover_fs = 1;
+                                       }
                                        if (tp->t_flags & TF_SACK_PERMIT) {
                                                TCPSTAT_INC(
                                                    tcps_sack_recovery_episode);
@@ -2713,7 +2782,10 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, stru
                if (IN_FASTRECOVERY(tp->t_flags)) {
                        if (SEQ_LT(th->th_ack, tp->snd_recover)) {
                                if (tp->t_flags & TF_SACK_PERMIT)
-                                       tcp_sack_partialack(tp, th);
+                                       if (V_tcp_do_prr)
+                                               tcp_prr_partialack(tp, th);
+                                       else
+                                               tcp_sack_partialack(tp, th);
                                else
                                        tcp_newreno_partial_ack(tp, th);
                        } else
@@ -3837,6 +3909,54 @@ tcp_mssopt(struct in_conninfo *inc)
                mss = max(maxmtu, thcmtu) - min_protoh;
 
        return (mss);
+}
+
+void
+tcp_prr_partialack(struct tcpcb *tp, struct tcphdr *th)
+{
+       long snd_cnt = 0, limit = 0, del_data = 0, pipe = 0;
+       int maxseg = tcp_maxseg(tp);
+
+       INP_WLOCK_ASSERT(tp->t_inpcb);
+
+       tcp_timer_activate(tp, TT_REXMT, 0);
+       tp->t_rtttime = 0;
+       /*
+        * Compute the amount of data that this ACK is indicating
+        * (del_data) and an estimate of how many bytes are in the
+        * network.
+        */
+       if (SEQ_GEQ(th->th_ack, tp->snd_una))
+               del_data = BYTES_THIS_ACK(tp, th);
+       del_data += tp->sackhint.delivered_data;
+       pipe = (tp->snd_nxt - tp->snd_fack) + tp->sackhint.sack_bytes_rexmit;
+       tp->sackhint.prr_delivered += del_data;
+       /*
+        * Proportional Rate Reduction
+        */
+       if (pipe > tp->snd_ssthresh)
+               snd_cnt = (tp->sackhint.prr_delivered * tp->snd_ssthresh / 
tp->sackhint.recover_fs) -
+                   tp->sackhint.sack_bytes_rexmit;
+       else {
+               if (V_tcp_do_prr_conservative)
+                       limit = tp->sackhint.prr_delivered - 
tp->sackhint.sack_bytes_rexmit;
+               else
+                       if ((tp->sackhint.prr_delivered - 
tp->sackhint.sack_bytes_rexmit) > del_data)
+                               limit = tp->sackhint.prr_delivered - 
tp->sackhint.sack_bytes_rexmit + maxseg;
+                       else
+                               limit = del_data + maxseg;
+               snd_cnt = min((tp->snd_ssthresh - pipe), limit);
+       }
+       snd_cnt = max((snd_cnt / maxseg), 0);
+       /*
+        * Send snd_cnt new data into the network in response to this ack.
+        * If there is going to be a SACK retransmission, adjust snd_cwnd
+        * accordingly.
+        */
+       tp->snd_cwnd = tp->snd_nxt - tp->snd_recover +
+               tp->sackhint.sack_bytes_rexmit + (snd_cnt * maxseg);
+       tp->t_flags |= TF_ACKNOW;
+       (void) tcp_output(tp);
 }
 
 /*

Modified: head/sys/netinet/tcp_var.h
==============================================================================
--- head/sys/netinet/tcp_var.h  Fri Dec  4 04:39:48 2020        (r368326)
+++ head/sys/netinet/tcp_var.h  Fri Dec  4 11:29:27 2020        (r368327)
@@ -113,8 +113,9 @@ struct sackhint {
        int32_t         sacked_bytes;   /* Total sacked bytes reported by the
                                         * receiver via sack option
                                         */
-       uint32_t        _pad1[1];       /* TBD */
-       uint64_t        _pad[1];        /* TBD */
+       uint32_t        recover_fs;     /* Flight Size at the start of Loss 
recovery */
+       uint32_t        prr_delivered;  /* Total bytes delivered using PRR */
+       uint32_t        _pad[1];        /* TBD */
 };
 
 #define SEGQ_EMPTY(tp) TAILQ_EMPTY(&(tp)->t_segq)
@@ -866,6 +867,8 @@ VNET_DECLARE(int, tcp_sendspace);
 VNET_DECLARE(struct inpcbhead, tcb);
 VNET_DECLARE(struct inpcbinfo, tcbinfo);
 
+#define        V_tcp_do_prr                    VNET(tcp_do_prr)
+#define        V_tcp_do_prr_conservative       VNET(tcp_do_prr_conservative)
 #define        V_tcp_do_newcwv                 VNET(tcp_do_newcwv)
 #define        V_drop_synfin                   VNET(drop_synfin)
 #define        V_path_mtu_discovery            VNET(path_mtu_discovery)
@@ -1051,6 +1054,7 @@ void       tcp_clean_dsack_blocks(struct tcpcb *tp);
 void    tcp_clean_sackreport(struct tcpcb *tp);
 void    tcp_sack_adjust(struct tcpcb *tp);
 struct sackhole *tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt);
+void    tcp_prr_partialack(struct tcpcb *, struct tcphdr *);
 void    tcp_sack_partialack(struct tcpcb *, struct tcphdr *);
 void    tcp_free_sackholes(struct tcpcb *tp);
 int     tcp_newreno(struct tcpcb *, struct tcphdr *);
_______________________________________________
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to