I don't tend to MFC to 10.x now, but given the impact I do agree it should be done for this one.

The fix is a little different, due to code restructuring in 11 / head, but I do have a 10.x version already.

    Regards
    Steve

On 10/04/2017 15:51, Julian Elischer wrote:
If possible, an MFC to 10 too would be nice.
thanks


On 10/4/17 4:19 pm, Steven Hartland wrote:
Author: smh
Date: Mon Apr 10 08:19:35 2017
New Revision: 316676
URL: https://svnweb.freebsd.org/changeset/base/316676

Log:
  Use estimated RTT for receive buffer auto resizing instead of timestamps

  Switched from using timestamps to RTT estimates when performing TCP receive
  buffer auto resizing, as not all hosts support / enable TCP timestamps.

  Disabled reset of receive buffer auto scaling when not in bulk receive mode,
  which gives an extra 20% performance increase.

  Also extracted auto resizing to a common method shared between standard and
  fastpath modules.

  With this AWS S3 downloads at ~17ms latency on a 1Gbps connection jump from
  ~3MB/s to ~100MB/s using the default settings.

  Reviewed by:    lstewart, gnn
  MFC after:      2 weeks
  Relnotes:       Yes
  Sponsored by:   Multiplay
  Differential Revision:  https://reviews.freebsd.org/D9668

Modified:
   head/sys/netinet/in_kdtrace.c
   head/sys/netinet/in_kdtrace.h
   head/sys/netinet/tcp_input.c
   head/sys/netinet/tcp_output.c
   head/sys/netinet/tcp_stacks/fastpath.c
   head/sys/netinet/tcp_var.h

Modified: head/sys/netinet/in_kdtrace.c
==============================================================================
--- head/sys/netinet/in_kdtrace.c    Mon Apr 10 06:19:09 2017 (r316675)
+++ head/sys/netinet/in_kdtrace.c    Mon Apr 10 08:19:35 2017 (r316676)
@@ -132,6 +132,14 @@ SDT_PROBE_DEFINE6_XLATE(tcp, , , state__
      "void *", "void *",
      "int", "tcplsinfo_t *");
  +SDT_PROBE_DEFINE6_XLATE(tcp, , , receive__autoresize,
+    "void *", "void *",
+    "struct tcpcb *", "csinfo_t *",
+    "struct mbuf *", "ipinfo_t *",
+    "struct tcpcb *", "tcpsinfo_t *" ,
+    "struct tcphdr *", "tcpinfoh_t *",
+    "int", "int");
+
  SDT_PROBE_DEFINE5_XLATE(udp, , , receive,
      "void *", "pktinfo_t *",
      "struct inpcb *", "csinfo_t *",

Modified: head/sys/netinet/in_kdtrace.h
==============================================================================
--- head/sys/netinet/in_kdtrace.h    Mon Apr 10 06:19:09 2017 (r316675)
+++ head/sys/netinet/in_kdtrace.h    Mon Apr 10 08:19:35 2017 (r316676)
@@ -65,6 +65,7 @@ SDT_PROBE_DECLARE(tcp, , , debug__input)
  SDT_PROBE_DECLARE(tcp, , , debug__output);
  SDT_PROBE_DECLARE(tcp, , , debug__user);
  SDT_PROBE_DECLARE(tcp, , , debug__drop);
+SDT_PROBE_DECLARE(tcp, , , receive__autoresize);
    SDT_PROBE_DECLARE(udp, , , receive);
  SDT_PROBE_DECLARE(udp, , , send);

Modified: head/sys/netinet/tcp_input.c
==============================================================================
--- head/sys/netinet/tcp_input.c    Mon Apr 10 06:19:09 2017 (r316675)
+++ head/sys/netinet/tcp_input.c    Mon Apr 10 08:19:35 2017 (r316676)
@@ -1486,6 +1486,68 @@ drop:
      return (IPPROTO_DONE);
  }
  +/*
+ * Automatic sizing of receive socket buffer.  Often the send
+ * buffer size is not optimally adjusted to the actual network
+ * conditions at hand (delay bandwidth product).  Setting the
+ * buffer size too small limits throughput on links with high
+ * bandwidth and high delay (eg. trans-continental/oceanic links).
+ *
+ * On the receive side the socket buffer memory is only rarely
+ * used to any significant extent.  This allows us to be much
+ * more aggressive in scaling the receive socket buffer.  For
+ * the case that the buffer space is actually used to a large
+ * extent and we run out of kernel memory we can simply drop
+ * the new segments; TCP on the sender will just retransmit it
+ * later.  Setting the buffer size too big may only consume too
+ * much kernel memory if the application doesn't read() from
+ * the socket or packet loss or reordering makes use of the
+ * reassembly queue.
+ *
+ * The criteria to step up the receive buffer one notch are:
+ *  1. Application has not set receive buffer size with
+ *     SO_RCVBUF. Setting SO_RCVBUF clears SB_AUTOSIZE.
+ *  2. the number of bytes received during the time it takes
+ *     one timestamp to be reflected back to us (the RTT);
+ *  3. received bytes per RTT is within seven eighth of the
+ *     current socket buffer size;
+ *  4. receive buffer size has not hit maximal automatic size;
+ *
+ * This algorithm does one step per RTT at most and only if
+ * we receive a bulk stream w/o packet losses or reorderings.
+ * Shrinking the buffer during idle times is not necessary as
+ * it doesn't consume any memory when idle.
+ *
+ * TODO: Only step up if the application is actually serving
+ * the buffer to better manage the socket buffer resources.
+ */
+int
+tcp_autorcvbuf(struct mbuf *m, struct tcphdr *th, struct socket *so,
+    struct tcpcb *tp, int tlen)
+{
+    int newsize = 0;
+
+    if (V_tcp_do_autorcvbuf && (so->so_rcv.sb_flags & SB_AUTOSIZE) &&
+        tp->t_srtt != 0 && tp->rfbuf_ts != 0 &&
+        TCP_TS_TO_TICKS(tcp_ts_getticks() - tp->rfbuf_ts) >
+        (tp->t_srtt >> TCP_RTT_SHIFT)) {
+        if (tp->rfbuf_cnt > (so->so_rcv.sb_hiwat / 8 * 7) &&
+            so->so_rcv.sb_hiwat < V_tcp_autorcvbuf_max) {
+            newsize = min(so->so_rcv.sb_hiwat +
+                V_tcp_autorcvbuf_inc, V_tcp_autorcvbuf_max);
+        }
+        TCP_PROBE6(receive__autoresize, NULL, tp, m, tp, th, newsize);
+
+        /* Start over with next RTT. */
+        tp->rfbuf_ts = 0;
+        tp->rfbuf_cnt = 0;
+    } else {
+        tp->rfbuf_cnt += tlen;    /* add up */
+    }
+
+    return (newsize);
+}
+
  void
  tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
      struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos,
@@ -1849,62 +1911,7 @@ tcp_do_segment(struct mbuf *m, struct tc
  #endif
              TCP_PROBE3(debug__input, tp, th, m);
  -        /*
-         * Automatic sizing of receive socket buffer.  Often the send
-         * buffer size is not optimally adjusted to the actual network
-         * conditions at hand (delay bandwidth product). Setting the
-         * buffer size too small limits throughput on links with high
-         * bandwidth and high delay (eg. trans-continental/oceanic links).
-         *
-         * On the receive side the socket buffer memory is only rarely
-         * used to any significant extent.  This allows us to be much
-         * more aggressive in scaling the receive socket buffer.  For
-         * the case that the buffer space is actually used to a large
-         * extent and we run out of kernel memory we can simply drop
-         * the new segments; TCP on the sender will just retransmit it
-         * later.  Setting the buffer size too big may only consume too
-         * much kernel memory if the application doesn't read() from
-         * the socket or packet loss or reordering makes use of the
-         * reassembly queue.
-         *
-         * The criteria to step up the receive buffer one notch are:
-         *  1. Application has not set receive buffer size with
-         *     SO_RCVBUF. Setting SO_RCVBUF clears SB_AUTOSIZE.
-         *  2. the number of bytes received during the time it takes
-         *     one timestamp to be reflected back to us (the RTT);
-         *  3. received bytes per RTT is within seven eighth of the
-         *     current socket buffer size;
-         *  4. receive buffer size has not hit maximal automatic size;
-         *
-         * This algorithm does one step per RTT at most and only if
-         * we receive a bulk stream w/o packet losses or reorderings.
-         * Shrinking the buffer during idle times is not necessary as
-         * it doesn't consume any memory when idle.
-         *
-         * TODO: Only step up if the application is actually serving
-         * the buffer to better manage the socket buffer resources.
-         */
-            if (V_tcp_do_autorcvbuf &&
-                (to.to_flags & TOF_TS) &&
-                to.to_tsecr &&
-                (so->so_rcv.sb_flags & SB_AUTOSIZE)) {
-                if (TSTMP_GT(to.to_tsecr, tp->rfbuf_ts) &&
-                    to.to_tsecr - tp->rfbuf_ts < hz) {
-                    if (tp->rfbuf_cnt >
-                        (so->so_rcv.sb_hiwat / 8 * 7) &&
-                        so->so_rcv.sb_hiwat <
-                        V_tcp_autorcvbuf_max) {
-                        newsize =
-                            min(so->so_rcv.sb_hiwat +
-                            V_tcp_autorcvbuf_inc,
-                            V_tcp_autorcvbuf_max);
-                    }
-                    /* Start over with next RTT. */
-                    tp->rfbuf_ts = 0;
-                    tp->rfbuf_cnt = 0;
-                } else
-                    tp->rfbuf_cnt += tlen;    /* add up */
-            }
+            newsize = tcp_autorcvbuf(m, th, so, tp, tlen);
                /* Add data to socket buffer. */
              SOCKBUF_LOCK(&so->so_rcv);
@@ -1945,10 +1952,6 @@ tcp_do_segment(struct mbuf *m, struct tc
          win = 0;
      tp->rcv_wnd = imax(win, (int)(tp->rcv_adv - tp->rcv_nxt));
-    /* Reset receive buffer auto scaling when not in bulk receive mode. */
-    tp->rfbuf_ts = 0;
-    tp->rfbuf_cnt = 0;
-
      switch (tp->t_state) {
        /*

Modified: head/sys/netinet/tcp_output.c
==============================================================================
--- head/sys/netinet/tcp_output.c    Mon Apr 10 06:19:09 2017 (r316675)
+++ head/sys/netinet/tcp_output.c    Mon Apr 10 08:19:35 2017 (r316676)
@@ -831,11 +831,13 @@ send:
              to.to_tsval = tcp_ts_getticks() + tp->ts_offset;
              to.to_tsecr = tp->ts_recent;
              to.to_flags |= TOF_TS;
-            /* Set receive buffer autosizing timestamp. */
-            if (tp->rfbuf_ts == 0 &&
-                (so->so_rcv.sb_flags & SB_AUTOSIZE))
-                tp->rfbuf_ts = tcp_ts_getticks();
          }
+
+        /* Set receive buffer autosizing timestamp. */
+        if (tp->rfbuf_ts == 0 &&
+            (so->so_rcv.sb_flags & SB_AUTOSIZE))
+            tp->rfbuf_ts = tcp_ts_getticks();
+
          /* Selective ACK's. */
          if (tp->t_flags & TF_SACK_PERMIT) {
              if (flags & TH_SYN)

Modified: head/sys/netinet/tcp_stacks/fastpath.c
==============================================================================
--- head/sys/netinet/tcp_stacks/fastpath.c    Mon Apr 10 06:19:09 2017 (r316675)
+++ head/sys/netinet/tcp_stacks/fastpath.c    Mon Apr 10 08:19:35 2017 (r316676)
@@ -399,62 +399,8 @@ tcp_do_fastnewdata(struct mbuf *m, struc
                (void *)tcp_saveipgen, &tcp_savetcp, 0);
  #endif
      TCP_PROBE3(debug__input, tp, th, m);
-    /*
-     * Automatic sizing of receive socket buffer.  Often the send
-     * buffer size is not optimally adjusted to the actual network
-     * conditions at hand (delay bandwidth product).  Setting the
-     * buffer size too small limits throughput on links with high
-     * bandwidth and high delay (eg. trans-continental/oceanic links).
-     *
-     * On the receive side the socket buffer memory is only rarely
-     * used to any significant extent.  This allows us to be much
-     * more aggressive in scaling the receive socket buffer. For
-     * the case that the buffer space is actually used to a large
-     * extent and we run out of kernel memory we can simply drop
-     * the new segments; TCP on the sender will just retransmit it
-     * later.  Setting the buffer size too big may only consume too
-     * much kernel memory if the application doesn't read() from
-     * the socket or packet loss or reordering makes use of the
-     * reassembly queue.
-     *
-     * The criteria to step up the receive buffer one notch are:
-     *  1. Application has not set receive buffer size with
-     *     SO_RCVBUF. Setting SO_RCVBUF clears SB_AUTOSIZE.
-     *  2. the number of bytes received during the time it takes
-     *     one timestamp to be reflected back to us (the RTT);
-     *  3. received bytes per RTT is within seven eighth of the
-     *     current socket buffer size;
-     *  4. receive buffer size has not hit maximal automatic size;
-     *
-     * This algorithm does one step per RTT at most and only if
-     * we receive a bulk stream w/o packet losses or reorderings.
-     * Shrinking the buffer during idle times is not necessary as
-     * it doesn't consume any memory when idle.
-     *
-     * TODO: Only step up if the application is actually serving
-     * the buffer to better manage the socket buffer resources.
-     */
-    if (V_tcp_do_autorcvbuf &&
-        (to->to_flags & TOF_TS) &&
-        to->to_tsecr &&
-        (so->so_rcv.sb_flags & SB_AUTOSIZE)) {
-        if (TSTMP_GT(to->to_tsecr, tp->rfbuf_ts) &&
-            to->to_tsecr - tp->rfbuf_ts < hz) {
-            if (tp->rfbuf_cnt >
-                (so->so_rcv.sb_hiwat / 8 * 7) &&
-                so->so_rcv.sb_hiwat <
-                V_tcp_autorcvbuf_max) {
-                newsize =
-                    min(so->so_rcv.sb_hiwat +
-                        V_tcp_autorcvbuf_inc,
-                        V_tcp_autorcvbuf_max);
-            }
-            /* Start over with next RTT. */
-            tp->rfbuf_ts = 0;
-            tp->rfbuf_cnt = 0;
-        } else
-            tp->rfbuf_cnt += tlen;    /* add up */
-    }
+
+    newsize = tcp_autorcvbuf(m, th, so, tp, tlen);
        /* Add data to socket buffer. */
      SOCKBUF_LOCK(&so->so_rcv);
@@ -532,10 +478,6 @@ tcp_do_slowpath(struct mbuf *m, struct t
          win = 0;
      tp->rcv_wnd = imax(win, (int)(tp->rcv_adv - tp->rcv_nxt));
-    /* Reset receive buffer auto scaling when not in bulk receive mode. */
-    tp->rfbuf_ts = 0;
-    tp->rfbuf_cnt = 0;
-
      switch (tp->t_state) {
        /*

Modified: head/sys/netinet/tcp_var.h
==============================================================================
--- head/sys/netinet/tcp_var.h    Mon Apr 10 06:19:09 2017 (r316675)
+++ head/sys/netinet/tcp_var.h    Mon Apr 10 08:19:35 2017 (r316676)
@@ -778,6 +778,8 @@ void    hhook_run_tcp_est_in(struct tcpcb *
  #endif
    int     tcp_input(struct mbuf **, int *, int);
+int     tcp_autorcvbuf(struct mbuf *, struct tcphdr *, struct socket *,
+        struct tcpcb *, int);
  void     tcp_do_segment(struct mbuf *, struct tcphdr *,
              struct socket *, struct tcpcb *, int, int, uint8_t,
              int);




_______________________________________________
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
