On 06/03/12 12:05, Andrew Gallatin wrote:
> On 06/03/12 12:51, Colin Percival wrote:
>> I've attached a new patch which:
>> 1. adds a IFCAP_TSO_MSS "capability" and a if_tx_tso_mss field to struct 
>> ifnet,
>> 2. sets these in netfront when the IFCAP_TSO4 flag is set,
>> 3. extends tcp_maxmtu to read this value,
>> 4. adds a tx_tso_mss field to struct tcpcb,
>> 5. makes tcp_mss_update set tx_tso_mss using tcp_maxmtu, and
>> 6. limits TSO lengths to tx_tso_mss in tcp_output.
> 
> Looks good to me.  I don't pretend to understand the bowels of
> the TCP stack, so I can't comment on the "sendalot" stuff to
> force segmentation.

The "sendalot" bit is rather confusing, and I'm not at all certain
that it's doing what it's supposed to be doing, but my reading of
the code is that it really means "there's more data buffered than
what we're sending right now so after we issue this write we need
to go back to the top and do another".

> I assume it works as well as your
> previous patch to solve the problems you were seeing with Xen?

Yes.

> One minor nit is that I envision this limit to always be
> 64K or less, so you could probably get away with stealing
> 16 bits from ifcspare.

With IPv6 I could imagine larger limits happening, so I figured
it was better to take one of the ints.

> The other trivial nit is that I would have made these new
> if_tx_tso_mss and  t_tx_tso_mss fields unsigned.

Oops, quite right.  I was trying to make sure I didn't break ABI,
but of course int and u_int should have the same size no matter
what platform we're on.

> Thanks so much for doing this!

Thanks for reviewing.  Updated patch attached with s/int/u_int/ and
some other minor cleanups.

Can anyone review this for ABI safety?

-- 
Colin Percival
Security Officer Emeritus, FreeBSD | The power to serve
Founder, Tarsnap | www.tarsnap.com | Online backups for the truly paranoid
Index: netinet/tcp_input.c
===================================================================
--- netinet/tcp_input.c (revision 235780)
+++ netinet/tcp_input.c (working copy)
@@ -3298,6 +3298,7 @@
 {
        int mss = 0;
        u_long maxmtu = 0;
+       u_int tx_tso_mss = 0;
        struct inpcb *inp = tp->t_inpcb;
        struct hc_metrics_lite metrics;
        int origoffer;
@@ -3321,8 +3322,10 @@
        /* Initialize. */
 #ifdef INET6
        if (isipv6) {
-               maxmtu = tcp_maxmtu6(&inp->inp_inc, mtuflags);
+               maxmtu = tcp_maxmtu6_ext(&inp->inp_inc, mtuflags,
+                   &tx_tso_mss);
                tp->t_maxopd = tp->t_maxseg = V_tcp_v6mssdflt;
+               tp->t_tx_tso_mss = tx_tso_mss;
        }
 #endif
 #if defined(INET) && defined(INET6)
@@ -3330,8 +3333,10 @@
 #endif
 #ifdef INET
        {
-               maxmtu = tcp_maxmtu(&inp->inp_inc, mtuflags);
+               maxmtu = tcp_maxmtu_ext(&inp->inp_inc, mtuflags,
+                   &tx_tso_mss);
                tp->t_maxopd = tp->t_maxseg = V_tcp_mssdflt;
+               tp->t_tx_tso_mss = tx_tso_mss;
        }
 #endif
 
Index: netinet/tcp_subr.c
===================================================================
--- netinet/tcp_subr.c  (revision 235780)
+++ netinet/tcp_subr.c  (working copy)
@@ -1708,7 +1708,7 @@
  * tcp_mss_update to get the peer/interface MTU.
  */
 u_long
-tcp_maxmtu(struct in_conninfo *inc, int *flags)
+tcp_maxmtu_ext(struct in_conninfo *inc, int *flags, u_int * tx_tso_mss)
 {
        struct route sro;
        struct sockaddr_in *dst;
@@ -1738,15 +1738,26 @@
                            ifp->if_hwassist & CSUM_TSO)
                                *flags |= CSUM_TSO;
                }
+               if (tx_tso_mss != NULL) {
+                       if (ifp->if_capabilities & IFCAP_TSO_MSS)
+                               *tx_tso_mss = ifp->if_tx_tso_mss;
+               }
                RTFREE(sro.ro_rt);
        }
        return (maxmtu);
 }
+
+u_long
+tcp_maxmtu(struct in_conninfo *inc, int *flags)
+{
+
+       return (tcp_maxmtu_ext(inc, flags, NULL));
+}
 #endif /* INET */
 
 #ifdef INET6
 u_long
-tcp_maxmtu6(struct in_conninfo *inc, int *flags)
+tcp_maxmtu6_ext(struct in_conninfo *inc, int *flags, u_int * tx_tso_mss)
 {
        struct route_in6 sro6;
        struct ifnet *ifp;
@@ -1775,11 +1786,22 @@
                            ifp->if_hwassist & CSUM_TSO)
                                *flags |= CSUM_TSO;
                }
+               if (tx_tso_mss != NULL) {
+                       if (ifp->if_capabilities & IFCAP_TSO_MSS)
+                               *tx_tso_mss = ifp->if_tx_tso_mss;
+               }
                RTFREE(sro6.ro_rt);
        }
 
        return (maxmtu);
 }
+
+u_long
+tcp_maxmtu6(struct in_conninfo *inc, int *flags)
+{
+
+       return (tcp_maxmtu6_ext(inc, flags, NULL));
+}
 #endif /* INET6 */
 
 #ifdef IPSEC
Index: netinet/tcp_var.h
===================================================================
--- netinet/tcp_var.h   (revision 235780)
+++ netinet/tcp_var.h   (working copy)
@@ -208,7 +208,9 @@
        u_int   t_keepintvl;            /* interval between keepalives */
        u_int   t_keepcnt;              /* number of keepalives before close */
 
-       uint32_t t_ispare[8];           /* 5 UTO, 3 TBD */
+       uint32_t t_ispare[5];           /* 5 UTO */
+       uint32_t t_tx_tso_mss;          /* max segment size for TSO offload */
+       uint32_t t_ispare2[2];          /* 2 TBD */
        void    *t_pspare2[4];          /* 4 TBD */
        uint64_t _pad[6];               /* 6 TBD (1-2 CC/RTT?) */
 };
@@ -674,7 +676,9 @@
 #endif
 void    tcp_input(struct mbuf *, int);
 u_long  tcp_maxmtu(struct in_conninfo *, int *);
+u_long  tcp_maxmtu_ext(struct in_conninfo *, int *, u_int *);
 u_long  tcp_maxmtu6(struct in_conninfo *, int *);
+u_long  tcp_maxmtu6_ext(struct in_conninfo *, int *, u_int *);
 void    tcp_mss_update(struct tcpcb *, int, int, struct hc_metrics_lite *,
            int *);
 void    tcp_mss(struct tcpcb *, int);
Index: netinet/tcp_output.c
===================================================================
--- netinet/tcp_output.c        (revision 235780)
+++ netinet/tcp_output.c        (working copy)
@@ -748,6 +748,15 @@
                        }
 
                        /*
+                        * Some drivers want an even shorter limit to the
+                        * length sent via TSO; respect their wishes.
+                        */
+                       if (tp->t_tx_tso_mss != 0 && len > tp->t_tx_tso_mss) {
+                               len = tp->t_tx_tso_mss;
+                               sendalot = 1;
+                       }
+
+                       /*
                         * Prevent the last segment from being
                         * fractional unless the send sockbuf can
                         * be emptied.
Index: dev/xen/netfront/netfront.c
===================================================================
--- dev/xen/netfront/netfront.c (revision 235780)
+++ dev/xen/netfront/netfront.c (working copy)
@@ -135,6 +135,7 @@
  * to mirror the Linux MAX_SKB_FRAGS constant.
  */
 #define        MAX_TX_REQ_FRAGS (65536 / PAGE_SIZE + 2)
+#define        TX_TSO_MSS (MAX_TX_REQ_FRAGS - 2) * MCLBYTES
 
 #define RX_COPY_THRESHOLD 256
 
@@ -2040,6 +2041,9 @@
        if (val) {
                np->xn_ifp->if_capabilities |= IFCAP_TSO4|IFCAP_LRO;
                printf(" feature-gso-tcp4");
+
+               np->xn_ifp->if_capabilities |= IFCAP_TSO_MSS;
+               np->xn_ifp->if_tx_tso_mss = TX_TSO_MSS;
        }
 
        printf("\n");
Index: net/if.h
===================================================================
--- net/if.h    (revision 235780)
+++ net/if.h    (working copy)
@@ -230,6 +230,7 @@
 #define        IFCAP_VLAN_HWTSO        0x40000 /* can do IFCAP_TSO on VLANs */
 #define        IFCAP_LINKSTATE         0x80000 /* the runtime link state is 
dynamic */
 #define        IFCAP_NETMAP            0x100000 /* netmap mode 
supported/enabled */
+#define        IFCAP_TSO_MSS           0x200000 /* TSO size is limited */
 
 #define IFCAP_HWCSUM   (IFCAP_RXCSUM | IFCAP_TXCSUM)
 #define        IFCAP_TSO       (IFCAP_TSO4 | IFCAP_TSO6)
Index: net/if_var.h
===================================================================
--- net/if_var.h        (revision 235780)
+++ net/if_var.h        (working copy)
@@ -205,7 +205,8 @@
         * be used with care where binary compatibility is required.
         */
        char    if_cspare[3];
-       int     if_ispare[4];
+       u_int   if_tx_tso_mss;          /* if IFCAP_TSO_MSS */
+       int     if_ispare[3];
        void    *if_pspare[8];          /* 1 netmap, 7 TDB */
 };
 
_______________________________________________
freebsd-net@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-net
To unsubscribe, send any mail to "freebsd-net-unsubscr...@freebsd.org"

Reply via email to