On Wed, Oct 18, 2023 at 05:29:41PM +0200, Jan Klemkow wrote: > This diff implements TCP Segmentation Offloading for ixl(4). I tested > it successfully on amd64 and sparc64 with Intel X710. It should > increase the TCP bulk performance to 10 Gbit/s. On sparc64 I got an > increase from 600 Mbit/s to 2 Gbit/s. > > Further testing is welcome.
tested on amd64 OK bluhm@ > Index: dev/pci/if_ixl.c > =================================================================== > RCS file: /cvs/src/sys/dev/pci/if_ixl.c,v > retrieving revision 1.89 > diff -u -p -r1.89 if_ixl.c > --- dev/pci/if_ixl.c 29 Sep 2023 19:44:47 -0000 1.89 > +++ dev/pci/if_ixl.c 18 Oct 2023 15:15:30 -0000 > @@ -71,6 +71,7 @@ > #include <net/if.h> > #include <net/if_dl.h> > #include <net/if_media.h> > +#include <net/route.h> > #include <net/toeplitz.h> > > #if NBPFILTER > 0 > @@ -85,6 +86,8 @@ > #include <netinet/ip.h> > #include <netinet/ip6.h> > #include <netinet/tcp.h> > +#include <netinet/tcp_timer.h> > +#include <netinet/tcp_var.h> > #include <netinet/udp.h> > #include <netinet/if_ether.h> > > @@ -827,6 +830,10 @@ struct ixl_tx_desc { > #define IXL_TX_DESC_BSIZE_MASK \ > (IXL_TX_DESC_BSIZE_MAX << IXL_TX_DESC_BSIZE_SHIFT) > > +#define IXL_TX_CTX_DESC_CMD_TSO 0x10 > +#define IXL_TX_CTX_DESC_TLEN_SHIFT 30 > +#define IXL_TX_CTX_DESC_MSS_SHIFT 50 > + > #define IXL_TX_DESC_L2TAG1_SHIFT 48 > } __packed __aligned(16); > > @@ -893,11 +900,19 @@ struct ixl_rx_wb_desc_32 { > uint64_t qword3; > } __packed __aligned(16); > > -#define IXL_TX_PKT_DESCS 8 > +#define IXL_TX_PKT_DESCS 32 > #define IXL_TX_QUEUE_ALIGN 128 > #define IXL_RX_QUEUE_ALIGN 128 > > #define IXL_HARDMTU 9712 /* 9726 - ETHER_HDR_LEN */ > +#define IXL_TSO_SIZE ((255 * 1024) - 1) > +#define IXL_MAX_DMA_SEG_SIZE ((16 * 1024) - 1) > + > +/* > + * Our TCP/IP Stack could not handle packets greater than MAXMCLBYTES. > + * This interface could not handle packets greater than IXL_TSO_SIZE. 
> + */ > +CTASSERT(MAXMCLBYTES < IXL_TSO_SIZE); > > #define IXL_PCIREG PCI_MAPREG_START > > @@ -1958,6 +1973,7 @@ ixl_attach(struct device *parent, struct > ifp->if_capabilities |= IFCAP_CSUM_IPv4 | > IFCAP_CSUM_TCPv4 | IFCAP_CSUM_UDPv4 | > IFCAP_CSUM_TCPv6 | IFCAP_CSUM_UDPv6; > + ifp->if_capabilities |= IFCAP_TSOv4 | IFCAP_TSOv6; > > ifmedia_init(&sc->sc_media, 0, ixl_media_change, ixl_media_status); > > @@ -2603,7 +2619,7 @@ ixl_txr_alloc(struct ixl_softc *sc, unsi > txm = &maps[i]; > > if (bus_dmamap_create(sc->sc_dmat, > - IXL_HARDMTU, IXL_TX_PKT_DESCS, IXL_HARDMTU, 0, > + MAXMCLBYTES, IXL_TX_PKT_DESCS, IXL_MAX_DMA_SEG_SIZE, 0, > BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW | BUS_DMA_64BIT, > &txm->txm_map) != 0) > goto uncreate; > @@ -2787,7 +2803,8 @@ ixl_load_mbuf(bus_dma_tag_t dmat, bus_dm > } > > static uint64_t > -ixl_tx_setup_offload(struct mbuf *m0) > +ixl_tx_setup_offload(struct mbuf *m0, struct ixl_tx_ring *txr, > + unsigned int prod) > { > struct ether_extracted ext; > uint64_t hlen; > @@ -2800,7 +2817,7 @@ ixl_tx_setup_offload(struct mbuf *m0) > } > > if (!ISSET(m0->m_pkthdr.csum_flags, > - M_IPV4_CSUM_OUT|M_TCP_CSUM_OUT|M_UDP_CSUM_OUT)) > + M_IPV4_CSUM_OUT|M_TCP_CSUM_OUT|M_UDP_CSUM_OUT|M_TCP_TSO)) > return (offload); > > ether_extract_headers(m0, &ext); > @@ -2833,6 +2850,28 @@ ixl_tx_setup_offload(struct mbuf *m0) > offload |= (sizeof(*ext.udp) >> 2) << IXL_TX_DESC_L4LEN_SHIFT; > } > > + if (ISSET(m0->m_pkthdr.csum_flags, M_TCP_TSO)) { > + if (ext.tcp) { > + struct ixl_tx_desc *ring, *txd; > + uint64_t cmd = 0; > + > + hlen += ext.tcp->th_off << 2; > + ring = IXL_DMA_KVA(&txr->txr_mem); > + txd = &ring[prod]; > + > + cmd |= IXL_TX_DESC_DTYPE_CONTEXT; > + cmd |= IXL_TX_CTX_DESC_CMD_TSO; > + cmd |= (uint64_t)(m0->m_pkthdr.len - ETHER_HDR_LEN > + - hlen) << IXL_TX_CTX_DESC_TLEN_SHIFT; > + cmd |= (uint64_t)(m0->m_pkthdr.ph_mss) > + << IXL_TX_CTX_DESC_MSS_SHIFT; > + > + htolem64(&txd->addr, 0); > + htolem64(&txd->cmd, cmd); > + } else > + 
tcpstat_inc(tcps_outbadtso); > + } > + > return (offload); > } > > @@ -2873,7 +2912,8 @@ ixl_start(struct ifqueue *ifq) > mask = sc->sc_tx_ring_ndescs - 1; > > for (;;) { > - if (free <= IXL_TX_PKT_DESCS) { > + /* We need one extra descriptor for TSO packets. */ > + if (free <= (IXL_TX_PKT_DESCS + 1)) { > ifq_set_oactive(ifq); > break; > } > @@ -2882,10 +2922,16 @@ ixl_start(struct ifqueue *ifq) > if (m == NULL) > break; > > - offload = ixl_tx_setup_offload(m); > + offload = ixl_tx_setup_offload(m, txr, prod); > > txm = &txr->txr_maps[prod]; > map = txm->txm_map; > + > + if (ISSET(m->m_pkthdr.csum_flags, M_TCP_TSO)) { > + prod++; > + prod &= mask; > + free--; > + } > > if (ixl_load_mbuf(sc->sc_dmat, map, m) != 0) { > ifq->ifq_errors++;