On 9/5/06, Andre Oppermann <[EMAIL PROTECTED]> wrote:
Jack Vogel wrote:
> On 9/5/06, Andre Oppermann <[EMAIL PROTECTED]> wrote:
>> Prafulla Deuskar wrote:
>> > Your patch looks good and is the way to go.
>> >
>> > So after Jack confirms that your patch works with the em driver
>> > would you commit it to -current?
>>
>> Absolutely.  :-)
>>
>> > The driver related changes can follow..
>> >
>> > Later we also need to fix ifconfig so that the user can enable/disable
>> > TSO on the interface.
>>
>> I'll do that together with the TSO code.
>
> OK, I've built and done some touch testing of this. I like it; the
> driver has some counters of the number of TSO bursts it does, and I
> think I see more per netperf test with your patch than with mine.
>
> Hard to do real performance testing with all that WITNESS stuff in, but
> I will be making a 6.1 version of your patch to test with since I have my
> driver running on that anyway.

You can disable WITNESS and INVARIANTS pretty easily in -current and
get the full performance with it.

Last time I tried that I think the kernel wouldn't build, but that was
like 6 months ago, so I just kicked off a build with this stuff off, and
we'll see how it looks :)

> If you do the ifconfig changes there will need to be a small amount of
> code added to em_ioctl() but it should be trivial.
>
> You want me to reissue a driver patch with changes for your code?

Yes, please do so.  I've got a dual-em card which I can test with myself.

OK, attached is a new patch; this one even has the ioctl change, so when
you get the ifconfig change in, it will be ready.

Cheers,

Jack
diff -Naur /usr/src/sys.dist/dev/em/if_em.c /usr/src/sys/dev/em/if_em.c
--- /usr/src/sys.dist/dev/em/if_em.c	Fri Aug  4 00:56:33 2006
+++ /usr/src/sys/dev/em/if_em.c	Tue Sep  5 15:58:42 2006
@@ -72,6 +72,8 @@
 #include <netinet/tcp.h>
 #include <netinet/udp.h>
 
+#include <machine/in_cksum.h>
+
 #include <dev/pci/pcivar.h>
 #include <dev/pci/pcireg.h>
 #include <dev/em/if_em_hw.h>
@@ -229,6 +231,10 @@
 		    struct mbuf *);
 static void	em_transmit_checksum_setup(struct adapter *, struct mbuf *,
 		    uint32_t *, uint32_t *);
+#ifdef EM_TSO
+static boolean_t em_tso_setup(struct adapter *, struct mbuf *, u_int32_t *,
+                    uint32_t *);
+#endif
 static void	em_set_promisc(struct adapter *);
 static void	em_disable_promisc(struct adapter *);
 static void	em_set_multi(struct adapter *);
@@ -302,6 +308,7 @@
 
 #define E1000_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
 #define E1000_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
+#define M_TSO_LEN			66
 
 static int em_tx_int_delay_dflt = E1000_TICKS_TO_USECS(EM_TIDV);
 static int em_rx_int_delay_dflt = E1000_TICKS_TO_USECS(EM_RDTR);
@@ -905,6 +912,10 @@
 			ifp->if_capenable ^= IFCAP_HWCSUM;
 			reinit = 1;
 		}
+		if (mask & IFCAP_TSO) {
+			ifp->if_capenable ^= IFCAP_TSO;
+			reinit = 1;
+		}
 		if (mask & IFCAP_VLAN_HWTAGGING) {
 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
 			reinit = 1;
@@ -1061,11 +1072,14 @@
 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
 	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
 
+	ifp->if_hwassist = 0;
 	if (adapter->hw.mac_type >= em_82543) {
 		if (ifp->if_capenable & IFCAP_TXCSUM)
 			ifp->if_hwassist = EM_CHECKSUM_FEATURES;
-		else
-			ifp->if_hwassist = 0;
+#ifdef EM_TSO
+		if (ifp->if_capenable & IFCAP_TSO)
+			ifp->if_hwassist |= EM_TCPSEG_FEATURES;
+#endif
 	}
 
 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
@@ -1416,11 +1430,17 @@
 	struct m_tag		*mtag;
 	uint32_t		txd_upper, txd_lower, txd_used, txd_saved;
 	int			nsegs, i, j;
-	int			error;
+	int			error, do_tso, tso_desc = 0;
 
 	m_head = *m_headp;
 	current_tx_desc = NULL;
-	txd_used = txd_saved = 0;
+	txd_upper = txd_lower = txd_used = txd_saved = 0;
+
+#ifdef EM_TSO
+        do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
+#else
+        do_tso = 0;
+#endif
 
 	/*
 	 * Force a cleanup if number of TX descriptors
@@ -1473,6 +1493,17 @@
 		*m_headp = m_head;
 	}
 
+        /*
+         * TSO workaround:
+         *  If an mbuf is only header we need
+         *     to pull 4 bytes of data into it.
+         */
+        if (do_tso && (m_head->m_len <= M_TSO_LEN)) {
+                m_head = m_pullup(m_head, M_TSO_LEN + 4);
+                if (m_head == NULL)
+                        return (ENOBUFS);
+        }
+
 	/*
 	 * Map the packet for DMA.
 	 */
@@ -1487,23 +1518,43 @@
 	}
 	KASSERT(nsegs != 0, ("em_encap: empty packet"));
 
-	if (nsegs > adapter->num_tx_desc_avail) {
+        /*
+         * TSO Hardware workaround, if this packet is not
+         * TSO, and is only a single descriptor long, and
+         * it follows a TSO burst, then we need to add a
+         * sentinel descriptor to prevent premature writeback.
+         */
+        if ((do_tso == 0) && (adapter->tx_tso == TRUE)) {
+                if (nsegs == 1)
+                        tso_desc = TRUE;
+                adapter->tx_tso = FALSE;
+        }
+
+	if (nsegs > adapter->num_tx_desc_avail - 2) {
 		adapter->no_tx_desc_avail2++;
 		error = ENOBUFS;
 		goto encap_fail;
 	}
 
-	if (ifp->if_hwassist > 0)
-		em_transmit_checksum_setup(adapter,  m_head, &txd_upper, &txd_lower);
-	else
-		txd_upper = txd_lower = 0;
+        /* Do hardware assists */
+        if ( ifp->if_hwassist > 0) {
+#ifdef EM_TSO
+                if (em_tso_setup(adapter, m_head, &txd_upper, &txd_lower)) {
+                        /* we need to make a final sentinel transmit desc */
+                        tso_desc = TRUE;
+                } else
+#endif
+                        em_transmit_checksum_setup(adapter,  m_head,
+                            &txd_upper, &txd_lower);
+        }
 
 	i = adapter->next_avail_tx_desc;
-	if (adapter->pcix_82544) {
+	if (adapter->pcix_82544)
 		txd_saved = i;
-		txd_used = 0;
-	}
+
 	for (j = 0; j < nsegs; j++) {
+                bus_size_t seg_len;
+                bus_addr_t seg_addr;
 		/* If adapter is 82544 and on PCIX bus. */
 		if(adapter->pcix_82544) {
 			DESC_ARRAY	desc_array;
@@ -1537,26 +1588,57 @@
 				txd_used++;
 			}
 		} else {
-			tx_buffer = &adapter->tx_buffer_area[i];
-			current_tx_desc = &adapter->tx_desc_base[i];
-
-			current_tx_desc->buffer_addr = htole64(segs[j].ds_addr);
-			current_tx_desc->lower.data = htole32(
-				adapter->txd_cmd | txd_lower | segs[j].ds_len);
-			current_tx_desc->upper.data = htole32(txd_upper);
-
-			if (++i == adapter->num_tx_desc)
-				i = 0;
-
-			tx_buffer->m_head = NULL;
+                       tx_buffer = &adapter->tx_buffer_area[i];
+                        current_tx_desc = &adapter->tx_desc_base[i];
+                        seg_addr = htole64(segs[j].ds_addr);
+                        seg_len  = segs[j].ds_len;
+                        /*
+                        ** TSO Workaround:
+                        ** If this is the last descriptor, we want to
+                        ** split it so we have a small final sentinel
+                        */
+                        if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) {
+                                seg_len -= 4;
+                                current_tx_desc->buffer_addr = seg_addr;
+                                current_tx_desc->lower.data = htole32(
+                                adapter->txd_cmd | txd_lower | seg_len);
+                                current_tx_desc->upper.data =
+                                    htole32(txd_upper);
+                                if (++i == adapter->num_tx_desc)
+                                        i = 0;
+                                /* Now make the sentinel */
+                                ++txd_used; /* using an extra txd */
+                                current_tx_desc = &adapter->tx_desc_base[i];
+                                tx_buffer = &adapter->tx_buffer_area[i];
+                                current_tx_desc->buffer_addr =
+                                    seg_addr + seg_len;
+                                current_tx_desc->lower.data = htole32(
+                                adapter->txd_cmd | txd_lower | 4);
+                                current_tx_desc->upper.data =
+                                    htole32(txd_upper);
+                                if (++i == adapter->num_tx_desc)
+                                        i = 0;
+                        } else {
+                                current_tx_desc->buffer_addr = seg_addr;
+                                current_tx_desc->lower.data = htole32(
+                                adapter->txd_cmd | txd_lower | seg_len);
+                                current_tx_desc->upper.data =
+                                    htole32(txd_upper);
+                                if (++i == adapter->num_tx_desc)
+                                        i = 0;
+                        }
+                        tx_buffer->m_head = NULL;
 		}
 	}
 
 	adapter->next_avail_tx_desc = i;
 	if (adapter->pcix_82544)
 		adapter->num_tx_desc_avail -= txd_used;
-	else
+	else {
 		adapter->num_tx_desc_avail -= nsegs;
+                if (tso_desc) /* TSO used an extra for sentinel */
+                        adapter->num_tx_desc_avail -= txd_used;
+        }
 
 	if (mtag != NULL) {
 		/* Set the vlan id. */
@@ -2226,6 +2308,15 @@
 		ifp->if_capenable |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
 	}
 
+#ifdef EM_TSO
+        /* Enable TSO if available */
+        if ((adapter->hw.mac_type > em_82544) &&
+            (adapter->hw.mac_type != em_82547)) {
+                ifp->if_capabilities |= IFCAP_TSO;
+                ifp->if_capenable |= IFCAP_TSO;
+        }
+#endif
+
 	/*
 	 * Tell the upper layer(s) we support long frames.
 	 */
@@ -2436,15 +2527,27 @@
 static int
 em_setup_transmit_structures(struct adapter *adapter)
 {
+#ifdef EM_TSO
+        struct ifnet   *ifp = adapter->ifp;
+#endif
 	device_t dev = adapter->dev;
 	struct em_buffer *tx_buffer;
-	bus_size_t size;
+	bus_size_t size, segsize;
 	int error, i;
 
 	/*
 	 * Setup DMA descriptor areas.
 	 */
-	size = roundup2(adapter->hw.max_frame_size, MCLBYTES);
+	segsize = size = roundup2(adapter->hw.max_frame_size, MCLBYTES);
+
+#ifdef EM_TSO
+        /* Overrides for TSO - want large sizes */
+        if (ifp->if_hwassist & EM_TCPSEG_FEATURES) {
+                size = EM_TSO_SIZE;
+                segsize = PAGE_SIZE;
+        }
+#endif
+
 	if ((error = bus_dma_tag_create(NULL,		/* parent */
 				1, 0,			/* alignment, bounds */
 				BUS_SPACE_MAXADDR,	/* lowaddr */
@@ -2452,7 +2555,7 @@
 				NULL, NULL,		/* filter, filterarg */
 				size,			/* maxsize */
 				EM_MAX_SCATTER,		/* nsegments */
-				size,			/* maxsegsize */
+				segsize,		/* maxsegsize */
 				0,			/* flags */
 				NULL,		/* lockfunc */
 				NULL,		/* lockarg */
@@ -2713,6 +2816,87 @@
 	adapter->next_avail_tx_desc = curr_txd;
 }
 
+#ifdef EM_TSO
+/**********************************************************************
+ *
+ *  Setup work for hardware segmentation offload (TSO)
+ *
+ **********************************************************************/
+static boolean_t
+em_tso_setup(struct adapter *adapter,
+             struct mbuf *mp,
+             u_int32_t *txd_upper,
+             u_int32_t *txd_lower)
+{
+        struct em_context_desc *TXD;
+        struct em_buffer *tx_buffer;
+        struct ip *ip;
+        struct tcphdr *th;
+        int curr_txd, hdr_len, ip_hlen, tcp_hlen;
+
+        if (((mp->m_pkthdr.csum_flags & CSUM_TSO) == 0) ||
+            (mp->m_pkthdr.len <= E1000_TX_BUFFER_SIZE)) {
+                return FALSE;
+        }
+
+        *txd_lower = (E1000_TXD_CMD_DEXT |
+                      E1000_TXD_DTYP_D |
+                      E1000_TXD_CMD_TSE);
+
+        *txd_upper = (E1000_TXD_POPTS_IXSM |
+                      E1000_TXD_POPTS_TXSM) << 8;
+
+        curr_txd = adapter->next_avail_tx_desc;
+        tx_buffer = &adapter->tx_buffer_area[curr_txd];
+        TXD = (struct em_context_desc *) &adapter->tx_desc_base[curr_txd];
+
+        mp->m_data += sizeof(struct ether_header);
+        ip = mtod(mp, struct ip *);
+        ip->ip_len = 0;
+        ip->ip_sum = 0;
+        ip_hlen = ip->ip_hl << 2 ;
+        th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
+        tcp_hlen = th->th_off << 2;
+
+        hdr_len = ETHER_HDR_LEN + ip_hlen + tcp_hlen;
+        th->th_sum = in_pseudo(ip->ip_src.s_addr,
+                                ip->ip_dst.s_addr,
+                                htons(IPPROTO_TCP));
+
+        mp->m_data -= sizeof(struct ether_header);
+        TXD->lower_setup.ip_fields.ipcss = ETHER_HDR_LEN;
+        TXD->lower_setup.ip_fields.ipcso =
+                ETHER_HDR_LEN + offsetof(struct ip, ip_sum);
+        TXD->lower_setup.ip_fields.ipcse =
+                htole16(ETHER_HDR_LEN + ip_hlen - 1);
+
+        TXD->upper_setup.tcp_fields.tucss =
+                ETHER_HDR_LEN + ip_hlen;
+        TXD->upper_setup.tcp_fields.tucse = 0;
+        TXD->upper_setup.tcp_fields.tucso =
+                ETHER_HDR_LEN + ip_hlen +
+                offsetof(struct tcphdr, th_sum);
+        TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
+        TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
+        TXD->cmd_and_length = htole32(adapter->txd_cmd |
+                                E1000_TXD_CMD_DEXT |
+                                E1000_TXD_CMD_TSE |
+                                E1000_TXD_CMD_IP | E1000_TXD_CMD_TCP |
+                                (mp->m_pkthdr.len - (hdr_len)));
+
+        tx_buffer->m_head = NULL;
+
+        if (++curr_txd == adapter->num_tx_desc)
+                curr_txd = 0;
+
+        adapter->num_tx_desc_avail--;
+        adapter->next_avail_tx_desc = curr_txd;
+        adapter->tx_tso = TRUE;
+
+        return TRUE;
+}
+#endif /* EM_TSO */
+
 /**********************************************************************
  *
  *  Examine each tx_buffer in the used queue. If the hardware is done
@@ -3639,6 +3823,12 @@
 	    (long long)adapter->stats.gprc);
 	device_printf(dev, "Good Packets Xmtd = %lld\n",
 	    (long long)adapter->stats.gptc);
+#ifdef EM_TSO
+        device_printf(dev, "TSO Contexts Xmtd = %lld\n",
+            (long long)adapter->stats.tsctc);
+        device_printf(dev, "TSO Contexts Failed = %lld\n",
+            (long long)adapter->stats.tsctfc);
+#endif
 }
 
 static int
diff -Naur /usr/src/sys.dist/dev/em/if_em.h /usr/src/sys/dev/em/if_em.h
--- /usr/src/sys.dist/dev/em/if_em.h	Thu Aug  3 12:05:04 2006
+++ /usr/src/sys/dev/em/if_em.h	Tue Sep  5 14:29:19 2006
@@ -36,6 +36,9 @@
 #ifndef _EM_H_DEFINED_
 #define _EM_H_DEFINED_
 
+/* Undefine this to remove TSO from driver */
+#define EM_TSO
+
 /* Tunables */
 
 /*
@@ -138,6 +141,11 @@
 #define EM_CHECKSUM_FEATURES            (CSUM_TCP | CSUM_UDP)
 
 /*
+ * Inform the stack about transmit segmentation offload capabilities.
+ */
+#define EM_TCPSEG_FEATURES		CSUM_TSO
+
+/*
  * This parameter controls the duration of transmit watchdog timer.
  */
 #define EM_TX_TIMEOUT                   5    /* set to 5 seconds */
@@ -225,6 +233,7 @@
 #define EM_RXBUFFER_16384      16384
 
 #define EM_MAX_SCATTER            64
+#define EM_TSO_SIZE		65535
 
 typedef enum _XSUM_CONTEXT_T {
 	OFFLOAD_NONE,
@@ -307,6 +316,7 @@
         uint32_t		txd_cmd;
 	struct em_buffer	*tx_buffer_area;
 	bus_dma_tag_t		txtag;		/* dma tag for tx */
+	uint32_t		tx_tso;		/* last tx was tso */
 
 	/* 
 	 * Receive definitions
_______________________________________________
freebsd-net@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-net
To unsubscribe, send any mail to "[EMAIL PROTECTED]"

Reply via email to