On 9/5/06, Andre Oppermann <[EMAIL PROTECTED]> wrote:
Jack Vogel wrote:
> On 9/5/06, Andre Oppermann <[EMAIL PROTECTED]> wrote:
>> Prafulla Deuskar wrote:
>> > Your patch looks good and is the way to go.
>> >
>> > So after Jack confirms that your patch works with the em driver
>> > would you commit it to -current?
>>
>> Absolutely. :-)
>>
>> > The driver related changes can follow..
>> >
>> > Later we also need to fix ifconfig so that the user can enable/disable
>> > TSO on the interface.
>>
>> I'll do that together with the TSO code.
>
> OK, I've built and done some touch testing of this. I like it, the
> driver has
> some counters of the number of TSO bursts it does, and I think I see more
> per netperf test with your patch than mine.
>
> Hard to do real performance testing with all that WITNESS stuff in, but
> I will be making a 6.1 version of your patch to test with since I have my
> driver running on that anyway.
You can disable WITNESS and INVARIANTS pretty easily in -current and
get the full performance with it.
Last time I tried that I think the kernel wouldn't build, but that was
like 6 months ago, so I just kicked off a build with this stuff off, and
we'll see how it looks :)
> If you do the ifconfig changes there will need to be a small amount of
> code added to em_ioctl() but it should be trivial.
>
> You want me to reissue a driver patch with changes for your code?
Yes, please do so. I've got a dual-em card which I can test with myself.
OK, attached new patch, this one even has the ioctl change so when
you get the ifconfig change in it will be ready.
Cheers,
Jack
diff -Naur /usr/src/sys.dist/dev/em/if_em.c /usr/src/sys/dev/em/if_em.c
--- /usr/src/sys.dist/dev/em/if_em.c Fri Aug 4 00:56:33 2006
+++ /usr/src/sys/dev/em/if_em.c Tue Sep 5 15:58:42 2006
@@ -72,6 +72,8 @@
#include <netinet/tcp.h>
#include <netinet/udp.h>
+#include <machine/in_cksum.h>
+
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>
#include <dev/em/if_em_hw.h>
@@ -229,6 +231,10 @@
struct mbuf *);
static void em_transmit_checksum_setup(struct adapter *, struct mbuf *,
uint32_t *, uint32_t *);
+#ifdef EM_TSO
+static boolean_t em_tso_setup(struct adapter *, struct mbuf *, u_int32_t *,
+ uint32_t *);
+#endif
static void em_set_promisc(struct adapter *);
static void em_disable_promisc(struct adapter *);
static void em_set_multi(struct adapter *);
@@ -302,6 +308,7 @@
#define E1000_TICKS_TO_USECS(ticks) ((1024 * (ticks) + 500) / 1000)
#define E1000_USECS_TO_TICKS(usecs) ((1000 * (usecs) + 512) / 1024)
+#define M_TSO_LEN 66
static int em_tx_int_delay_dflt = E1000_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = E1000_TICKS_TO_USECS(EM_RDTR);
@@ -905,6 +912,10 @@
ifp->if_capenable ^= IFCAP_HWCSUM;
reinit = 1;
}
+ if (mask & IFCAP_TSO) {
+ ifp->if_capenable ^= IFCAP_TSO;
+ reinit = 1;
+ }
if (mask & IFCAP_VLAN_HWTAGGING) {
ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
reinit = 1;
@@ -1061,11 +1072,14 @@
ifp->if_drv_flags |= IFF_DRV_RUNNING;
ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
+ ifp->if_hwassist = 0;
if (adapter->hw.mac_type >= em_82543) {
if (ifp->if_capenable & IFCAP_TXCSUM)
ifp->if_hwassist = EM_CHECKSUM_FEATURES;
- else
- ifp->if_hwassist = 0;
+#ifdef EM_TSO
+ if (ifp->if_capenable & IFCAP_TSO)
+ ifp->if_hwassist |= EM_TCPSEG_FEATURES;
+#endif
}
callout_reset(&adapter->timer, hz, em_local_timer, adapter);
@@ -1416,11 +1430,17 @@
struct m_tag *mtag;
uint32_t txd_upper, txd_lower, txd_used, txd_saved;
int nsegs, i, j;
- int error;
+ int error, do_tso, tso_desc = 0;
m_head = *m_headp;
current_tx_desc = NULL;
- txd_used = txd_saved = 0;
+ txd_upper = txd_lower = txd_used = txd_saved = 0;
+
+#ifdef EM_TSO
+ do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
+#else
+ do_tso = 0;
+#endif
/*
* Force a cleanup if number of TX descriptors
@@ -1473,6 +1493,17 @@
*m_headp = m_head;
}
+ /*
+ * TSO workaround:
+ * If an mbuf is only header we need
+ * to pull 4 bytes of data into it.
+ */
+ if (do_tso && (m_head->m_len <= M_TSO_LEN)) {
+ m_head = m_pullup(m_head, M_TSO_LEN + 4);
+ if (m_head == NULL)
+ return (ENOBUFS);
+ }
+
/*
* Map the packet for DMA.
*/
@@ -1487,23 +1518,43 @@
}
KASSERT(nsegs != 0, ("em_encap: empty packet"));
- if (nsegs > adapter->num_tx_desc_avail) {
+ /*
+ * TSO Hardware workaround, if this packet is not
+ * TSO, and is only a single descriptor long, and
+ * it follows a TSO burst, then we need to add a
+ * sentinel descriptor to prevent premature writeback.
+ */
+ if ((do_tso == 0) && (adapter->tx_tso == TRUE)) {
+ if (nsegs == 1)
+ tso_desc = TRUE;
+ adapter->tx_tso = FALSE;
+ }
+
+ if (nsegs > adapter->num_tx_desc_avail - 2) {
adapter->no_tx_desc_avail2++;
error = ENOBUFS;
goto encap_fail;
}
- if (ifp->if_hwassist > 0)
- em_transmit_checksum_setup(adapter, m_head, &txd_upper, &txd_lower);
- else
- txd_upper = txd_lower = 0;
+ /* Do hardware assists */
+ if ( ifp->if_hwassist > 0) {
+#ifdef EM_TSO
+ if (em_tso_setup(adapter, m_head, &txd_upper, &txd_lower)) {
+ /* we need to make a final sentinel transmit desc */
+ tso_desc = TRUE;
+ } else
+#endif
+ em_transmit_checksum_setup(adapter, m_head,
+ &txd_upper, &txd_lower);
+ }
i = adapter->next_avail_tx_desc;
- if (adapter->pcix_82544) {
+ if (adapter->pcix_82544)
txd_saved = i;
- txd_used = 0;
- }
+
for (j = 0; j < nsegs; j++) {
+ bus_size_t seg_len;
+ bus_addr_t seg_addr;
/* If adapter is 82544 and on PCIX bus. */
if(adapter->pcix_82544) {
DESC_ARRAY desc_array;
@@ -1537,26 +1588,57 @@
txd_used++;
}
} else {
- tx_buffer = &adapter->tx_buffer_area[i];
- current_tx_desc = &adapter->tx_desc_base[i];
-
- current_tx_desc->buffer_addr = htole64(segs[j].ds_addr);
- current_tx_desc->lower.data = htole32(
- adapter->txd_cmd | txd_lower | segs[j].ds_len);
- current_tx_desc->upper.data = htole32(txd_upper);
-
- if (++i == adapter->num_tx_desc)
- i = 0;
-
- tx_buffer->m_head = NULL;
+ tx_buffer = &adapter->tx_buffer_area[i];
+ current_tx_desc = &adapter->tx_desc_base[i];
+ seg_addr = htole64(segs[j].ds_addr);
+ seg_len = segs[j].ds_len;
+ /*
+ ** TSO Workaround:
+ ** If this is the last descriptor, we want to
+ ** split it so we have a small final sentinel
+ */
+ if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) {
+ seg_len -= 4;
+ current_tx_desc->buffer_addr = seg_addr;
+ current_tx_desc->lower.data = htole32(
+ adapter->txd_cmd | txd_lower | seg_len);
+ current_tx_desc->upper.data =
+ htole32(txd_upper);
+ if (++i == adapter->num_tx_desc)
+ i = 0;
+ /* Now make the sentinel */
+ ++txd_used; /* using an extra txd */
+ current_tx_desc = &adapter->tx_desc_base[i];
+ tx_buffer = &adapter->tx_buffer_area[i];
+ current_tx_desc->buffer_addr =
+ seg_addr + seg_len;
+ current_tx_desc->lower.data = htole32(
+ adapter->txd_cmd | txd_lower | 4);
+ current_tx_desc->upper.data =
+ htole32(txd_upper);
+ if (++i == adapter->num_tx_desc)
+ i = 0;
+ } else {
+ current_tx_desc->buffer_addr = seg_addr;
+ current_tx_desc->lower.data = htole32(
+ adapter->txd_cmd | txd_lower | seg_len);
+ current_tx_desc->upper.data =
+ htole32(txd_upper);
+ if (++i == adapter->num_tx_desc)
+ i = 0;
+ }
+ tx_buffer->m_head = NULL;
}
}
adapter->next_avail_tx_desc = i;
if (adapter->pcix_82544)
adapter->num_tx_desc_avail -= txd_used;
- else
+ else {
adapter->num_tx_desc_avail -= nsegs;
+ if (tso_desc) /* TSO used an extra for sentinel */
+ adapter->num_tx_desc_avail -= txd_used;
+ }
if (mtag != NULL) {
/* Set the vlan id. */
@@ -2226,6 +2308,15 @@
ifp->if_capenable |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
}
+#ifdef EM_TSO
+ /* Enable TSO if available */
+ if ((adapter->hw.mac_type > em_82544) &&
+ (adapter->hw.mac_type != em_82547)) {
+ ifp->if_capabilities |= IFCAP_TSO;
+ ifp->if_capenable |= IFCAP_TSO;
+ }
+#endif
+
/*
* Tell the upper layer(s) we support long frames.
*/
@@ -2436,15 +2527,27 @@
static int
em_setup_transmit_structures(struct adapter *adapter)
{
+#ifdef EM_TSO
+ struct ifnet *ifp = adapter->ifp;
+#endif
device_t dev = adapter->dev;
struct em_buffer *tx_buffer;
- bus_size_t size;
+ bus_size_t size, segsize;
int error, i;
/*
* Setup DMA descriptor areas.
*/
- size = roundup2(adapter->hw.max_frame_size, MCLBYTES);
+ segsize = size = roundup2(adapter->hw.max_frame_size, MCLBYTES);
+
+#ifdef EM_TSO
+ /* Overrides for TSO - want large sizes */
+ if (ifp->if_hwassist & EM_TCPSEG_FEATURES) {
+ size = EM_TSO_SIZE;
+ segsize = PAGE_SIZE;
+ }
+#endif
+
if ((error = bus_dma_tag_create(NULL, /* parent */
1, 0, /* alignment, bounds */
BUS_SPACE_MAXADDR, /* lowaddr */
@@ -2452,7 +2555,7 @@
NULL, NULL, /* filter, filterarg */
size, /* maxsize */
EM_MAX_SCATTER, /* nsegments */
- size, /* maxsegsize */
+ segsize, /* maxsegsize */
0, /* flags */
NULL, /* lockfunc */
NULL, /* lockarg */
@@ -2713,6 +2816,87 @@
adapter->next_avail_tx_desc = curr_txd;
}
+#ifdef EM_TSO
+/**********************************************************************
+ * Setup work for hardware segmentation offload (TSO).  Returns FALSE
+ * unless mp has CSUM_TSO set and is longer than E1000_TX_BUFFER_SIZE;
+ * else fills txd_upper/txd_lower and one context descriptor -> TRUE.
+ **********************************************************************/
+static boolean_t
+em_tso_setup(struct adapter *adapter,
+ struct mbuf *mp,
+ u_int32_t *txd_upper,
+ u_int32_t *txd_lower)
+{
+ struct em_context_desc *TXD;
+ struct em_buffer *tx_buffer;
+ struct ip *ip;
+ struct tcphdr *th;
+ int curr_txd, hdr_len, ip_hlen, tcp_hlen;
+
+ if (((mp->m_pkthdr.csum_flags & CSUM_TSO) == 0) ||
+ (mp->m_pkthdr.len <= E1000_TX_BUFFER_SIZE)) {
+ return FALSE; /* not handled here; outputs and ring state untouched */
+ }
+
+ *txd_lower = (E1000_TXD_CMD_DEXT |
+ E1000_TXD_DTYP_D |
+ E1000_TXD_CMD_TSE);
+
+ *txd_upper = (E1000_TXD_POPTS_IXSM |
+ E1000_TXD_POPTS_TXSM) << 8;
+
+ curr_txd = adapter->next_avail_tx_desc;
+ tx_buffer = &adapter->tx_buffer_area[curr_txd];
+ TXD = (struct em_context_desc *) &adapter->tx_desc_base[curr_txd]; /* ring slot reused as a context descriptor */
+
+ mp->m_data += sizeof(struct ether_header); /* step past the Ethernet header so mtod() yields the IP header; undone below */
+ ip = mtod(mp, struct ip *);
+ ip->ip_len = 0; /* NOTE(review): presumably filled in per segment by the hardware - confirm */
+ ip->ip_sum = 0;
+ ip_hlen = ip->ip_hl << 2 ;
+ th = (struct tcphdr *)((caddr_t)ip + ip_hlen); /* NOTE(review): assumes IP and TCP headers are contiguous in this mbuf - confirm against caller's m_pullup */
+ tcp_hlen = th->th_off << 2;
+
+ hdr_len = ETHER_HDR_LEN + ip_hlen + tcp_hlen;
+ th->th_sum = in_pseudo(ip->ip_src.s_addr,
+ ip->ip_dst.s_addr,
+ htons(IPPROTO_TCP)); /* checksum seed covers addresses and protocol only, no length */
+
+ mp->m_data -= sizeof(struct ether_header); /* restore the original data pointer */
+ TXD->lower_setup.ip_fields.ipcss = ETHER_HDR_LEN;
+ TXD->lower_setup.ip_fields.ipcso =
+ ETHER_HDR_LEN + offsetof(struct ip, ip_sum);
+ TXD->lower_setup.ip_fields.ipcse =
+ htole16(ETHER_HDR_LEN + ip_hlen - 1);
+
+ TXD->upper_setup.tcp_fields.tucss =
+ ETHER_HDR_LEN + ip_hlen;
+ TXD->upper_setup.tcp_fields.tucse = 0; /* NOTE(review): 0 presumably means "to end of packet" - confirm in the controller manual */
+ TXD->upper_setup.tcp_fields.tucso =
+ ETHER_HDR_LEN + ip_hlen +
+ offsetof(struct tcphdr, th_sum);
+ TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
+ TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
+ TXD->cmd_and_length = htole32(adapter->txd_cmd |
+ E1000_TXD_CMD_DEXT |
+ E1000_TXD_CMD_TSE |
+ E1000_TXD_CMD_IP | E1000_TXD_CMD_TCP |
+ (mp->m_pkthdr.len - (hdr_len))); /* length field is the payload size: packet length minus all headers */
+
+ tx_buffer->m_head = NULL; /* no mbuf is attached to the context descriptor's slot */
+
+ if (++curr_txd == adapter->num_tx_desc)
+ curr_txd = 0; /* wrap around the descriptor ring */
+
+ adapter->num_tx_desc_avail--; /* the context descriptor occupies one ring slot */
+ adapter->next_avail_tx_desc = curr_txd;
+ adapter->tx_tso = TRUE; /* checked by the transmit path's sentinel workaround on the next non-TSO packet */
+
+ return TRUE;
+}
+#endif /* EM_TSO */
+
/**********************************************************************
*
* Examine each tx_buffer in the used queue. If the hardware is done
@@ -3639,6 +3823,12 @@
(long long)adapter->stats.gprc);
device_printf(dev, "Good Packets Xmtd = %lld\n",
(long long)adapter->stats.gptc);
+#ifdef EM_TSO
+ device_printf(dev, "TSO Contexts Xmtd = %lld\n",
+ (long long)adapter->stats.tsctc);
+ device_printf(dev, "TSO Contexts Failed = %lld\n",
+ (long long)adapter->stats.tsctfc);
+#endif
}
static int
diff -Naur /usr/src/sys.dist/dev/em/if_em.h /usr/src/sys/dev/em/if_em.h
--- /usr/src/sys.dist/dev/em/if_em.h Thu Aug 3 12:05:04 2006
+++ /usr/src/sys/dev/em/if_em.h Tue Sep 5 14:29:19 2006
@@ -36,6 +36,9 @@
#ifndef _EM_H_DEFINED_
#define _EM_H_DEFINED_
+/* Undefine this to remove TSO from driver */
+#define EM_TSO
+
/* Tunables */
/*
@@ -138,6 +141,11 @@
#define EM_CHECKSUM_FEATURES (CSUM_TCP | CSUM_UDP)
/*
+ * Inform the stack about transmit segmentation offload capabilities.
+ */
+#define EM_TCPSEG_FEATURES CSUM_TSO
+
+/*
* This parameter controls the duration of transmit watchdog timer.
*/
#define EM_TX_TIMEOUT 5 /* set to 5 seconds */
@@ -225,6 +233,7 @@
#define EM_RXBUFFER_16384 16384
#define EM_MAX_SCATTER 64
+#define EM_TSO_SIZE 65535
typedef enum _XSUM_CONTEXT_T {
OFFLOAD_NONE,
@@ -307,6 +316,7 @@
uint32_t txd_cmd;
struct em_buffer *tx_buffer_area;
bus_dma_tag_t txtag; /* dma tag for tx */
+ uint32_t tx_tso; /* last tx was tso */
/*
* Receive definitions
_______________________________________________
freebsd-net@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-net
To unsubscribe, send any mail to "[EMAIL PROTECTED]"