This patch enables the TSO (v4) hardware feature for the emac driver, as at least the APM82181's TCP/IP acceleration hardware controller (TAH) provides TCP segmentation support in the transmit path.
Signed-off-by: Christian Lamparter <chunk...@gmail.com> --- drivers/net/ethernet/ibm/emac/core.c | 112 ++++++++++++++++++++++++++- drivers/net/ethernet/ibm/emac/core.h | 7 ++ drivers/net/ethernet/ibm/emac/emac.h | 7 ++ drivers/net/ethernet/ibm/emac/tah.c | 22 +++++- drivers/net/ethernet/ibm/emac/tah.h | 2 + 5 files changed, 148 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c index be560f9031f4..80aafd7552aa 100644 --- a/drivers/net/ethernet/ibm/emac/core.c +++ b/drivers/net/ethernet/ibm/emac/core.c @@ -38,6 +38,9 @@ #include <linux/mii.h> #include <linux/bitops.h> #include <linux/if_vlan.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/tcp.h> #include <linux/workqueue.h> #include <linux/of.h> #include <linux/of_address.h> @@ -1118,6 +1121,32 @@ static int emac_resize_rx_ring(struct emac_instance *dev, int new_mtu) return ret; } +/* Restriction applied for the segmentation size + * to use HW segmentation offload feature. the size + * of the segment must not be less than 168 bytes for + * DIX formatted segments, or 176 bytes for + * IEEE formatted segments. However based on actual + * tests any MTU less than 416 causes excessive retries + * due to TX FIFO underruns. + */ +const u32 tah_ss[TAH_NO_SSR] = { 1500, 1344, 1152, 960, 768, 416 }; + +/* look-up matching segment size for the given mtu */ +static void emac_find_tso_ss_for_mtu(struct emac_instance *dev) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(tah_ss); i++) { + if (tah_ss[i] <= dev->ndev->mtu) + break; + } + /* if no matching segment size is found, set the tso_ss_mtu_start + * variable anyway. This will cause the emac_tx_tso to skip straight + * to the software fallback. 
+ */ + dev->tso_ss_mtu_start = i; +} + /* Process ctx, rtnl_lock semaphore */ static int emac_change_mtu(struct net_device *ndev, int new_mtu) { @@ -1134,6 +1163,7 @@ static int emac_change_mtu(struct net_device *ndev, int new_mtu) if (!ret) { ndev->mtu = new_mtu; + emac_find_tso_ss_for_mtu(dev); dev->rx_skb_size = emac_rx_skb_size(new_mtu); dev->rx_sync_size = emac_rx_sync_size(new_mtu); } @@ -1410,6 +1440,33 @@ static inline u16 emac_tx_csum(struct emac_instance *dev, return 0; } +static int emac_tx_tso(struct emac_instance *dev, struct sk_buff *skb, + u16 *ctrl) +{ + if (emac_has_feature(dev, EMAC_FTR_TAH_HAS_TSO) && skb_is_gso(skb) && + !!(skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) { + u32 seg_size = 0, i; + + /* Get the MTU */ + seg_size = skb_shinfo(skb)->gso_size + tcp_hdrlen(skb) + + skb_network_header_len(skb); + + for (i = dev->tso_ss_mtu_start; i < ARRAY_SIZE(tah_ss); i++) { + if (tah_ss[i] > seg_size) + continue; + + *ctrl |= EMAC_TX_CTRL_TAH_SSR(i); + return 0; + } + + /* none found fall back to software */ + return -EINVAL; + } + + *ctrl |= emac_tx_csum(dev, skb); + return 0; +} + static inline netdev_tx_t emac_xmit_finish(struct emac_instance *dev, int len) { struct emac_regs __iomem *p = dev->emacp; @@ -1452,6 +1509,46 @@ static inline u16 emac_tx_vlan(struct emac_instance *dev, struct sk_buff *skb) return 0; } +static netdev_tx_t +emac_start_xmit_sg(struct sk_buff *skb, struct net_device *ndev); + +static int +emac_sw_tso(struct sk_buff *skb, struct net_device *ndev) +{ + struct emac_instance *dev = netdev_priv(ndev); + struct sk_buff *segs, *curr; + unsigned int i, frag_slots; + + /* make sure to not overflow the tx ring */ + frag_slots = dev->tx_cnt; + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i]; + + frag_slots += mal_tx_chunks(skb_frag_size(frag)); + + if (frag_slots >= NUM_TX_BUFF) + return -ENOSPC; + }; + + segs = skb_gso_segment(skb, ndev->features & + 
~(NETIF_F_TSO | NETIF_F_TSO6)); + if (IS_ERR_OR_NULL(segs)) { + ++dev->estats.tx_dropped; + dev_kfree_skb_any(skb); + } else { + while (segs) { + curr = segs; + segs = curr->next; + curr->next = NULL; + + emac_start_xmit_sg(curr, ndev); + } + dev_consume_skb_any(skb); + } + + return 0; +} + /* Tx lock BH */ static netdev_tx_t emac_start_xmit(struct sk_buff *skb, struct net_device *ndev) { @@ -1535,7 +1632,12 @@ emac_start_xmit_sg(struct sk_buff *skb, struct net_device *ndev) goto stop_queue; ctrl = EMAC_TX_CTRL_GFCS | EMAC_TX_CTRL_GP | MAL_TX_CTRL_READY | - emac_tx_csum(dev, skb) | emac_tx_vlan(dev, skb); + emac_tx_vlan(dev, skb); + if (emac_tx_tso(dev, skb, &ctrl)) { + if (emac_sw_tso(skb, ndev)) + goto stop_queue; + } + slot = dev->tx_slot; /* skb data */ @@ -2946,6 +3048,9 @@ static int emac_init_config(struct emac_instance *dev) if (dev->tah_ph != 0) { #ifdef CONFIG_IBM_EMAC_TAH dev->features |= EMAC_FTR_HAS_TAH; + + if (of_device_is_compatible(np, "ibm,emac-apm821xx")) + dev->features |= EMAC_FTR_TAH_HAS_TSO; #else printk(KERN_ERR "%pOF: TAH support not enabled !\n", np); return -ENXIO; @@ -3113,6 +3218,8 @@ static int emac_probe(struct platform_device *ofdev) } dev->rx_skb_size = emac_rx_skb_size(ndev->mtu); dev->rx_sync_size = emac_rx_sync_size(ndev->mtu); + ndev->gso_max_segs = NUM_TX_BUFF / 2; + emac_find_tso_ss_for_mtu(dev); /* Get pointers to BD rings */ dev->tx_desc = @@ -3167,6 +3274,9 @@ static int emac_probe(struct platform_device *ofdev) if (dev->tah_dev) { ndev->hw_features = NETIF_F_IP_CSUM | NETIF_F_SG; + if (emac_has_feature(dev, EMAC_FTR_TAH_HAS_TSO)) + ndev->hw_features |= NETIF_F_TSO; + if (emac_has_feature(dev, EMAC_FTR_HAS_VLAN_CTAG_TX)) { ndev->vlan_features |= ndev->hw_features; ndev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX; diff --git a/drivers/net/ethernet/ibm/emac/core.h b/drivers/net/ethernet/ibm/emac/core.h index 8d84d439168c..0bcfe952a3cf 100644 --- a/drivers/net/ethernet/ibm/emac/core.h +++ b/drivers/net/ethernet/ibm/emac/core.h @@ 
-245,6 +245,9 @@ struct emac_instance { u32 xaht_slots_shift; u32 xaht_width_shift; + /* TAH TSO start index */ + int tso_ss_mtu_start; + /* Descriptor management */ struct mal_descriptor *tx_desc; @@ -336,6 +339,8 @@ struct emac_instance { #define EMAC_FTR_APM821XX_NO_HALF_DUPLEX 0x00001000 /* EMAC can insert 802.1Q tag */ #define EMAC_FTR_HAS_VLAN_CTAG_TX 0x00002000 +/* TAH can do TCP segmentation offload */ +#define EMAC_FTR_TAH_HAS_TSO 0x00004000 /* Right now, we don't quite handle the always/possible masks on the * most optimal way as we don't have a way to say something like @@ -352,6 +357,8 @@ enum { #endif #ifdef CONFIG_IBM_EMAC_TAH EMAC_FTR_HAS_TAH | + EMAC_FTR_TAH_HAS_TSO | + #endif #ifdef CONFIG_IBM_EMAC_ZMII EMAC_FTR_HAS_ZMII | diff --git a/drivers/net/ethernet/ibm/emac/emac.h b/drivers/net/ethernet/ibm/emac/emac.h index e2f80cca9bed..833967aceb2f 100644 --- a/drivers/net/ethernet/ibm/emac/emac.h +++ b/drivers/net/ethernet/ibm/emac/emac.h @@ -266,6 +266,13 @@ struct emac_regs { #define EMAC_TX_CTRL_IVT 0x0020 #define EMAC_TX_CTRL_RVT 0x0010 #define EMAC_TX_CTRL_TAH_CSUM 0x000e +#define EMAC_TX_CTRL_TAH_SSR(idx) (((idx) + 1) << 1) +#define EMAC_TX_CTRL_TAH_SSR5 0x000c +#define EMAC_TX_CTRL_TAH_SSR4 0x000a +#define EMAC_TX_CTRL_TAH_SSR3 0x0008 +#define EMAC_TX_CTRL_TAH_SSR2 0x0006 +#define EMAC_TX_CTRL_TAH_SSR1 0x0004 +#define EMAC_TX_CTRL_TAH_SSR0 0x0002 /* EMAC specific TX descriptor status fields (read access) */ #define EMAC_TX_ST_BFCS 0x0200 diff --git a/drivers/net/ethernet/ibm/emac/tah.c b/drivers/net/ethernet/ibm/emac/tah.c index 9912456dca48..619c08ee22f7 100644 --- a/drivers/net/ethernet/ibm/emac/tah.c +++ b/drivers/net/ethernet/ibm/emac/tah.c @@ -45,6 +45,24 @@ void tah_detach(struct platform_device *ofdev, int channel) mutex_unlock(&dev->lock); } +static void tah_set_ssr(struct platform_device *ofdev) +{ + struct tah_instance *dev = dev_get_drvdata(&ofdev->dev); + struct tah_regs __iomem *p = dev->base; + int i; + + mutex_lock(&dev->lock); + + 
for (i = 0; i < ARRAY_SIZE(tah_ss); i++) { + /* Segment size can be up to 16K, but needs + * to be a multiple of 2 bytes + */ + out_be32(&p->ssr0 + i, (tah_ss[i] & 0x3ffc) << 16); + } + + mutex_unlock(&dev->lock); +} + void tah_reset(struct platform_device *ofdev) { struct tah_instance *dev = platform_get_drvdata(ofdev); @@ -64,6 +82,8 @@ void tah_reset(struct platform_device *ofdev) out_be32(&p->mr, TAH_MR_CVR | TAH_MR_ST_768 | TAH_MR_TFS_10KB | TAH_MR_DTFP | TAH_MR_DIG); + + tah_set_ssr(ofdev); } int tah_get_regs_len(struct platform_device *ofdev) @@ -118,7 +138,7 @@ static int tah_probe(struct platform_device *ofdev) platform_set_drvdata(ofdev, dev); - /* Initialize TAH and enable IPv4 checksum verification, no TSO yet */ + /* Initialize TAH and enable IPv4 checksum verification */ tah_reset(ofdev); printk(KERN_INFO "TAH %pOF initialized\n", ofdev->dev.of_node); diff --git a/drivers/net/ethernet/ibm/emac/tah.h b/drivers/net/ethernet/ibm/emac/tah.h index 4d5f336f07b3..2cb0629f30e2 100644 --- a/drivers/net/ethernet/ibm/emac/tah.h +++ b/drivers/net/ethernet/ibm/emac/tah.h @@ -36,6 +36,8 @@ struct tah_regs { u32 tsr; }; +#define TAH_NO_SSR 6 +extern const u32 tah_ss[TAH_NO_SSR]; /* TAH device */ struct tah_instance { -- 2.19.1