If you want to poke at the ramips ethernet driver, I have a patch that I haven't submitted yet (I think it has occasional problems on bootup and probably unconditionally uses features that are not available on all chipset types):
This adds support for HW checksumming, scatter/gather DMA and generic
segmentation offload, as well as vlan offload. It basically works fine for me
on RT3050 and RT3052, but no long-term stability tests have been done. It
gives about a 2x throughput boost in my testing (throughput is mostly
memory-bound; my RT3052 device has 2 RAM chips and is also 2x as fast as the
RT3050 device). Adding the remaining queues would be the next step. :)

Index: target/linux/ramips/files/drivers/net/ethernet/ramips/ramips_main.c
===================================================================
--- target/linux/ramips/files/drivers/net/ethernet/ramips/ramips_main.c        (working copy)
+++ target/linux/ramips/files/drivers/net/ethernet/ramips/ramips_main.c        (working copy)
@@ -23,6 +23,7 @@
 #include <linux/skbuff.h>
 #include <linux/etherdevice.h>
 #include <linux/ethtool.h>
+#include <linux/if_vlan.h>
 #include <linux/platform_device.h>
 #include <linux/phy.h>
 
@@ -88,6 +89,52 @@
                      RAMIPS_GDMA1_MAC_ADRL);
 }
 
+static void
+ramips_hw_set_vid(struct raeth_priv *re, int idx, u16 vid)
+{
+        u32 reg = re->vlan_info.reg_shadow[idx / 2];
+
+        if (idx & 1)
+                reg = (reg & 0x0000ffff) | (vid << 16);
+        else
+                reg = (reg & 0xffff0000) | vid;
+        ramips_fe_wr(reg, RAMIPS_CDMA_VLAN_ID_0001 + (idx / 2) * 4);
+        re->vlan_info.reg_shadow[idx / 2] = reg;
+        if (vid < NUM_VLANS)
+                re->vlan_info.direct_map[vid] = idx;
+}
+
+static void
+ramips_hw_setup_vid(struct raeth_priv *re)
+{
+        int i;
+
+        memset(&re->vlan_info, 0, sizeof(re->vlan_info));
+        for (i = 0; i < NUM_VLANS; i++)
+                ramips_hw_set_vid(re, i, 0);
+        for (i = 0; i < NUM_VLANS; i++)
+                re->vlan_info.direct_map[i] = VLAN_DIRECT_INVALID;
+}
+
+static int
+ramips_hw_lookup_vid(struct raeth_priv *re, u16 vid)
+{
+        int i;
+
+        if (likely(vid > 0 && vid < NUM_VLANS)) {
+                u8 idx = re->vlan_info.direct_map[vid];
+                if (idx == VLAN_DIRECT_INVALID)
+                        return -EINVAL;
+                return idx;
+        }
+        for (i = 0; i < NUM_VLANS / 2; i++) {
+                u32 reg = re->vlan_info.reg_shadow[i];
+                if ((reg & 0xffff) == vid)
+                        return 2 * i;
+                if ((reg >> 16) == vid)
+                        return 2 * i + 1;
+        }
+
+        return -EINVAL;
+}
+
 static struct sk_buff *
 ramips_alloc_skb(struct raeth_priv *re)
 {
@@ -118,8 +165,10 @@
                 struct ramips_tx_dma *txd;
 
                 txd = &re->tx[i];
-                txd->txd4 = TX_DMA_QN(3) | TX_DMA_PN(1);
+                txd->txd1 = 0;
                 txd->txd2 = TX_DMA_LSO | TX_DMA_DONE;
+                txd->txd3 = 0;
+                txd->txd4 = TX_DMA_QN(3) | TX_DMA_PN(1) | TX_DMA_ICO(1) | TX_DMA_UCO(1) | TX_DMA_TCO(1);
 
                 txi = &re->tx_info[i];
                 txi->tx_desc = txd;
@@ -148,6 +197,8 @@
 
                 rxd->rxd1 = (unsigned int) dma_addr;
                 rxd->rxd2 = RX_DMA_LSO;
+                rxd->rxd3 = 0;
+                rxd->rxd4 = 0;
         }
 
         /* flush descriptors */
@@ -177,6 +228,8 @@
                         txi->tx_skb = NULL;
                 }
         }
+
+        netdev_reset_queue(re->netdev);
 }
 
 #if defined(CONFIG_RALINK_RT288X) || defined(CONFIG_RALINK_RT3883)
@@ -640,6 +693,7 @@
 static void
 ramips_setup_dma(struct raeth_priv *re)
 {
+        re->tx_crls_idx = 0;
         ramips_fe_wr(re->tx_desc_dma, RAMIPS_TX_BASE_PTR0);
         ramips_fe_wr(NUM_TX_DESC, RAMIPS_TX_MAX_CNT0);
         ramips_fe_wr(0, RAMIPS_TX_CTX_IDX0);
@@ -651,62 +705,138 @@
         ramips_fe_wr(RAMIPS_PST_DRX_IDX0, RAMIPS_PDMA_RST_CFG);
 }
 
+static void
+ramips_eth_gso_xmit(struct net_device *dev, u32 *ctx_idx, dma_addr_t mapping, u32 len, u32 flags)
+{
+        struct raeth_priv *re = netdev_priv(dev);
+        struct raeth_tx_info *txi;
+        struct ramips_tx_dma *txd;
+        u32 ctx_next = (*ctx_idx + 1) % NUM_TX_DESC;
+
+        txi = &re->tx_info[*ctx_idx];
+        txd = txi->tx_desc;
+
+        /*
+         * FIXME: Since the descriptor is 16 bytes and the cacheline is 32, I
+         * wonder if the following could happen: If dma is sending out the
+         * descriptor immediately preceding this one and they are on the same
+         * cacheline, the writeback may race with the dma engine's writeback.
+         */
+        if (txd->txd2 & TX_DMA_DONE) {
+                txd->txd1 = (unsigned int) mapping;
+                txd->txd2 = TX_DMA_PLEN0(len);
+                txd->txd3 = 0;
+                txd->txd4 = flags;
+        } else {
+                txd->txd2 |= TX_DMA_PLEN1(len);
+                txd->txd3 = (unsigned int) mapping;
+                *ctx_idx = ctx_next;
+        }
+}
+
+static void
+ramips_eth_gso_xmit_finalize(struct net_device *dev, u32 ctx_idx)
+{
+        struct raeth_priv *re = netdev_priv(dev);
+        struct raeth_tx_info *txi;
+        struct ramips_tx_dma *txd;
+
+        txi = &re->tx_info[ctx_idx];
+        txd = txi->tx_desc;
+
+        txd->txd2 |= txd->txd3 ? TX_DMA_LS1 : TX_DMA_LSO;
+
+        /* flush descriptors */
+        wmb();
+}
+
 static int
-ramips_eth_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
+ramips_eth_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
         struct raeth_priv *re = netdev_priv(dev);
-        struct raeth_tx_info *txi, *txi_next;
-        struct ramips_tx_dma *txd, *txd_next;
-        unsigned long tx;
-        unsigned int tx_next;
+        struct raeth_tx_info *txi;
+        u32 tx, last_tx;
+        u32 dtx = ramips_fe_rr(RAMIPS_TX_DTX_IDX0);
+        u32 crls = re->tx_crls_idx;
+        u32 inflight = (NUM_TX_DESC + crls - dtx) % NUM_TX_DESC;
+        u32 budget = NUM_TX_DESC - inflight;
+        u32 nr_frags = skb_shinfo(skb)->nr_frags;
+        u32 flags = TX_DMA_QN(3) | TX_DMA_PN(1);
         dma_addr_t mapped_addr;
+        int i;
 
-        if (re->plat->min_pkt_len) {
-                if (skb->len < re->plat->min_pkt_len) {
-                        if (skb_padto(skb, re->plat->min_pkt_len)) {
-                                printk(KERN_ERR
-                                       "ramips_eth: skb_padto failed\n");
-                                kfree_skb(skb);
-                                return 0;
-                        }
-                        skb_put(skb, re->plat->min_pkt_len - skb->len);
+        if (budget < NUM_TX_DESC / 2) {
+                tasklet_schedule(&re->tx_housekeeping_tasklet);
+        }
+
+        if (unlikely(re->plat->min_pkt_len &&
+                     skb->len < re->plat->min_pkt_len)) {
+                if (skb_padto(skb, re->plat->min_pkt_len)) {
+                        printk(KERN_ERR
+                               "ramips_eth: skb_padto failed\n");
+                        kfree_skb(skb);
+                        return 0;
                 }
+                skb_put(skb, re->plat->min_pkt_len - skb->len);
+                nr_frags = skb_shinfo(skb)->nr_frags;
         }
 
+        if (unlikely(budget <= (1 + nr_frags/2 + 4))) { /* 4 descs paranoia */
+                if (!netif_queue_stopped(dev)) {
+                        netif_stop_queue(dev);
+                } else {
+                        netdev_err(dev,
+                                   "BUG! Tx Ring full when queue awake!\n");
+                }
+                return NETDEV_TX_BUSY;
+        }
+
+        if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
+                flags |= TX_DMA_ICO(1) | TX_DMA_UCO(1) | TX_DMA_TCO(1);
+                re->tx_csum_ofld++;
+        }
+
+        if (vlan_tx_tag_present(skb)) {
+                u32 vid = vlan_tx_tag_get(skb);
+                int idx = ramips_hw_lookup_vid(re, vid);
+                if (unlikely(idx < 0 || idx >= NUM_VLANS)) {
+                        dev_kfree_skb(skb);
+                        dev->stats.tx_dropped++;
+                        return NETDEV_TX_OK;
+                }
+                flags |= TX_DMA_INSV(1) | TX_DMA_VIDX(idx);
+                re->tx_vlan++;
+        }
+
         dev->trans_start = jiffies;
         mapped_addr = dma_map_single(&re->netdev->dev, skb->data, skb->len,
                                      DMA_TO_DEVICE);
 
         spin_lock(&re->page_lock);
-        tx = ramips_fe_rr(RAMIPS_TX_CTX_IDX0);
-        tx_next = (tx + 1) % NUM_TX_DESC;
+        last_tx = tx = ramips_fe_rr(RAMIPS_TX_CTX_IDX0);
+        ramips_eth_gso_xmit(dev, &tx, mapped_addr, skb_headlen(skb), flags);
 
-        txi = &re->tx_info[tx];
-        txd = txi->tx_desc;
-        txi_next = &re->tx_info[tx_next];
-        txd_next = txi_next->tx_desc;
+        for (i = 0; i < nr_frags; i++) {
+                skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+                u32 len = skb_frag_size(frag);
 
-        if ((txi->tx_skb) || (txi_next->tx_skb) ||
-            !(txd->txd2 & TX_DMA_DONE) ||
-            !(txd_next->txd2 & TX_DMA_DONE))
-                goto out;
-
+                mapped_addr = skb_frag_dma_map(&re->netdev->dev, frag, 0, len,
+                                               DMA_TO_DEVICE);
+                last_tx = tx;
+                ramips_eth_gso_xmit(dev, &tx, mapped_addr, len, flags);
+        }
+        ramips_eth_gso_xmit_finalize(dev, last_tx);
+        txi = &re->tx_info[last_tx];
         txi->tx_skb = skb;
 
-        txd->txd1 = (unsigned int) mapped_addr;
-        wmb();
-        txd->txd2 = TX_DMA_LSO | TX_DMA_PLEN0(skb->len);
-        dev->stats.tx_packets++;
+        (void)ramips_fe_rr(RAMIPS_TX_CTX_IDX0);
+        ramips_fe_wr((last_tx + 1) % NUM_TX_DESC, RAMIPS_TX_CTX_IDX0);
+
+        netdev_sent_queue(dev, skb->len);
         dev->stats.tx_bytes += skb->len;
-        ramips_fe_wr(tx_next, RAMIPS_TX_CTX_IDX0);
+        dev->stats.tx_packets += nr_frags + 1;
 
         spin_unlock(&re->page_lock);
 
         return NETDEV_TX_OK;
-
- out:
-        spin_unlock(&re->page_lock);
-        dev->stats.tx_dropped++;
-        kfree_skb(skb);
-        return NETDEV_TX_OK;
 }
 
 static void
@@ -739,6 +869,9 @@
                 /* Reuse the buffer on allocation failures */
                 if (new_skb) {
                         dma_addr_t dma_addr;
+                        u32 csum_mask = RX_DMA_IPFVLD | RX_DMA_I4FVLD |
+                                        RX_DMA_IPF | RX_DMA_I4F;
+                        u32 csum_ok = RX_DMA_IPFVLD | RX_DMA_I4FVLD;
 
                         dma_unmap_single(&re->netdev->dev, rxi->rx_dma,
                                          MAX_RX_LENGTH, DMA_FROM_DEVICE);
@@ -746,7 +879,14 @@
                         skb_put(rx_skb, pktlen);
                         rx_skb->dev = dev;
                         rx_skb->protocol = eth_type_trans(rx_skb, dev);
-                        rx_skb->ip_summed = CHECKSUM_NONE;
+                        /* skip udp/tcp checksum if already validated by hw */
+                        if (dev->features & NETIF_F_RXCSUM &&
+                            (rxd->rxd3 & csum_mask) == csum_ok) {
+                                rx_skb->ip_summed = CHECKSUM_UNNECESSARY;
+                                re->rx_csum_vld++;
+                        } else {
+                                rx_skb->ip_summed = CHECKSUM_NONE;
+                        }
                         dev->stats.rx_packets++;
                         dev->stats.rx_bytes += pktlen;
                         netif_rx(rx_skb);
@@ -780,24 +920,28 @@
 {
         struct net_device *dev = (struct net_device*)ptr;
         struct raeth_priv *re = netdev_priv(dev);
+        unsigned int bytes_compl = 0, pkts_compl = 0;
 
         spin_lock(&re->page_lock);
-        while (1) {
+        while (re->tx_crls_idx != ramips_fe_rr(RAMIPS_TX_DTX_IDX0)) {
                 struct raeth_tx_info *txi;
                 struct ramips_tx_dma *txd;
 
-                txi = &re->tx_info[re->skb_free_idx];
+                txi = &re->tx_info[re->tx_crls_idx];
                 txd = txi->tx_desc;
 
-                if (!(txd->txd2 & TX_DMA_DONE) || !(txi->tx_skb))
-                        break;
+                if (txi->tx_skb) {
+                        pkts_compl++;
+                        bytes_compl += txi->tx_skb->len;
 
-                dev_kfree_skb_irq(txi->tx_skb);
-                txi->tx_skb = NULL;
-                re->skb_free_idx++;
-                if (re->skb_free_idx >= NUM_TX_DESC)
-                        re->skb_free_idx = 0;
+                        dev_kfree_skb_irq(txi->tx_skb);
+                        txi->tx_skb = NULL;
+                }
+                re->tx_crls_idx = (re->tx_crls_idx + 1) % NUM_TX_DESC;
         }
+        netdev_completed_queue(dev, pkts_compl, bytes_compl);
+        if (netif_queue_stopped(dev))
+                netif_wake_queue(dev);
         spin_unlock(&re->page_lock);
 
         ramips_fe_int_enable(RAMIPS_TX_DLY_INT);
@@ -811,11 +955,78 @@
         tasklet_schedule(&re->tx_housekeeping_tasklet);
 }
 
+static int
+ramips_eth_vlan_add_vid(struct net_device *dev, unsigned short vid)
+{
+        struct raeth_priv *re = netdev_priv(dev);
+        int idx;
+
+        printk(KERN_INFO "ramips_eth: vlan_add_vid(%d)\n", vid);
+
+        if (vid == 0)
+                return 0;
+
+        if (ramips_hw_lookup_vid(re, vid) >= 0) {
+                printk(KERN_ERR "ramips_eth: vlan %d already present\n", vid);
+                return -EINVAL;
+        }
+
+        idx = ramips_hw_lookup_vid(re, 0);
+        if (idx < 0) {
+                printk(KERN_ERR "ramips_eth: can't add more than %d vlans\n", NUM_VLANS);
+                return -ENOSPC;
+        }
+
+        ramips_hw_set_vid(re, idx, vid);
+        printk(KERN_INFO "ramips_eth: added vlan %d in slot %d\n", vid, idx);
+
+        return 0;
+}
+
+static int
+ramips_eth_vlan_kill_vid(struct net_device *dev, unsigned short vid)
+{
+        struct raeth_priv *re = netdev_priv(dev);
+        int idx = ramips_hw_lookup_vid(re, vid);
+
+        printk(KERN_INFO "ramips_eth: vlan_kill_vid(%d (idx=%d))\n", vid, idx);
+        if (vid == 0)
+                return 0;
+
+        if (idx < 0) {
+                printk(KERN_ERR "ramips_eth: vlan %d not present\n", vid);
+                return -EINVAL;
+        }
+        ramips_hw_set_vid(re, idx, 0);
+        if (vid < NUM_VLANS)
+                re->vlan_info.direct_map[vid] = VLAN_DIRECT_INVALID;
+
+        return 0;
+}
+
+static int
+ramips_eth_set_features(struct net_device *dev, netdev_features_t features)
+{
+        struct raeth_priv *re = netdev_priv(dev);
+        netdev_features_t changed = (features ^ dev->features) & dev->hw_features;
+
+        /* TODO */
+        printk(KERN_INFO "ramips_eth: rx_csum_vld %d\n", re->rx_csum_vld);
+        printk(KERN_INFO "ramips_eth: tx_csum_ofld %d\n", re->tx_csum_ofld);
+        printk(KERN_INFO "ramips_eth: tx_vlan %d\n", re->tx_vlan);
+
+        dev->features ^= changed;
+
+        return 0;
+}
+
 static irqreturn_t
 ramips_eth_irq(int irq, void *dev)
 {
+        struct net_device *netdev = dev;
         struct raeth_priv *re = netdev_priv(dev);
-        unsigned int status;
+        u32 status, unhandled;
+        u32 handled = 0;
 
         status = ramips_fe_rr(RAMIPS_FE_INT_STATUS);
         status &= ramips_fe_rr(RAMIPS_FE_INT_ENABLE);
@@ -826,15 +1037,55 @@
         ramips_fe_wr(status, RAMIPS_FE_INT_STATUS);
 
         if (status & RAMIPS_RX_DLY_INT) {
+                handled |= RAMIPS_RX_DLY_INT;
                 ramips_fe_int_disable(RAMIPS_RX_DLY_INT);
                 tasklet_schedule(&re->rx_tasklet);
         }
 
         if (status & RAMIPS_TX_DLY_INT) {
+                handled |= RAMIPS_TX_DLY_INT;
                 ramips_fe_int_disable(RAMIPS_TX_DLY_INT);
                 tasklet_schedule(&re->tx_housekeeping_tasklet);
         }
 
+        if (status & RAMIPS_GDM_OTHER_DROP) {
+                handled |= RAMIPS_GDM_OTHER_DROP;
+                netdev->stats.tx_errors++;
+        }
+
+        if (status & RAMIPS_PSE_BUF_DROP) {
+                handled |= RAMIPS_PSE_BUF_DROP;
+                netdev->stats.tx_dropped++;
+                /* stop xmit for a bit */
+                if (!netif_queue_stopped(netdev))
+                        netif_stop_queue(netdev);
+                tasklet_schedule(&re->tx_housekeeping_tasklet);
+        }
+
+        if (status & RAMIPS_TX_COHERENT) {
+                u32 dtx = ramips_fe_rr(RAMIPS_TX_DTX_IDX0);
+                u32 ctx = ramips_fe_rr(RAMIPS_TX_CTX_IDX0);
+                u32 crls = re->tx_crls_idx;
+                printk(KERN_WARNING "ramips_eth: tx_coherent error: crls=%d(%08x) dtx=%d(%08x) ctx=%d(%08x)\n",
+                       crls, re->tx_info[crls].tx_desc->txd2,
+                       dtx, re->tx_info[dtx].tx_desc->txd2,
+                       ctx, re->tx_info[ctx].tx_desc->txd2);
+        }
+
+        if (status & RAMIPS_RX_COHERENT) {
+                u32 drx = ramips_fe_rr(RAMIPS_RX_DRX_IDX0);
+                u32 calc = ramips_fe_rr(RAMIPS_RX_CALC_IDX0);
+                printk(KERN_WARNING "ramips_eth: rx_coherent error: drx=%d(%08x) calc=%d(%08x)\n",
+                       drx, re->rx_info[drx].rx_desc->rxd2,
+                       calc, re->rx_info[calc].rx_desc->rxd2);
+        }
+
+        unhandled = status & ~handled;
+        if (unhandled) {
+                ramips_fe_int_disable(unhandled);
+                printk(KERN_WARNING "ramips_eth: unhandled irq status 0x%08x\n", unhandled);
+        }
+
         raeth_debugfs_update_int_stats(re, status);
 
         return IRQ_HANDLED;
@@ -857,6 +1108,7 @@
         ramips_ring_setup(re);
         ramips_hw_set_macaddr(dev->dev_addr);
+        ramips_hw_setup_vid(re);
 
         ramips_setup_dma(re);
         ramips_fe_wr((ramips_fe_rr(RAMIPS_PDMA_GLO_CFG) & 0xff) |
@@ -875,13 +1127,19 @@
         ramips_phy_start(re);
 
         ramips_fe_wr(RAMIPS_DELAY_INIT, RAMIPS_DLY_INT_CFG);
-        ramips_fe_wr(RAMIPS_TX_DLY_INT | RAMIPS_RX_DLY_INT, RAMIPS_FE_INT_ENABLE);
+        ramips_fe_wr(~(RAMIPS_TX_DONE_INT0 | RAMIPS_RX_DONE_INT0), RAMIPS_FE_INT_ENABLE);
         ramips_fe_wr(ramips_fe_rr(RAMIPS_GDMA1_FWD_CFG) &
                      ~(RAMIPS_GDM1_ICS_EN | RAMIPS_GDM1_TCS_EN |
                        RAMIPS_GDM1_UCS_EN | 0xffff),
                      RAMIPS_GDMA1_FWD_CFG);
+        ramips_fe_wr(ramips_fe_rr(RAMIPS_GDMA1_FWD_CFG) |
+                     RAMIPS_GDM1_ICS_EN | RAMIPS_GDM1_TCS_EN | RAMIPS_GDM1_UCS_EN,
+                     RAMIPS_GDMA1_FWD_CFG);
         ramips_fe_wr(ramips_fe_rr(RAMIPS_CDMA_CSG_CFG) &
                      ~(RAMIPS_ICS_GEN_EN | RAMIPS_TCS_GEN_EN | RAMIPS_UCS_GEN_EN),
                      RAMIPS_CDMA_CSG_CFG);
+        ramips_fe_wr(ramips_fe_rr(RAMIPS_CDMA_CSG_CFG) |
+                     RAMIPS_ICS_GEN_EN | RAMIPS_TCS_GEN_EN | RAMIPS_UCS_GEN_EN,
+                     RAMIPS_CDMA_CSG_CFG);
         ramips_fe_wr(RAMIPS_PSE_FQFC_CFG_INIT, RAMIPS_PSE_FQ_CFG);
         ramips_fe_wr(1, RAMIPS_FE_RST_GL);
         ramips_fe_wr(0, RAMIPS_FE_RST_GL);
@@ -965,16 +1223,44 @@
         ramips_mdio_cleanup(re);
 }
 
+static int
+ramips_eth_get_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
+{
+        // struct raeth_priv *re = netdev_priv(dev);
+
+        ecmd->supported = SUPPORTED_1000baseT_Full;
+        ecmd->autoneg = AUTONEG_ENABLE;
+        ecmd->duplex = -1;
+        ethtool_cmd_speed_set(ecmd, -1);
+
+        return 0;
+}
+
+static void
+ramips_eth_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
+{
+        strcpy(info->driver, "ramips_eth");
+}
+
+static const struct ethtool_ops ramips_eth_ethtool_ops = {
+        .get_settings = ramips_eth_get_settings,
+        .get_drvinfo = ramips_eth_get_drvinfo,
+        .get_link = ethtool_op_get_link,
+};
+
 static const struct net_device_ops ramips_eth_netdev_ops = {
         .ndo_init = ramips_eth_probe,
         .ndo_uninit = ramips_eth_uninit,
         .ndo_open = ramips_eth_open,
         .ndo_stop = ramips_eth_stop,
-        .ndo_start_xmit = ramips_eth_hard_start_xmit,
+        .ndo_start_xmit = ramips_eth_start_xmit,
         .ndo_tx_timeout = ramips_eth_timeout,
         .ndo_change_mtu = eth_change_mtu,
         .ndo_set_mac_address = eth_mac_addr,
         .ndo_validate_addr = eth_validate_addr,
+        .ndo_vlan_rx_add_vid = ramips_eth_vlan_add_vid,
+        .ndo_vlan_rx_kill_vid = ramips_eth_vlan_kill_vid,
+        .ndo_set_features = ramips_eth_set_features,
 };
 
 static int
@@ -1018,6 +1304,13 @@
         ramips_dev->base_addr = (unsigned long)ramips_fe_base;
         ramips_dev->netdev_ops = &ramips_eth_netdev_ops;
 
+        ramips_dev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_RXCSUM |
+                                  NETIF_F_HW_VLAN_FILTER | NETIF_F_HW_VLAN_TX;
+        ramips_dev->vlan_features = NETIF_F_SG | NETIF_F_IP_CSUM;
+        ramips_dev->features |= ramips_dev->hw_features;
+
+        SET_ETHTOOL_OPS(ramips_dev, &ramips_eth_ethtool_ops);
+
         re = netdev_priv(ramips_dev);
         re->netdev = ramips_dev;

-- 
Tobias
PGP: http://8ef7ddba.uguu.de
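
P.S.: In case the vlan table logic above looks odd: the hardware keeps two
16-bit VIDs per 32-bit register, so slot idx lives in register idx/2 (upper
or lower halfword depending on idx & 1), with a direct-mapped shortcut for
small VIDs. Here's a rough userspace sketch of the same packing and lookup
for anyone who wants to poke at it outside the kernel -- the NUM_VLANS and
VLAN_DIRECT_INVALID values are made up for the demo, set_vid()/lookup_vid()
are stand-ins for the driver functions, and the register write is stubbed
out:

#include <stdint.h>
#include <stdio.h>

#define NUM_VLANS		16	/* demo value, not the hw's */
#define VLAN_DIRECT_INVALID	0xff

static uint32_t reg_shadow[NUM_VLANS / 2];	/* two VIDs per register */
static uint8_t direct_map[NUM_VLANS];		/* vid -> slot, small vids only */

static void set_vid(int idx, uint16_t vid)
{
	uint32_t reg = reg_shadow[idx / 2];

	if (idx & 1)
		reg = (reg & 0x0000ffff) | ((uint32_t)vid << 16);
	else
		reg = (reg & 0xffff0000) | vid;
	reg_shadow[idx / 2] = reg;	/* ramips_fe_wr() would go here */
	if (vid < NUM_VLANS)
		direct_map[vid] = idx;
}

static int lookup_vid(uint16_t vid)
{
	int i;

	/* fast path: small VIDs are cached in the direct-mapped table */
	if (vid > 0 && vid < NUM_VLANS)
		return direct_map[vid] == VLAN_DIRECT_INVALID ?
			-1 : direct_map[vid];

	/* slow path: scan both halfwords of every shadow register */
	for (i = 0; i < NUM_VLANS / 2; i++) {
		if ((reg_shadow[i] & 0xffff) == vid)
			return 2 * i;
		if ((reg_shadow[i] >> 16) == vid)
			return 2 * i + 1;
	}
	return -1;
}

int main(void)
{
	int i;

	for (i = 0; i < NUM_VLANS; i++)
		direct_map[i] = VLAN_DIRECT_INVALID;

	set_vid(3, 7);		/* small vid: cached in direct_map */
	set_vid(4, 4000);	/* large vid: only findable by scanning */
	printf("vid 7 -> slot %d\n", lookup_vid(7));	   /* 3 */
	printf("vid 4000 -> slot %d\n", lookup_vid(4000)); /* 4 */
	printf("vid 9 -> slot %d\n", lookup_vid(9));	   /* -1 */
	return 0;
}

This is also why ramips_eth_vlan_add_vid() can find a free slot with
ramips_hw_lookup_vid(re, 0): unused slots hold VID 0, and 0 always takes the
scanning path.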
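
P.P.S.: The reason the xmit path only budgets roughly 1 + nr_frags/2
descriptors (plus slack) is that each 16-byte tx descriptor carries two
buffer pointers (txd1/PLEN0 and txd3/PLEN1), so ramips_eth_gso_xmit() only
advances the index once the second slot is taken. A toy model of just that
packing, in plain C -- the ring length and fragment sizes are arbitrary, and
the DONE-bit handshake and TX_DMA_* bit layout are not modeled:

#include <stdio.h>

#define NUM_TX_DESC 8	/* demo ring size */

struct desc {
	int buf[2];	/* buffer lengths in slot 0 / slot 1; 0 = empty */
};

static struct desc ring[NUM_TX_DESC];

/* returns the descriptor index that received the buffer */
static int xmit_buf(int *ctx, int len)
{
	int idx = *ctx;

	if (!ring[idx].buf[0]) {
		ring[idx].buf[0] = len;		/* first slot, stay put */
	} else {
		ring[idx].buf[1] = len;		/* second slot, advance */
		*ctx = (*ctx + 1) % NUM_TX_DESC;
	}
	return idx;
}

int main(void)
{
	int frags[] = { 1000, 1448, 1448, 320 };	/* made-up frag sizes */
	int ctx = 0, last, i;

	last = xmit_buf(&ctx, 128);			/* linear head first */
	for (i = 0; i < 4; i++)
		last = xmit_buf(&ctx, frags[i]);

	/* the LS0/LS1 "last segment" bit would be set on descriptor 'last' */
	for (i = 0; i <= last; i++)
		printf("desc %d: plen0=%d plen1=%d\n",
		       i, ring[i].buf[0], ring[i].buf[1]);
	return 0;
}

A head plus four frags lands in three descriptors here, which matches what
ramips_eth_gso_xmit_finalize() expects: it sets TX_DMA_LS1 or TX_DMA_LS0
depending on whether the last buffer ended up in the second or first slot.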