From: Peter P Waskiewicz Jr <[EMAIL PROTECTED]>

Please note that this patch is *not* intended to be integrated into any
tree. It fulfills a request to demonstrate the proposed multiqueue
network device API in a driver. The necessary updates to the e1000
driver will come in a more official release. This is an as-is patch
against this version of e1000 and should not be used for anything other
than testing.
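Before the diff: a minimal sketch of what the proposed API asks of a
driver, using only the calls this patch exercises (alloc_etherdev_mq(),
NETIF_F_MULTI_QUEUE, skb->queue_mapping, netif_wake_subqueue()), and
assuming the core multiqueue patches that introduce them are applied.
None of this is e1000 code; my_priv, my_tx_ring, my_probe_netdev() and
my_xmit() are made-up names for illustration only:

	#include <linux/netdevice.h>
	#include <linux/skbuff.h>

	#define MY_NUM_TX_QUEUES 2	/* hypothetical queue count */

	struct my_tx_ring {
		unsigned int next_to_use;	/* hypothetical ring state */
	};

	struct my_priv {
		struct my_tx_ring *tx_ring;	/* one entry per HW queue */
	};

	static struct net_device *my_probe_netdev(void)
	{
		/* Ask the stack for a net_device with multiple TX subqueues. */
		struct net_device *netdev =
			alloc_etherdev_mq(sizeof(struct my_priv),
			                  MY_NUM_TX_QUEUES);

		if (!netdev)
			return NULL;

		/* Advertise multiqueue TX support to the stack. */
		netdev->features |= NETIF_F_MULTI_QUEUE;
		return netdev;
	}

	static int my_xmit(struct sk_buff *skb, struct net_device *netdev)
	{
		struct my_priv *priv = netdev_priv(netdev);
		/* The stack has already chosen a subqueue for this skb. */
		struct my_tx_ring *ring = &priv->tx_ring[skb->queue_mapping];

		/* Stand-in for posting the skb to the hardware ring. */
		ring->next_to_use++;

		/* When descriptors free up, wake just this subqueue
		 * instead of the whole device. */
		netif_wake_subqueue(netdev, skb->queue_mapping);
		return 0;
	}

The e1000 changes below are this pattern applied to the driver's
existing ring structures, plus RSS setup so receive traffic actually
spreads across the two RX queues.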
Signed-off-by: Peter P. Waskiewicz Jr <[EMAIL PROTECTED]>
---
 drivers/net/e1000/e1000.h         |    8 ++
 drivers/net/e1000/e1000_ethtool.c |   47 ++++++++++-
 drivers/net/e1000/e1000_main.c    |  164 ++++++++++++++++++++++++++++++++-----
 3 files changed, 194 insertions(+), 25 deletions(-)

diff --git a/drivers/net/e1000/e1000.h b/drivers/net/e1000/e1000.h
index dd4b728..15e484e 100644
--- a/drivers/net/e1000/e1000.h
+++ b/drivers/net/e1000/e1000.h
@@ -168,6 +168,10 @@ struct e1000_buffer {
 	uint16_t next_to_watch;
 };

+struct e1000_queue_stats {
+	u64 packets;
+	u64 bytes;
+};

 struct e1000_ps_page { struct page *ps_page[PS_PAGE_BUFFERS]; };
 struct e1000_ps_page_dma { uint64_t ps_page_dma[PS_PAGE_BUFFERS]; };
@@ -188,9 +192,11 @@ struct e1000_tx_ring {
 	/* array of buffer information structs */
 	struct e1000_buffer *buffer_info;

+	spinlock_t tx_queue_lock;
 	spinlock_t tx_lock;
 	uint16_t tdh;
 	uint16_t tdt;
+	struct e1000_queue_stats tx_stats;
 	boolean_t last_tx_tso;
 };
@@ -218,6 +224,7 @@ struct e1000_rx_ring {
 	uint16_t rdh;
 	uint16_t rdt;
+	struct e1000_queue_stats rx_stats;
 };

 #define E1000_DESC_UNUSED(R) \
@@ -271,6 +278,7 @@ struct e1000_adapter {
 	/* TX */
 	struct e1000_tx_ring *tx_ring;      /* One per active queue */
+	struct e1000_tx_ring **cpu_tx_ring; /* per-cpu, set in e1000_setup_queue_mapping() */
 	unsigned int restart_queue;
 	unsigned long tx_queue_len;
 	uint32_t txd_cmd;
diff --git a/drivers/net/e1000/e1000_ethtool.c b/drivers/net/e1000/e1000_ethtool.c
index 6777887..fd466a1 100644
--- a/drivers/net/e1000/e1000_ethtool.c
+++ b/drivers/net/e1000/e1000_ethtool.c
@@ -105,7 +105,12 @@ static const struct e1000_stats e1000_gstrings_stats[] = {
 	{ "dropped_smbus", E1000_STAT(stats.mgpdc) },
 };

-#define E1000_QUEUE_STATS_LEN 0
+#define E1000_QUEUE_STATS_LEN \
+	((((((struct e1000_adapter *)netdev->priv)->num_rx_queues > 1) ? \
+	    ((struct e1000_adapter *)netdev->priv)->num_rx_queues : 0) + \
+	  ((((struct e1000_adapter *)netdev->priv)->num_tx_queues > 1) ? \
+	    ((struct e1000_adapter *)netdev->priv)->num_tx_queues : 0)) * \
+	 (sizeof(struct e1000_queue_stats) / sizeof(u64)))
 #define E1000_GLOBAL_STATS_LEN	\
 	sizeof(e1000_gstrings_stats) / sizeof(struct e1000_stats)
 #define E1000_STATS_LEN (E1000_GLOBAL_STATS_LEN + E1000_QUEUE_STATS_LEN)
@@ -693,8 +698,10 @@ e1000_set_ringparam(struct net_device *netdev,
 	                  E1000_MAX_TXD : E1000_MAX_82544_TXD));
 	E1000_ROUNDUP(txdr->count, REQ_TX_DESCRIPTOR_MULTIPLE);

-	for (i = 0; i < adapter->num_tx_queues; i++)
+	for (i = 0; i < adapter->num_tx_queues; i++) {
 		txdr[i].count = txdr->count;
+		spin_lock_init(&adapter->tx_ring[i].tx_queue_lock);
+	}
 	for (i = 0; i < adapter->num_rx_queues; i++)
 		rxdr[i].count = rxdr->count;
@@ -1909,6 +1916,9 @@ e1000_get_ethtool_stats(struct net_device *netdev,
 		struct ethtool_stats *stats, uint64_t *data)
 {
 	struct e1000_adapter *adapter = netdev_priv(netdev);
+	u64 *queue_stat;
+	int stat_count = sizeof(struct e1000_queue_stats) / sizeof(u64);
+	int j, k;
 	int i;

 	e1000_update_stats(adapter);
@@ -1917,12 +1927,29 @@ e1000_get_ethtool_stats(struct net_device *netdev,
 		data[i] = (e1000_gstrings_stats[i].sizeof_stat ==
 			sizeof(uint64_t)) ? *(uint64_t *)p : *(uint32_t *)p;
 	}
+	if (adapter->num_tx_queues > 1) {
+		for (j = 0; j < adapter->num_tx_queues; j++) {
+			queue_stat = (u64 *)&adapter->tx_ring[j].tx_stats;
+			for (k = 0; k < stat_count; k++)
+				data[i + k] = queue_stat[k];
+			i += k;
+		}
+	}
+	if (adapter->num_rx_queues > 1) {
+		for (j = 0; j < adapter->num_rx_queues; j++) {
+			queue_stat = (u64 *)&adapter->rx_ring[j].rx_stats;
+			for (k = 0; k < stat_count; k++)
+				data[i + k] = queue_stat[k];
+			i += k;
+		}
+	}
 /*	BUG_ON(i != E1000_STATS_LEN); */
 }

 static void
 e1000_get_strings(struct net_device *netdev, uint32_t stringset,
 		  uint8_t *data)
 {
+	struct e1000_adapter *adapter = netdev_priv(netdev);
 	uint8_t *p = data;
 	int i;
@@ -1937,6 +1964,22 @@ e1000_get_strings(struct net_device *netdev, uint32_t stringset, uint8_t *data)
 			       ETH_GSTRING_LEN);
 			p += ETH_GSTRING_LEN;
 		}
+		if (adapter->num_tx_queues > 1) {
+			for (i = 0; i < adapter->num_tx_queues; i++) {
+				sprintf(p, "tx_queue_%u_packets", i);
+				p += ETH_GSTRING_LEN;
+				sprintf(p, "tx_queue_%u_bytes", i);
+				p += ETH_GSTRING_LEN;
+			}
+		}
+		if (adapter->num_rx_queues > 1) {
+			for (i = 0; i < adapter->num_rx_queues; i++) {
+				sprintf(p, "rx_queue_%u_packets", i);
+				p += ETH_GSTRING_LEN;
+				sprintf(p, "rx_queue_%u_bytes", i);
+				p += ETH_GSTRING_LEN;
+			}
+		}
 /*		BUG_ON(p - data != E1000_STATS_LEN * ETH_GSTRING_LEN); */
 		break;
 	}
diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c
index 913db0c..4753674 100644
--- a/drivers/net/e1000/e1000_main.c
+++ b/drivers/net/e1000/e1000_main.c
@@ -29,6 +29,9 @@
 #include "e1000.h"
 #include <net/ip6_checksum.h>

+#include <linux/cpu.h>
+#include <linux/smp.h>
+
 char e1000_driver_name[] = "e1000";
 static char e1000_driver_string[] = "Intel(R) PRO/1000 Network Driver";
 #ifndef CONFIG_E1000_NAPI
@@ -137,6 +140,7 @@ static void e1000_exit_module(void);
 static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent);
 static void __devexit e1000_remove(struct pci_dev *pdev);
 static int e1000_alloc_queues(struct e1000_adapter *adapter);
+static void e1000_setup_queue_mapping(struct e1000_adapter *adapter);
 static int e1000_sw_init(struct e1000_adapter *adapter);
 static int e1000_open(struct net_device *netdev);
 static int e1000_close(struct net_device *netdev);
@@ -547,6 +551,8 @@ e1000_up(struct e1000_adapter *adapter)
 				E1000_DESC_UNUSED(ring));
 	}

+	e1000_setup_queue_mapping(adapter);
+
 	adapter->tx_queue_len = netdev->tx_queue_len;

 #ifdef CONFIG_E1000_NAPI
@@ -900,7 +906,7 @@ e1000_probe(struct pci_dev *pdev,
 	pci_set_master(pdev);

 	err = -ENOMEM;
-	netdev = alloc_etherdev(sizeof(struct e1000_adapter));
+	netdev = alloc_etherdev_mq(sizeof(struct e1000_adapter), 2);
 	if (!netdev)
 		goto err_alloc_etherdev;
@@ -1001,6 +1007,8 @@ e1000_probe(struct pci_dev *pdev,
 	netdev->features |= NETIF_F_LLTX;

+	netdev->features |= NETIF_F_MULTI_QUEUE;
+
 	adapter->en_mng_pt = e1000_enable_mng_pass_thru(&adapter->hw);

 	/* initialize eeprom parameters */
@@ -1317,8 +1325,8 @@ e1000_sw_init(struct e1000_adapter *adapter)
 		hw->master_slave = E1000_MASTER_SLAVE;
 	}

-	adapter->num_tx_queues = 1;
-	adapter->num_rx_queues = 1;
+	adapter->num_tx_queues = 2;
+	adapter->num_rx_queues = 2;

 	if (e1000_alloc_queues(adapter)) {
 		DPRINTK(PROBE, ERR, "Unable to allocate memory for queues\n");
@@ -1334,6 +1342,8 @@ e1000_sw_init(struct e1000_adapter *adapter)
 		set_bit(__LINK_STATE_START, &adapter->polling_netdev[i].state);
 	}
 	spin_lock_init(&adapter->tx_queue_lock);
+	for (i = 0; i < adapter->num_tx_queues; i++)
+		spin_lock_init(&adapter->tx_ring[i].tx_queue_lock);
 #endif

 	atomic_set(&adapter->irq_sem, 1);
@@ -1382,10 +1392,26 @@ e1000_alloc_queues(struct e1000_adapter *adapter)
 	}
 	memset(adapter->polling_netdev, 0, size);
 #endif

+	adapter->cpu_tx_ring = alloc_percpu(struct e1000_tx_ring *);
+
 	return E1000_SUCCESS;
 }

+static void
+e1000_setup_queue_mapping(struct e1000_adapter *adapter)
+{
+	int i, cpu;
+
+	lock_cpu_hotplug();
+	i = 0;
+	for_each_online_cpu(cpu) {
+		*per_cpu_ptr(adapter->cpu_tx_ring, cpu) =
+			&adapter->tx_ring[i % adapter->num_tx_queues];
+		i++;
+	}
+	unlock_cpu_hotplug();
+}
+
 /**
  * e1000_open - Called when a network interface is made active
  * @netdev: network interface device structure
@@ -1640,7 +1666,17 @@ e1000_configure_tx(struct e1000_adapter *adapter)

 	/* Setup the HW Tx Head and Tail descriptor pointers */
 	switch (adapter->num_tx_queues) {
-	case 1:
+	case 2:
+		tdba = adapter->tx_ring[1].dma;
+		tdlen = adapter->tx_ring[1].count *
+			sizeof(struct e1000_tx_desc);
+		E1000_WRITE_REG(hw, TDLEN1, tdlen);
+		E1000_WRITE_REG(hw, TDBAH1, (tdba >> 32));
+		E1000_WRITE_REG(hw, TDBAL1, (tdba & 0x00000000ffffffffULL));
+		E1000_WRITE_REG(hw, TDT1, 0);
+		E1000_WRITE_REG(hw, TDH1, 0);
+		adapter->tx_ring[1].tdh = ((hw->mac_type >= e1000_82543) ? E1000_TDH1 : E1000_82542_TDH1);
+		adapter->tx_ring[1].tdt = ((hw->mac_type >= e1000_82543) ? E1000_TDT1 : E1000_82542_TDT1);
+		/* fall through */
 	default:
 		tdba = adapter->tx_ring[0].dma;
 		tdlen = adapter->tx_ring[0].count *
@@ -2043,21 +2079,55 @@ e1000_configure_rx(struct e1000_adapter *adapter)
 	/* Setup the HW Rx Head and Tail Descriptor Pointers and
 	 * the Base and Length of the Rx Descriptor Ring */
 	switch (adapter->num_rx_queues) {
-	case 1:
+	case 2:
+		rdba = adapter->rx_ring[1].dma;
+		E1000_WRITE_REG(hw, RDLEN1, rdlen);
+		E1000_WRITE_REG(hw, RDBAH1, (rdba >> 32));
+		E1000_WRITE_REG(hw, RDBAL1, (rdba & 0x00000000ffffffffULL));
+		E1000_WRITE_REG(hw, RDT1, 0);
+		E1000_WRITE_REG(hw, RDH1, 0);
+		adapter->rx_ring[1].rdh = ((hw->mac_type >= e1000_82543) ? E1000_RDH1 : E1000_82542_RDH1);
+		adapter->rx_ring[1].rdt = ((hw->mac_type >= e1000_82543) ? E1000_RDT1 : E1000_82542_RDT1);
+		/* fall through */
 	default:
 		rdba = adapter->rx_ring[0].dma;
 		E1000_WRITE_REG(hw, RDLEN, rdlen);
 		E1000_WRITE_REG(hw, RDBAH, (rdba >> 32));
 		E1000_WRITE_REG(hw, RDBAL, (rdba & 0x00000000ffffffffULL));
 		E1000_WRITE_REG(hw, RDT, 0);
 		E1000_WRITE_REG(hw, RDH, 0);
 		adapter->rx_ring[0].rdh = ((hw->mac_type >= e1000_82543) ? E1000_RDH : E1000_82542_RDH);
 		adapter->rx_ring[0].rdt = ((hw->mac_type >= e1000_82543) ? E1000_RDT : E1000_82542_RDT);
 		break;
 	}

-	/* Enable 82543 Receive Checksum Offload for TCP and UDP */
-	if (hw->mac_type >= e1000_82543) {
+	if (adapter->num_rx_queues > 1) {
+		u32 random[10];
+		u32 reta, mrqc;
+		int i;
+
+		get_random_bytes(&random[0], 40);
+
+		reta = 0x00800080;
+		mrqc = E1000_MRQC_ENABLE_RSS_2Q;
+		/* Fill out redirection table */
+		for (i = 0; i < 32; i++)
+			E1000_WRITE_REG_ARRAY(hw, RETA, i, reta);
+		/* Fill out hash function seeds */
+		for (i = 0; i < 10; i++)
+			E1000_WRITE_REG_ARRAY(hw, RSSRK, i, random[i]);
+
+		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
+			 E1000_MRQC_RSS_FIELD_IPV4_TCP);
+		E1000_WRITE_REG(hw, MRQC, mrqc);
+
+		/* Multiqueue and packet checksumming are mutually exclusive.
+		 */
+		rxcsum = E1000_READ_REG(hw, RXCSUM);
+		rxcsum |= E1000_RXCSUM_PCSD;
+		E1000_WRITE_REG(hw, RXCSUM, rxcsum);
+	} else if (hw->mac_type >= e1000_82543) {
+		/* Enable 82543 Receive Checksum Offload for TCP and UDP */
 		rxcsum = E1000_READ_REG(hw, RXCSUM);
 		if (adapter->rx_csum == TRUE) {
 			rxcsum |= E1000_RXCSUM_TUOFL;
@@ -2555,6 +2624,7 @@ e1000_watchdog(unsigned long data)
 	struct e1000_tx_ring *txdr = adapter->tx_ring;
 	uint32_t link, tctl;
 	int32_t ret_val;
+	int i;

 	ret_val = e1000_check_for_link(&adapter->hw);
 	if ((ret_val == E1000_ERR_PHY) &&
@@ -2652,6 +2722,8 @@ e1000_watchdog(unsigned long data)
 			netif_carrier_on(netdev);
 			netif_wake_queue(netdev);
+			for (i = 0; i < adapter->num_tx_queues; i++)
+				netif_wake_subqueue(netdev, i);
 			mod_timer(&adapter->phy_info_timer, jiffies + 2 * HZ);
 			adapter->smartspeed = 0;
 		} else {
@@ -3266,7 +3338,7 @@ e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
 	 * to a flow. Right now, performance is impacted slightly negatively
 	 * if using multiple tx queues.  If the stack breaks away from a
 	 * single qdisc implementation, we can look at this again. */
-	tx_ring = adapter->tx_ring;
+	tx_ring = &adapter->tx_ring[skb->queue_mapping];

 	if (unlikely(skb->len <= 0)) {
 		dev_kfree_skb_any(skb);
@@ -3751,7 +3823,8 @@ e1000_intr_msi(int irq, void *data)
 	struct e1000_adapter *adapter = netdev_priv(netdev);
 	struct e1000_hw *hw = &adapter->hw;
 #ifndef CONFIG_E1000_NAPI
-	int i;
+	int i, j;
+	int rx_cleaned, tx_cleaned;
 #endif
 	uint32_t icr = E1000_READ_REG(hw, ICR);
@@ -3791,10 +3864,20 @@ e1000_intr_msi(int irq, void *data)
 	adapter->total_tx_packets = 0;
 	adapter->total_rx_packets = 0;

-	for (i = 0; i < E1000_MAX_INTR; i++)
-		if (unlikely(!adapter->clean_rx(adapter, adapter->rx_ring) &
-		   e1000_clean_tx_irq(adapter, adapter->tx_ring)))
+	for (i = 0; i < E1000_MAX_INTR; i++) {
+		rx_cleaned = 0;
+		for (j = 0; j < adapter->num_rx_queues; j++)
+			rx_cleaned |= adapter->clean_rx(adapter,
+			                                &adapter->rx_ring[j]);
+
+		tx_cleaned = 0;
+		for (j = 0; j < adapter->num_tx_queues; j++)
+			tx_cleaned |= e1000_clean_tx_irq(adapter,
+			                                 &adapter->tx_ring[j]);
+
+		if (!rx_cleaned && !tx_cleaned)
 			break;
+	}

 	if (likely(adapter->itr_setting & 3))
 		e1000_set_itr(adapter);
@@ -3818,7 +3901,7 @@ e1000_intr(int irq, void *data)
 	struct e1000_hw *hw = &adapter->hw;
 	uint32_t rctl, icr = E1000_READ_REG(hw, ICR);
 #ifndef CONFIG_E1000_NAPI
-	int i;
+	int i, j, rx_cleaned, tx_cleaned;
 #endif
 	if (unlikely(!icr))
 		return IRQ_NONE;  /* Not our interrupt */
@@ -3894,10 +3977,20 @@ e1000_intr(int irq, void *data)
 	adapter->total_tx_packets = 0;
 	adapter->total_rx_packets = 0;

-	for (i = 0; i < E1000_MAX_INTR; i++)
-		if (unlikely(!adapter->clean_rx(adapter, adapter->rx_ring) &
-		   e1000_clean_tx_irq(adapter, adapter->tx_ring)))
+	for (i = 0; i < E1000_MAX_INTR; i++) {
+		rx_cleaned = 0;
+		for (j = 0; j < adapter->num_rx_queues; j++)
+			rx_cleaned |= adapter->clean_rx(adapter,
+			                                &adapter->rx_ring[j]);
+
+		tx_cleaned = 0;
+		for (j = 0; j < adapter->num_tx_queues; j++)
+			tx_cleaned |= e1000_clean_tx_irq(adapter,
+			                                 &adapter->tx_ring[j]);
+
+		if (!rx_cleaned && !tx_cleaned)
 			break;
+	}

 	if (likely(adapter->itr_setting & 3))
 		e1000_set_itr(adapter);
@@ -3920,7 +4013,8 @@ e1000_clean(struct net_device *poll_dev, int *budget)
 {
 	struct e1000_adapter *adapter;
 	int work_to_do = min(*budget, poll_dev->quota);
-	int tx_cleaned = 0, work_done = 0;
+	int tx_cleaned = 1, work_done = 0;
+	int i;

 	/* Must NOT use netdev_priv macro here.
 	 */
 	adapter = poll_dev->priv;
@@ -3933,14 +4027,29 @@ e1000_clean(struct net_device *poll_dev, int *budget)
 	 * tx_ring[0] from being cleaned by multiple cpus
 	 * simultaneously. A failure obtaining the lock means
 	 * tx_ring[0] is currently being cleaned anyway. */
-	if (spin_trylock(&adapter->tx_queue_lock)) {
+	for (i = 0; i < adapter->num_tx_queues; i++) {
+		if (spin_trylock(&adapter->tx_ring[i].tx_queue_lock)) {
+			tx_cleaned &= e1000_clean_tx_irq(adapter,
+			                                 &adapter->tx_ring[i]);
+			spin_unlock(&adapter->tx_ring[i].tx_queue_lock);
+		}
+	}
+	if (adapter->num_tx_queues == 1 &&
+	    spin_trylock(&adapter->tx_queue_lock)) {
 		tx_cleaned = e1000_clean_tx_irq(adapter,
 		                                &adapter->tx_ring[0]);
 		spin_unlock(&adapter->tx_queue_lock);
 	}

-	adapter->clean_rx(adapter, &adapter->rx_ring[0],
-	                  &work_done, work_to_do);
+	for (i = 0; i < adapter->num_rx_queues; i++) {
+		/* XXX if the number of queues was limited to a power of two
+		 * this would not need a div */
+		adapter->clean_rx(adapter, &adapter->rx_ring[i],
+		                  &work_done,
+		                  work_to_do / adapter->num_rx_queues);
+	}

 	*budget -= work_done;
 	poll_dev->quota -= work_done;
@@ -3989,6 +4098,8 @@ e1000_clean_tx_irq(struct e1000_adapter *adapter,
 			buffer_info = &tx_ring->buffer_info[i];
 			cleaned = (i == eop);

+			tx_ring->tx_stats.bytes += buffer_info->length;
+
 			if (cleaned) {
 				struct sk_buff *skb = buffer_info->skb;
 				unsigned int segs, bytecount;
@@ -4005,6 +4116,8 @@ e1000_clean_tx_irq(struct e1000_adapter *adapter,
 			if (unlikely(++i == tx_ring->count)) i = 0;
 		}

+		tx_ring->tx_stats.packets++;
+
 		eop = tx_ring->buffer_info[i].next_to_watch;
 		eop_desc = E1000_TX_DESC(*tx_ring, eop);
@@ -4266,6 +4379,8 @@ e1000_clean_rx_irq(struct e1000_adapter *adapter,
 	}
 #endif /* CONFIG_E1000_NAPI */
 	netdev->last_rx = jiffies;
+	rx_ring->rx_stats.packets++;
+	rx_ring->rx_stats.bytes += length;

 next_desc:
 	rx_desc->status = 0;
@@ -5222,12 +5337,15 @@ static void
 e1000_netpoll(struct net_device *netdev)
 {
 	struct e1000_adapter *adapter = netdev_priv(netdev);
+	int i;

 	disable_irq(adapter->pdev->irq);
 	e1000_intr(adapter->pdev->irq, netdev);
-	e1000_clean_tx_irq(adapter, adapter->tx_ring);
+	for (i = 0; i < adapter->num_tx_queues; i++)
+		e1000_clean_tx_irq(adapter, &adapter->tx_ring[i]);
 #ifndef CONFIG_E1000_NAPI
-	adapter->clean_rx(adapter, adapter->rx_ring);
+	for (i = 0; i < adapter->num_rx_queues; i++)
+		adapter->clean_rx(adapter, &adapter->rx_ring[i]);
 #endif
 	enable_irq(adapter->pdev->irq);
 }