e1000 driver update
Signed-off-by: Jeff Kirsher <[EMAIL PROTECTED]>
Signed-off-by: John Ronciak <[EMAIL PROTECTED]>
Signed-off-by: Jesse Brandeburg <[EMAIL PROTECTED]>
2. Performance Enhancements
- aggressive prefetch of rx_desc and skb->data just like we do for 10gig
- align the prefetches to a dword to help speed them up
- copybreak for packets < 256 bytes, ideally we would like to modify ethtool to
allow this value to be changed, helps small MTU, many reassemblies case
- Fix RX buffer size changes
- Fixed Jumbo frames and memory allocation
diff -up linux-2.6/drivers/net/e1000/e1000.h
linux-2.6.new/drivers/net/e1000/e1000.h
--- linux-2.6/drivers/net/e1000/e1000.h 2005-11-14 16:20:34.000000000 -0800
+++ linux-2.6.new/drivers/net/e1000/e1000.h 2005-11-04 01:23:40.000000000
-0800
@@ -216,6 +216,12 @@ struct e1000_rx_ring {
struct e1000_ps_page *ps_page;
struct e1000_ps_page_dma *ps_page_dma;
+ struct sk_buff *rx_skb_top;
+ struct sk_buff *rx_skb_prev;
+
+ /* cpu for rx queue */
+ int cpu;
+
uint16_t rdh;
uint16_t rdt;
uint64_t pkt;
@@ -288,7 +288,8 @@ struct e1000_adapter {
struct e1000_rx_ring *rx_ring);
#endif
void (*alloc_rx_buf) (struct e1000_adapter *adapter,
- struct e1000_rx_ring *rx_ring);
+ struct e1000_rx_ring *rx_ring,
+ int cleaned_count);
struct e1000_rx_ring *rx_ring; /* One per active queue */
#ifdef CONFIG_E1000_NAPI
struct net_device *polling_netdev; /* One per active queue */
diff -up linux-2.6/drivers/net/e1000/e1000_main.c
linux-2.6.new/drivers/net/e1000/e1000_main.c
--- linux-2.6/drivers/net/e1000/e1000_main.c 2005-11-14 16:20:34.000000000
-0800
+++ linux-2.6.new/drivers/net/e1000/e1000_main.c 2005-11-04
01:23:40.000000000 -0800
@@ -171,9 +171,11 @@ static boolean_t e1000_clean_rx_irq_ps(s
struct e1000_rx_ring *rx_ring);
#endif
static void e1000_alloc_rx_buffers(struct e1000_adapter *adapter,
- struct e1000_rx_ring *rx_ring);
+ struct e1000_rx_ring *rx_ring,
+ int cleaned_count);
static void e1000_alloc_rx_buffers_ps(struct e1000_adapter *adapter,
- struct e1000_rx_ring *rx_ring);
+ struct e1000_rx_ring *rx_ring,
+ int cleaned_count);
static int e1000_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd);
static int e1000_mii_ioctl(struct net_device *netdev, struct ifreq *ifr,
int cmd);
@@ -344,7 +344,8 @@ e1000_up(struct e1000_adapter *adapter)
e1000_setup_rctl(adapter);
e1000_configure_rx(adapter);
for (i = 0; i < adapter->num_queues; i++)
- adapter->alloc_rx_buf(adapter, &adapter->rx_ring[i]);
+ adapter->alloc_rx_buf(adapter, &adapter->rx_ring[i],
+ adapter->rx_ring[i].count);
#ifdef CONFIG_PCI_MSI
if(adapter->hw.mac_type > e1000_82547_rev_2) {
@@ -1454,6 +1457,8 @@ setup_rx_desc_die:
rxdr->next_to_clean = 0;
rxdr->next_to_use = 0;
+ rxdr->rx_skb_top = NULL;
+ rxdr->rx_skb_prev = NULL;
return 0;
}
@@ -1527,23 +1532,8 @@ e1000_setup_rctl(struct e1000_adapter *a
rctl |= adapter->rx_buffer_len << 0x11;
} else {
rctl &= ~E1000_RCTL_SZ_4096;
- rctl |= E1000_RCTL_BSEX;
- switch (adapter->rx_buffer_len) {
- case E1000_RXBUFFER_2048:
- default:
- rctl |= E1000_RCTL_SZ_2048;
- rctl &= ~E1000_RCTL_BSEX;
- break;
- case E1000_RXBUFFER_4096:
- rctl |= E1000_RCTL_SZ_4096;
- break;
- case E1000_RXBUFFER_8192:
- rctl |= E1000_RCTL_SZ_8192;
- break;
- case E1000_RXBUFFER_16384:
- rctl |= E1000_RCTL_SZ_16384;
- break;
- }
+ rctl &= ~E1000_RCTL_BSEX;
+ rctl |= E1000_RCTL_SZ_2048;
}
#ifdef CONFIG_E1000_PACKET_SPLIT
@@ -1935,6 +1925,16 @@ e1000_clean_rx_ring(struct e1000_adapter
}
}
+ /* there also may be some cached data in our adapter */
+ if(rx_ring->rx_skb_top) {
+ dev_kfree_skb(rx_ring->rx_skb_top);
+
+ /* rx_skb_prev will be wiped out by rx_skb_top */
+ rx_ring->rx_skb_top = NULL;
+ rx_ring->rx_skb_prev = NULL;
+ }
+
+
size = sizeof(struct e1000_buffer) * rx_ring->count;
memset(rx_ring->buffer_info, 0, size);
size = sizeof(struct e1000_ps_page) * rx_ring->count;
@@ -2005,7 +2005,8 @@ e1000_leave_82542_rst(struct e1000_adapt
if(netif_running(netdev)) {
e1000_configure_rx(adapter);
- e1000_alloc_rx_buffers(adapter, &adapter->rx_ring[0]);
+ e1000_alloc_rx_buffers(adapter, &adapter->rx_ring[0],
+ adapter->rx_ring[0].count);
}
}
@@ -2903,29 +2904,30 @@ e1000_change_mtu(struct net_device *netd
"on 82573\n");
return -EINVAL;
}
+ if(unlikely((adapter->hw.mac_type < e1000_82543) &&
+ (max_frame > MAXIMUM_ETHERNET_FRAME_SIZE))) {
+ DPRINTK(PROBE, ERR, "Jumbo Frames not supported on 82542\n");
+ return -EINVAL;
+ }
+
+ /* since the driver code now supports splitting a packet across
+ * multiple descriptors, most of the fifo related limitations on
+ * jumbo frame traffic have gone away.
+ * simply use 2k descriptors for everything.
+ *
+ * NOTE: dev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
+ * means we reserve 2 more, this pushes us to allocate from the next
+ * larger slab size
+ * i.e. RXBUFFER_2048 --> size-4096 slab */
+ /* recent hardware supports 1KB granularity */
if(adapter->hw.mac_type > e1000_82547_rev_2) {
- adapter->rx_buffer_len = max_frame;
+ adapter->rx_buffer_len =
+ ((max_frame < E1000_RXBUFFER_2048) ?
+ max_frame : E1000_RXBUFFER_2048);
E1000_ROUNDUP(adapter->rx_buffer_len, 1024);
- } else {
- if(unlikely((adapter->hw.mac_type < e1000_82543) &&
- (max_frame > MAXIMUM_ETHERNET_FRAME_SIZE))) {
- DPRINTK(PROBE, ERR, "Jumbo Frames not supported "
- "on 82542\n");
- return -EINVAL;
-
- } else {
- if(max_frame <= E1000_RXBUFFER_2048) {
- adapter->rx_buffer_len = E1000_RXBUFFER_2048;
- } else if(max_frame <= E1000_RXBUFFER_4096) {
- adapter->rx_buffer_len = E1000_RXBUFFER_4096;
- } else if(max_frame <= E1000_RXBUFFER_8192) {
- adapter->rx_buffer_len = E1000_RXBUFFER_8192;
- } else if(max_frame <= E1000_RXBUFFER_16384) {
- adapter->rx_buffer_len = E1000_RXBUFFER_16384;
- }
- }
- }
+ } else
+ adapter->rx_buffer_len = E1000_RXBUFFER_2048;
netdev->mtu = new_mtu;
@@ -3049,8 +3188,8 @@ e1000_update_stats(struct e1000_adapter
adapter->net_stats.rx_errors = adapter->stats.rxerrc +
adapter->stats.crcerrs + adapter->stats.algnerrc +
- adapter->stats.rlec + adapter->stats.mpc +
- adapter->stats.cexterr;
+ adapter->stats.rlec + adapter->stats.cexterr;
+ adapter->net_stats.rx_dropped = 0;
adapter->net_stats.rx_length_errors = adapter->stats.rlec;
adapter->net_stats.rx_crc_errors = adapter->stats.crcerrs;
adapter->net_stats.rx_frame_errors = adapter->stats.algnerrc;
@@ -3294,9 +3454,6 @@ e1000_clean_tx_irq(struct e1000_adapter
E1000_STATUS_TXOFF)) {
/* detected Tx unit hang */
- i = tx_ring->next_to_clean;
- eop = tx_ring->buffer_info[i].next_to_watch;
- eop_desc = E1000_TX_DESC(*tx_ring, eop);
DPRINTK(DRV, ERR, "Detected Tx Unit Hang\n"
" TDH <%x>\n"
" TDT <%x>\n"
@@ -3303,7 +3454,6 @@
" next_to_use <%x>\n"
" next_to_clean <%x>\n"
"buffer_info[next_to_clean]\n"
- " dma <%llx>\n"
" time_stamp <%lx>\n"
" next_to_watch <%x>\n"
" jiffies <%lx>\n"
@@ -3311,9 +3477,8 @@ e1000_clean_tx_irq(struct e1000_adapter
readl(adapter->hw.hw_addr + tx_ring->tdh),
readl(adapter->hw.hw_addr + tx_ring->tdt),
tx_ring->next_to_use,
- i,
- (unsigned long long)tx_ring->buffer_info[i].dma,
- tx_ring->buffer_info[i].time_stamp,
+ tx_ring->next_to_clean,
+ tx_ring->buffer_info[eop].time_stamp,
eop,
jiffies,
eop_desc->upper.fields.status);
@@ -3391,46 +3388,98 @@ e1000_clean_rx_irq(struct e1000_adapter
{
struct net_device *netdev = adapter->netdev;
struct pci_dev *pdev = adapter->pdev;
- struct e1000_rx_desc *rx_desc;
- struct e1000_buffer *buffer_info;
- struct sk_buff *skb;
+ struct e1000_rx_desc *rx_desc, *next_rxd;
+ struct e1000_buffer *buffer_info, *next_buffer, *next2_buffer;
unsigned long flags;
uint32_t length;
uint8_t last_byte;
- unsigned int i;
- boolean_t cleaned = FALSE;
+ unsigned int i, j;
+ int cleaned_count = 0;
+ boolean_t cleaned = FALSE, multi_descriptor = FALSE;
i = rx_ring->next_to_clean;
rx_desc = E1000_RX_DESC(*rx_ring, i);
+ buffer_info = &rx_ring->buffer_info[i];
while(rx_desc->status & E1000_RXD_STAT_DD) {
- buffer_info = &rx_ring->buffer_info[i];
+ struct sk_buff *skb, *next_skb;
+ u8 status;
+
#ifdef CONFIG_E1000_NAPI
if(*work_done >= work_to_do)
break;
(*work_done)++;
#endif
- cleaned = TRUE;
+ status = rx_desc->status;
+ skb = buffer_info->skb;
+ buffer_info->skb = NULL;
+
+ prefetch(skb->data - NET_IP_ALIGN);
+ if(++i == rx_ring->count) i = 0;
+ next_rxd = E1000_RX_DESC(*rx_ring, i);
+ prefetch(next_rxd);
+
+ if((j = i + 1) == rx_ring->count) j = 0;
+ next2_buffer = &rx_ring->buffer_info[j];
+ prefetch(next2_buffer);
+
+ next_buffer = &rx_ring->buffer_info[i];
+ next_skb = next_buffer->skb;
+ prefetch(next_skb);
+ prefetch(next_skb->data - NET_IP_ALIGN);
+
+ cleaned = TRUE;
+ cleaned_count++;
pci_unmap_single(pdev,
buffer_info->dma,
buffer_info->length,
PCI_DMA_FROMDEVICE);
- skb = buffer_info->skb;
length = le16_to_cpu(rx_desc->length);
- if(unlikely(!(rx_desc->status & E1000_RXD_STAT_EOP))) {
- /* All receives must fit into a single buffer */
- E1000_DBG("%s: Receive packet consumed multiple"
- " buffers\n", netdev->name);
- dev_kfree_skb_irq(skb);
+ if(!(status & E1000_RXD_STAT_EOP)) {
+ skb_put(skb, length);
+ if(!rx_ring->rx_skb_top) {
+ rx_ring->rx_skb_top = skb;
+ rx_ring->rx_skb_top->len = length;
+ rx_ring->rx_skb_prev = skb;
+ } else {
+ if(skb_shinfo(rx_ring->rx_skb_top)->frag_list) {
+ rx_ring->rx_skb_prev->next = skb;
+ skb->prev = rx_ring->rx_skb_prev;
+ } else {
+
skb_shinfo(rx_ring->rx_skb_top)->frag_list = skb;
+ }
+ rx_ring->rx_skb_prev = skb;
+ rx_ring->rx_skb_top->data_len += length;
+ }
goto next_desc;
+ } else {
+ skb_put(skb, length);
+ if (rx_ring->rx_skb_top) {
+ if(skb_shinfo(rx_ring->rx_skb_top)
+ ->frag_list) {
+ rx_ring->rx_skb_prev->next = skb;
+ skb->prev = rx_ring->rx_skb_prev;
+ } else
+ skb_shinfo(rx_ring->rx_skb_top)
+ ->frag_list = skb;
+
+ rx_ring->rx_skb_top->data_len += length;
+ rx_ring->rx_skb_top->len +=
+ rx_ring->rx_skb_top->data_len;
+
+ skb = rx_ring->rx_skb_top;
+ multi_descriptor = TRUE;
+ rx_ring->rx_skb_top = NULL;
+ rx_ring->rx_skb_prev = NULL;
+ }
}
if(unlikely(rx_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK)) {
last_byte = *(skb->data + length - 1);
- if(TBI_ACCEPT(&adapter->hw, rx_desc->status,
+ if(TBI_ACCEPT(&adapter->hw, status,
rx_desc->errors, length, last_byte)) {
spin_lock_irqsave(&adapter->stats_lock, flags);
e1000_tbi_adjust_stats(&adapter->hw,
@@ -3445,18 +3494,41 @@ e1000_clean_rx_irq(struct e1000_adapter
}
}
- /* Good Receive */
- skb_put(skb, length - ETHERNET_FCS_SIZE);
+ /* code added for copybreak, this should improve
+ * performance for small packets with large amounts
+ * of reassembly being done in the stack */
+#define E1000_CB_LENGTH 256
+ if((length < E1000_CB_LENGTH) &&
+ !rx_ring->rx_skb_top &&
+ /* or maybe (status & E1000_RXD_STAT_EOP) && */
+ !multi_descriptor) {
+ struct sk_buff *new_skb =
+ dev_alloc_skb(length + NET_IP_ALIGN);
+ if(new_skb) {
+ skb_reserve(new_skb, NET_IP_ALIGN);
+ new_skb->dev = netdev;
+ memcpy(new_skb->data - NET_IP_ALIGN,
+ skb->data - NET_IP_ALIGN,
+ length + NET_IP_ALIGN);
+ /* save the skb in buffer_info as good */
+ buffer_info->skb = skb;
+ skb = new_skb;
+ skb_put(skb, length);
+ }
+ }
+
+ /* end copybreak code */
/* Receive Checksum Offload */
e1000_rx_checksum(adapter,
- (uint32_t)(rx_desc->status) |
+ (uint32_t)(status) |
((uint32_t)(rx_desc->errors) << 24),
rx_desc->csum, skb);
+
skb->protocol = eth_type_trans(skb, netdev);
#ifdef CONFIG_E1000_NAPI
if(unlikely(adapter->vlgrp &&
- (rx_desc->status & E1000_RXD_STAT_VP))) {
+ (status & E1000_RXD_STAT_VP))) {
vlan_hwaccel_receive_skb(skb, adapter->vlgrp,
le16_to_cpu(rx_desc->special) &
E1000_RXD_SPC_VLAN_MASK);
@@ -3465,7 +3537,7 @@ e1000_clean_rx_irq(struct e1000_adapter
}
#else /* CONFIG_E1000_NAPI */
if(unlikely(adapter->vlgrp &&
- (rx_desc->status & E1000_RXD_STAT_VP))) {
+ (status & E1000_RXD_STAT_VP))) {
vlan_hwaccel_rx(skb, adapter->vlgrp,
le16_to_cpu(rx_desc->special) &
E1000_RXD_SPC_VLAN_MASK);
@@ -3478,13 +3646,22 @@ e1000_clean_rx_irq(struct e1000_adapter
next_desc:
rx_desc->status = 0;
- buffer_info->skb = NULL;
- if(unlikely(++i == rx_ring->count)) i = 0;
- rx_desc = E1000_RX_DESC(*rx_ring, i);
+ /* return some buffers to hardware, one at a time is too slow */
+ if (unlikely(cleaned_count >= E1000_RX_BUFFER_WRITE)) {
+ adapter->alloc_rx_buf(adapter, rx_ring, cleaned_count);
+ cleaned_count = 0;
+ }
+
+ /* use prefetched values */
+ rx_desc = next_rxd;
+ buffer_info = next_buffer;
}
rx_ring->next_to_clean = i;
- adapter->alloc_rx_buf(adapter, rx_ring);
+
+ cleaned_count = E1000_DESC_UNUSED(rx_ring);
+ if (cleaned_count)
+ adapter->alloc_rx_buf(adapter, rx_ring, cleaned_count);
return cleaned;
}
@@ -3504,16 +3585,17 @@ e1000_clean_rx_irq_ps(struct e1000_adapt
struct e1000_rx_ring *rx_ring)
#endif
{
- union e1000_rx_desc_packet_split *rx_desc;
+ union e1000_rx_desc_packet_split *rx_desc, *next_rxd;
struct net_device *netdev = adapter->netdev;
struct pci_dev *pdev = adapter->pdev;
- struct e1000_buffer *buffer_info;
+ struct e1000_buffer *buffer_info, *next_buffer, *next2_buffer;
struct e1000_ps_page *ps_page;
struct e1000_ps_page_dma *ps_page_dma;
- struct sk_buff *skb;
+ struct sk_buff *skb, *next_skb;
unsigned int i, j;
uint32_t length, staterr;
boolean_t cleaned = FALSE;
+ int cleaned_count = 0;
i = rx_ring->next_to_clean;
rx_desc = E1000_RX_DESC_PS(*rx_ring, i);
@@ -3528,13 +3610,29 @@ e1000_clean_rx_irq_ps(struct e1000_adapt
break;
(*work_done)++;
#endif
+ skb = buffer_info->skb;
+
+ prefetch(skb->data - NET_IP_ALIGN);
+
+ if(++i == rx_ring->count) i = 0;
+ next_rxd = E1000_RX_DESC_PS(*rx_ring, i);
+ prefetch(next_rxd);
+
+ if((j = i + 1) == rx_ring->count) j = 0;
+ next2_buffer = &rx_ring->buffer_info[j];
+ prefetch(next2_buffer);
+
+ next_buffer = &rx_ring->buffer_info[i];
+ next_skb = next_buffer->skb;
+ prefetch(next_skb);
+ prefetch(next_skb->data - NET_IP_ALIGN);
+
cleaned = TRUE;
+ cleaned_count++;
pci_unmap_single(pdev, buffer_info->dma,
buffer_info->length,
PCI_DMA_FROMDEVICE);
- skb = buffer_info->skb;
-
if(unlikely(!(staterr & E1000_RXD_STAT_EOP))) {
E1000_DBG("%s: Packet Split buffers didn't pick up"
" the full packet\n", netdev->name);
@@ -3610,13 +3780,24 @@ e1000_clean_rx_irq_ps(struct e1000_adapt
next_desc:
rx_desc->wb.middle.status_error &= ~0xFF;
buffer_info->skb = NULL;
- if(unlikely(++i == rx_ring->count)) i = 0;
- rx_desc = E1000_RX_DESC_PS(*rx_ring, i);
+ /* return some buffers to hardware, one at a time is too slow */
+ if (unlikely(cleaned_count >= E1000_RX_BUFFER_WRITE)) {
+ adapter->alloc_rx_buf(adapter, rx_ring, cleaned_count);
+ cleaned_count = 0;
+ }
+
+ /* use prefetched values */
+ rx_desc = next_rxd;
+ buffer_info = next_buffer;
+
staterr = le32_to_cpu(rx_desc->wb.middle.status_error);
}
rx_ring->next_to_clean = i;
- adapter->alloc_rx_buf(adapter, rx_ring);
+
+ cleaned_count = E1000_DESC_UNUSED(rx_ring);
+ if (cleaned_count)
+ adapter->alloc_rx_buf(adapter, rx_ring, cleaned_count);
return cleaned;
}
@@ -3628,7 +3737,8 @@ next_desc:
static void
e1000_alloc_rx_buffers(struct e1000_adapter *adapter,
- struct e1000_rx_ring *rx_ring)
+ struct e1000_rx_ring *rx_ring,
+ int cleaned_count)
{
struct net_device *netdev = adapter->netdev;
struct pci_dev *pdev = adapter->pdev;
@@ -3641,8 +3819,14 @@ e1000_alloc_rx_buffers(struct e1000_adap
i = rx_ring->next_to_use;
buffer_info = &rx_ring->buffer_info[i];
- while(!buffer_info->skb) {
- skb = dev_alloc_skb(bufsz);
+ while(cleaned_count--) {
+ if(!(skb = buffer_info->skb))
+ skb = dev_alloc_skb(bufsz);
+ else {
+ skb->tail = skb->head;
+ skb->len = 0;
+ goto map_skb;
+ }
if(unlikely(!skb)) {
/* Better luck next round */
@@ -3682,6 +3861,7 @@ e1000_alloc_rx_buffers(struct e1000_adap
buffer_info->skb = skb;
buffer_info->length = adapter->rx_buffer_len;
+map_skb:
buffer_info->dma = pci_map_single(pdev,
skb->data,
adapter->rx_buffer_len,
@@ -3707,20 +3824,21 @@ e1000_alloc_rx_buffers(struct e1000_adap
rx_desc = E1000_RX_DESC(*rx_ring, i);
rx_desc->buffer_addr = cpu_to_le64(buffer_info->dma);
- if(unlikely((i & ~(E1000_RX_BUFFER_WRITE - 1)) == i)) {
- /* Force memory writes to complete before letting h/w
- * know there are new descriptors to fetch. (Only
- * applicable for weak-ordered memory model archs,
- * such as IA-64). */
- wmb();
- writel(i, adapter->hw.hw_addr + rx_ring->rdt);
- }
-
if(unlikely(++i == rx_ring->count)) i = 0;
buffer_info = &rx_ring->buffer_info[i];
}
- rx_ring->next_to_use = i;
+ if (rx_ring->next_to_use != i) {
+ rx_ring->next_to_use = i;
+ if(unlikely(i-- == 0)) i = (rx_ring->count - 1);
+
+ /* Force memory writes to complete before letting h/w
+ * know there are new descriptors to fetch. (Only
+ * applicable for weak-ordered memory model archs,
+ * such as IA-64). */
+ wmb();
+ writel(i, adapter->hw.hw_addr + rx_ring->rdt);
+ }
}
/**
@@ -3730,7 +3848,8 @@ e1000_alloc_rx_buffers(struct e1000_adap
static void
e1000_alloc_rx_buffers_ps(struct e1000_adapter *adapter,
- struct e1000_rx_ring *rx_ring)
+ struct e1000_rx_ring *rx_ring,
+ int cleaned_count)
{
struct net_device *netdev = adapter->netdev;
struct pci_dev *pdev = adapter->pdev;
@@ -3746,7 +3925,7 @@ e1000_alloc_rx_buffers_ps(struct e1000_a
ps_page = &rx_ring->ps_page[i];
ps_page_dma = &rx_ring->ps_page_dma[i];
- while(!buffer_info->skb) {
+ while (cleaned_count--) {
rx_desc = E1000_RX_DESC_PS(*rx_ring, i);
for(j = 0; j < PS_PAGE_BUFFERS; j++) {
@@ -3793,19 +3976,6 @@ e1000_alloc_rx_buffers_ps(struct e1000_a
rx_desc->read.buffer_addr[0] = cpu_to_le64(buffer_info->dma);
- if(unlikely((i & ~(E1000_RX_BUFFER_WRITE - 1)) == i)) {
- /* Force memory writes to complete before letting h/w
- * know there are new descriptors to fetch. (Only
- * applicable for weak-ordered memory model archs,
- * such as IA-64). */
- wmb();
- /* Hardware increments by 16 bytes, but packet split
- * descriptors are 32 bytes...so we increment tail
- * twice as much.
- */
- writel(i<<1, adapter->hw.hw_addr + rx_ring->rdt);
- }
-
if(unlikely(++i == rx_ring->count)) i = 0;
buffer_info = &rx_ring->buffer_info[i];
ps_page = &rx_ring->ps_page[i];
@@ -3813,7 +3976,21 @@
}
no_buffers:
- rx_ring->next_to_use = i;
+ if (rx_ring->next_to_use != i) {
+ rx_ring->next_to_use = i;
+ if(unlikely(i-- == 0)) i = (rx_ring->count - 1);
+
+ /* Force memory writes to complete before letting h/w
+ * know there are new descriptors to fetch. (Only
+ * applicable for weak-ordered memory model archs,
+ * such as IA-64). */
+ wmb();
+ /* Hardware increments by 16 bytes, but packet split
+ * descriptors are 32 bytes...so we increment tail
+ * twice as much.
+ */
+ writel(i<<1, adapter->hw.hw_addr + rx_ring->rdt);
+ }
}
/**
Cheers,
Jeff
diff -up linux-2.6/drivers/net/e1000/e1000.h
linux-2.6.new/drivers/net/e1000/e1000.h
--- linux-2.6/drivers/net/e1000/e1000.h 2005-11-14 16:20:34.000000000 -0800
+++ linux-2.6.new/drivers/net/e1000/e1000.h 2005-11-04 01:23:40.000000000
-0800
@@ -216,6 +216,12 @@ struct e1000_rx_ring {
struct e1000_ps_page *ps_page;
struct e1000_ps_page_dma *ps_page_dma;
+ struct sk_buff *rx_skb_top;
+ struct sk_buff *rx_skb_prev;
+
+ /* cpu for rx queue */
+ int cpu;
+
uint16_t rdh;
uint16_t rdt;
uint64_t pkt;
@@ -288,7 +288,8 @@ struct e1000_adapter {
struct e1000_rx_ring *rx_ring);
#endif
void (*alloc_rx_buf) (struct e1000_adapter *adapter,
- struct e1000_rx_ring *rx_ring);
+ struct e1000_rx_ring *rx_ring,
+ int cleaned_count);
struct e1000_rx_ring *rx_ring; /* One per active queue */
#ifdef CONFIG_E1000_NAPI
struct net_device *polling_netdev; /* One per active queue */
diff -up linux-2.6/drivers/net/e1000/e1000_main.c
linux-2.6.new/drivers/net/e1000/e1000_main.c
--- linux-2.6/drivers/net/e1000/e1000_main.c 2005-11-14 16:20:34.000000000
-0800
+++ linux-2.6.new/drivers/net/e1000/e1000_main.c 2005-11-04
01:23:40.000000000 -0800
@@ -171,9 +171,11 @@ static boolean_t e1000_clean_rx_irq_ps(s
struct e1000_rx_ring *rx_ring);
#endif
static void e1000_alloc_rx_buffers(struct e1000_adapter *adapter,
- struct e1000_rx_ring *rx_ring);
+ struct e1000_rx_ring *rx_ring,
+ int cleaned_count);
static void e1000_alloc_rx_buffers_ps(struct e1000_adapter *adapter,
- struct e1000_rx_ring *rx_ring);
+ struct e1000_rx_ring *rx_ring,
+ int cleaned_count);
static int e1000_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd);
static int e1000_mii_ioctl(struct net_device *netdev, struct ifreq *ifr,
int cmd);
@@ -344,7 +344,8 @@ e1000_up(struct e1000_adapter *adapter)
e1000_setup_rctl(adapter);
e1000_configure_rx(adapter);
for (i = 0; i < adapter->num_queues; i++)
- adapter->alloc_rx_buf(adapter, &adapter->rx_ring[i]);
+ adapter->alloc_rx_buf(adapter, &adapter->rx_ring[i],
+ adapter->rx_ring[i].count);
#ifdef CONFIG_PCI_MSI
if(adapter->hw.mac_type > e1000_82547_rev_2) {
@@ -1454,6 +1457,8 @@ setup_rx_desc_die:
rxdr->next_to_clean = 0;
rxdr->next_to_use = 0;
+ rxdr->rx_skb_top = NULL;
+ rxdr->rx_skb_prev = NULL;
return 0;
}
@@ -1527,23 +1532,8 @@ e1000_setup_rctl(struct e1000_adapter *a
rctl |= adapter->rx_buffer_len << 0x11;
} else {
rctl &= ~E1000_RCTL_SZ_4096;
- rctl |= E1000_RCTL_BSEX;
- switch (adapter->rx_buffer_len) {
- case E1000_RXBUFFER_2048:
- default:
- rctl |= E1000_RCTL_SZ_2048;
- rctl &= ~E1000_RCTL_BSEX;
- break;
- case E1000_RXBUFFER_4096:
- rctl |= E1000_RCTL_SZ_4096;
- break;
- case E1000_RXBUFFER_8192:
- rctl |= E1000_RCTL_SZ_8192;
- break;
- case E1000_RXBUFFER_16384:
- rctl |= E1000_RCTL_SZ_16384;
- break;
- }
+ rctl &= ~E1000_RCTL_BSEX;
+ rctl |= E1000_RCTL_SZ_2048;
}
#ifdef CONFIG_E1000_PACKET_SPLIT
@@ -1935,6 +1925,16 @@ e1000_clean_rx_ring(struct e1000_adapter
}
}
+ /* there also may be some cached data in our adapter */
+ if(rx_ring->rx_skb_top) {
+ dev_kfree_skb(rx_ring->rx_skb_top);
+
+ /* rx_skb_prev will be wiped out by rx_skb_top */
+ rx_ring->rx_skb_top = NULL;
+ rx_ring->rx_skb_prev = NULL;
+ }
+
+
size = sizeof(struct e1000_buffer) * rx_ring->count;
memset(rx_ring->buffer_info, 0, size);
size = sizeof(struct e1000_ps_page) * rx_ring->count;
@@ -2005,7 +2005,8 @@ e1000_leave_82542_rst(struct e1000_adapt
if(netif_running(netdev)) {
e1000_configure_rx(adapter);
- e1000_alloc_rx_buffers(adapter, &adapter->rx_ring[0]);
+ e1000_alloc_rx_buffers(adapter, &adapter->rx_ring[0],
+ adapter->rx_ring[0].count);
}
}
@@ -2903,29 +2904,30 @@ e1000_change_mtu(struct net_device *netd
"on 82573\n");
return -EINVAL;
}
+ if(unlikely((adapter->hw.mac_type < e1000_82543) &&
+ (max_frame > MAXIMUM_ETHERNET_FRAME_SIZE))) {
+ DPRINTK(PROBE, ERR, "Jumbo Frames not supported on 82542\n");
+ return -EINVAL;
+ }
+
+ /* since the driver code now supports splitting a packet across
+ * multiple descriptors, most of the fifo related limitations on
+ * jumbo frame traffic have gone away.
+ * simply use 2k descriptors for everything.
+ *
+ * NOTE: dev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
+ * means we reserve 2 more, this pushes us to allocate from the next
+ * larger slab size
+ * i.e. RXBUFFER_2048 --> size-4096 slab */
+ /* recent hardware supports 1KB granularity */
if(adapter->hw.mac_type > e1000_82547_rev_2) {
- adapter->rx_buffer_len = max_frame;
+ adapter->rx_buffer_len =
+ ((max_frame < E1000_RXBUFFER_2048) ?
+ max_frame : E1000_RXBUFFER_2048);
E1000_ROUNDUP(adapter->rx_buffer_len, 1024);
- } else {
- if(unlikely((adapter->hw.mac_type < e1000_82543) &&
- (max_frame > MAXIMUM_ETHERNET_FRAME_SIZE))) {
- DPRINTK(PROBE, ERR, "Jumbo Frames not supported "
- "on 82542\n");
- return -EINVAL;
-
- } else {
- if(max_frame <= E1000_RXBUFFER_2048) {
- adapter->rx_buffer_len = E1000_RXBUFFER_2048;
- } else if(max_frame <= E1000_RXBUFFER_4096) {
- adapter->rx_buffer_len = E1000_RXBUFFER_4096;
- } else if(max_frame <= E1000_RXBUFFER_8192) {
- adapter->rx_buffer_len = E1000_RXBUFFER_8192;
- } else if(max_frame <= E1000_RXBUFFER_16384) {
- adapter->rx_buffer_len = E1000_RXBUFFER_16384;
- }
- }
- }
+ } else
+ adapter->rx_buffer_len = E1000_RXBUFFER_2048;
netdev->mtu = new_mtu;
@@ -3049,8 +3188,8 @@ e1000_update_stats(struct e1000_adapter
adapter->net_stats.rx_errors = adapter->stats.rxerrc +
adapter->stats.crcerrs + adapter->stats.algnerrc +
- adapter->stats.rlec + adapter->stats.mpc +
- adapter->stats.cexterr;
+ adapter->stats.rlec + adapter->stats.cexterr;
+ adapter->net_stats.rx_dropped = 0;
adapter->net_stats.rx_length_errors = adapter->stats.rlec;
adapter->net_stats.rx_crc_errors = adapter->stats.crcerrs;
adapter->net_stats.rx_frame_errors = adapter->stats.algnerrc;
@@ -3294,9 +3454,6 @@ e1000_clean_tx_irq(struct e1000_adapter
E1000_STATUS_TXOFF)) {
/* detected Tx unit hang */
- i = tx_ring->next_to_clean;
- eop = tx_ring->buffer_info[i].next_to_watch;
- eop_desc = E1000_TX_DESC(*tx_ring, eop);
DPRINTK(DRV, ERR, "Detected Tx Unit Hang\n"
" TDH <%x>\n"
" TDT <%x>\n"
@@ -3303,7 +3454,6 @@
" next_to_use <%x>\n"
" next_to_clean <%x>\n"
"buffer_info[next_to_clean]\n"
- " dma <%llx>\n"
" time_stamp <%lx>\n"
" next_to_watch <%x>\n"
" jiffies <%lx>\n"
@@ -3311,9 +3477,8 @@ e1000_clean_tx_irq(struct e1000_adapter
readl(adapter->hw.hw_addr + tx_ring->tdh),
readl(adapter->hw.hw_addr + tx_ring->tdt),
tx_ring->next_to_use,
- i,
- (unsigned long long)tx_ring->buffer_info[i].dma,
- tx_ring->buffer_info[i].time_stamp,
+ tx_ring->next_to_clean,
+ tx_ring->buffer_info[eop].time_stamp,
eop,
jiffies,
eop_desc->upper.fields.status);
@@ -3391,46 +3388,98 @@ e1000_clean_rx_irq(struct e1000_adapter
{
struct net_device *netdev = adapter->netdev;
struct pci_dev *pdev = adapter->pdev;
- struct e1000_rx_desc *rx_desc;
- struct e1000_buffer *buffer_info;
- struct sk_buff *skb;
+ struct e1000_rx_desc *rx_desc, *next_rxd;
+ struct e1000_buffer *buffer_info, *next_buffer, *next2_buffer;
unsigned long flags;
uint32_t length;
uint8_t last_byte;
- unsigned int i;
- boolean_t cleaned = FALSE;
+ unsigned int i, j;
+ int cleaned_count = 0;
+ boolean_t cleaned = FALSE, multi_descriptor = FALSE;
i = rx_ring->next_to_clean;
rx_desc = E1000_RX_DESC(*rx_ring, i);
+ buffer_info = &rx_ring->buffer_info[i];
while(rx_desc->status & E1000_RXD_STAT_DD) {
- buffer_info = &rx_ring->buffer_info[i];
+ struct sk_buff *skb, *next_skb;
+ u8 status;
+
#ifdef CONFIG_E1000_NAPI
if(*work_done >= work_to_do)
break;
(*work_done)++;
#endif
- cleaned = TRUE;
+ status = rx_desc->status;
+ skb = buffer_info->skb;
+ buffer_info->skb = NULL;
+
+ prefetch(skb->data - NET_IP_ALIGN);
+ if(++i == rx_ring->count) i = 0;
+ next_rxd = E1000_RX_DESC(*rx_ring, i);
+ prefetch(next_rxd);
+
+ if((j = i + 1) == rx_ring->count) j = 0;
+ next2_buffer = &rx_ring->buffer_info[j];
+ prefetch(next2_buffer);
+
+ next_buffer = &rx_ring->buffer_info[i];
+ next_skb = next_buffer->skb;
+ prefetch(next_skb);
+ prefetch(next_skb->data - NET_IP_ALIGN);
+
+ cleaned = TRUE;
+ cleaned_count++;
pci_unmap_single(pdev,
buffer_info->dma,
buffer_info->length,
PCI_DMA_FROMDEVICE);
- skb = buffer_info->skb;
length = le16_to_cpu(rx_desc->length);
- if(unlikely(!(rx_desc->status & E1000_RXD_STAT_EOP))) {
- /* All receives must fit into a single buffer */
- E1000_DBG("%s: Receive packet consumed multiple"
- " buffers\n", netdev->name);
- dev_kfree_skb_irq(skb);
+ if(!(status & E1000_RXD_STAT_EOP)) {
+ skb_put(skb, length);
+ if(!rx_ring->rx_skb_top) {
+ rx_ring->rx_skb_top = skb;
+ rx_ring->rx_skb_top->len = length;
+ rx_ring->rx_skb_prev = skb;
+ } else {
+ if(skb_shinfo(rx_ring->rx_skb_top)->frag_list) {
+ rx_ring->rx_skb_prev->next = skb;
+ skb->prev = rx_ring->rx_skb_prev;
+ } else {
+
skb_shinfo(rx_ring->rx_skb_top)->frag_list = skb;
+ }
+ rx_ring->rx_skb_prev = skb;
+ rx_ring->rx_skb_top->data_len += length;
+ }
goto next_desc;
+ } else {
+ skb_put(skb, length);
+ if (rx_ring->rx_skb_top) {
+ if(skb_shinfo(rx_ring->rx_skb_top)
+ ->frag_list) {
+ rx_ring->rx_skb_prev->next = skb;
+ skb->prev = rx_ring->rx_skb_prev;
+ } else
+ skb_shinfo(rx_ring->rx_skb_top)
+ ->frag_list = skb;
+
+ rx_ring->rx_skb_top->data_len += length;
+ rx_ring->rx_skb_top->len +=
+ rx_ring->rx_skb_top->data_len;
+
+ skb = rx_ring->rx_skb_top;
+ multi_descriptor = TRUE;
+ rx_ring->rx_skb_top = NULL;
+ rx_ring->rx_skb_prev = NULL;
+ }
}
if(unlikely(rx_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK)) {
last_byte = *(skb->data + length - 1);
- if(TBI_ACCEPT(&adapter->hw, rx_desc->status,
+ if(TBI_ACCEPT(&adapter->hw, status,
rx_desc->errors, length, last_byte)) {
spin_lock_irqsave(&adapter->stats_lock, flags);
e1000_tbi_adjust_stats(&adapter->hw,
@@ -3445,18 +3494,41 @@ e1000_clean_rx_irq(struct e1000_adapter
}
}
- /* Good Receive */
- skb_put(skb, length - ETHERNET_FCS_SIZE);
+ /* code added for copybreak, this should improve
+ * performance for small packets with large amounts
+ * of reassembly being done in the stack */
+#define E1000_CB_LENGTH 256
+ if((length < E1000_CB_LENGTH) &&
+ !rx_ring->rx_skb_top &&
+ /* or maybe (status & E1000_RXD_STAT_EOP) && */
+ !multi_descriptor) {
+ struct sk_buff *new_skb =
+ dev_alloc_skb(length + NET_IP_ALIGN);
+ if(new_skb) {
+ skb_reserve(new_skb, NET_IP_ALIGN);
+ new_skb->dev = netdev;
+ memcpy(new_skb->data - NET_IP_ALIGN,
+ skb->data - NET_IP_ALIGN,
+ length + NET_IP_ALIGN);
+ /* save the skb in buffer_info as good */
+ buffer_info->skb = skb;
+ skb = new_skb;
+ skb_put(skb, length);
+ }
+ }
+
+ /* end copybreak code */
/* Receive Checksum Offload */
e1000_rx_checksum(adapter,
- (uint32_t)(rx_desc->status) |
+ (uint32_t)(status) |
((uint32_t)(rx_desc->errors) << 24),
rx_desc->csum, skb);
+
skb->protocol = eth_type_trans(skb, netdev);
#ifdef CONFIG_E1000_NAPI
if(unlikely(adapter->vlgrp &&
- (rx_desc->status & E1000_RXD_STAT_VP))) {
+ (status & E1000_RXD_STAT_VP))) {
vlan_hwaccel_receive_skb(skb, adapter->vlgrp,
le16_to_cpu(rx_desc->special) &
E1000_RXD_SPC_VLAN_MASK);
@@ -3465,7 +3537,7 @@ e1000_clean_rx_irq(struct e1000_adapter
}
#else /* CONFIG_E1000_NAPI */
if(unlikely(adapter->vlgrp &&
- (rx_desc->status & E1000_RXD_STAT_VP))) {
+ (status & E1000_RXD_STAT_VP))) {
vlan_hwaccel_rx(skb, adapter->vlgrp,
le16_to_cpu(rx_desc->special) &
E1000_RXD_SPC_VLAN_MASK);
@@ -3478,13 +3646,22 @@ e1000_clean_rx_irq(struct e1000_adapter
next_desc:
rx_desc->status = 0;
- buffer_info->skb = NULL;
- if(unlikely(++i == rx_ring->count)) i = 0;
- rx_desc = E1000_RX_DESC(*rx_ring, i);
+ /* return some buffers to hardware, one at a time is too slow */
+ if (unlikely(cleaned_count >= E1000_RX_BUFFER_WRITE)) {
+ adapter->alloc_rx_buf(adapter, rx_ring, cleaned_count);
+ cleaned_count = 0;
+ }
+
+ /* use prefetched values */
+ rx_desc = next_rxd;
+ buffer_info = next_buffer;
}
rx_ring->next_to_clean = i;
- adapter->alloc_rx_buf(adapter, rx_ring);
+
+ cleaned_count = E1000_DESC_UNUSED(rx_ring);
+ if (cleaned_count)
+ adapter->alloc_rx_buf(adapter, rx_ring, cleaned_count);
return cleaned;
}
@@ -3504,16 +3585,17 @@ e1000_clean_rx_irq_ps(struct e1000_adapt
struct e1000_rx_ring *rx_ring)
#endif
{
- union e1000_rx_desc_packet_split *rx_desc;
+ union e1000_rx_desc_packet_split *rx_desc, *next_rxd;
struct net_device *netdev = adapter->netdev;
struct pci_dev *pdev = adapter->pdev;
- struct e1000_buffer *buffer_info;
+ struct e1000_buffer *buffer_info, *next_buffer, *next2_buffer;
struct e1000_ps_page *ps_page;
struct e1000_ps_page_dma *ps_page_dma;
- struct sk_buff *skb;
+ struct sk_buff *skb, *next_skb;
unsigned int i, j;
uint32_t length, staterr;
boolean_t cleaned = FALSE;
+ int cleaned_count = 0;
i = rx_ring->next_to_clean;
rx_desc = E1000_RX_DESC_PS(*rx_ring, i);
@@ -3528,13 +3610,29 @@ e1000_clean_rx_irq_ps(struct e1000_adapt
break;
(*work_done)++;
#endif
+ skb = buffer_info->skb;
+
+ prefetch(skb->data - NET_IP_ALIGN);
+
+ if(++i == rx_ring->count) i = 0;
+ next_rxd = E1000_RX_DESC_PS(*rx_ring, i);
+ prefetch(next_rxd);
+
+ if((j = i + 1) == rx_ring->count) j = 0;
+ next2_buffer = &rx_ring->buffer_info[j];
+ prefetch(next2_buffer);
+
+ next_buffer = &rx_ring->buffer_info[i];
+ next_skb = next_buffer->skb;
+ prefetch(next_skb);
+ prefetch(next_skb->data - NET_IP_ALIGN);
+
cleaned = TRUE;
+ cleaned_count++;
pci_unmap_single(pdev, buffer_info->dma,
buffer_info->length,
PCI_DMA_FROMDEVICE);
- skb = buffer_info->skb;
-
if(unlikely(!(staterr & E1000_RXD_STAT_EOP))) {
E1000_DBG("%s: Packet Split buffers didn't pick up"
" the full packet\n", netdev->name);
@@ -3610,13 +3780,24 @@ e1000_clean_rx_irq_ps(struct e1000_adapt
next_desc:
rx_desc->wb.middle.status_error &= ~0xFF;
buffer_info->skb = NULL;
- if(unlikely(++i == rx_ring->count)) i = 0;
- rx_desc = E1000_RX_DESC_PS(*rx_ring, i);
+ /* return some buffers to hardware, one at a time is too slow */
+ if (unlikely(cleaned_count >= E1000_RX_BUFFER_WRITE)) {
+ adapter->alloc_rx_buf(adapter, rx_ring, cleaned_count);
+ cleaned_count = 0;
+ }
+
+ /* use prefetched values */
+ rx_desc = next_rxd;
+ buffer_info = next_buffer;
+
staterr = le32_to_cpu(rx_desc->wb.middle.status_error);
}
rx_ring->next_to_clean = i;
- adapter->alloc_rx_buf(adapter, rx_ring);
+
+ cleaned_count = E1000_DESC_UNUSED(rx_ring);
+ if (cleaned_count)
+ adapter->alloc_rx_buf(adapter, rx_ring, cleaned_count);
return cleaned;
}
@@ -3628,7 +3737,8 @@ next_desc:
static void
e1000_alloc_rx_buffers(struct e1000_adapter *adapter,
- struct e1000_rx_ring *rx_ring)
+ struct e1000_rx_ring *rx_ring,
+ int cleaned_count)
{
struct net_device *netdev = adapter->netdev;
struct pci_dev *pdev = adapter->pdev;
@@ -3641,8 +3819,14 @@ e1000_alloc_rx_buffers(struct e1000_adap
i = rx_ring->next_to_use;
buffer_info = &rx_ring->buffer_info[i];
- while(!buffer_info->skb) {
- skb = dev_alloc_skb(bufsz);
+ while(cleaned_count--) {
+ if(!(skb = buffer_info->skb))
+ skb = dev_alloc_skb(bufsz);
+ else {
+ skb->tail = skb->head;
+ skb->len = 0;
+ goto map_skb;
+ }
if(unlikely(!skb)) {
/* Better luck next round */
@@ -3682,6 +3861,7 @@ e1000_alloc_rx_buffers(struct e1000_adap
buffer_info->skb = skb;
buffer_info->length = adapter->rx_buffer_len;
+map_skb:
buffer_info->dma = pci_map_single(pdev,
skb->data,
adapter->rx_buffer_len,
@@ -3707,20 +3824,21 @@ e1000_alloc_rx_buffers(struct e1000_adap
rx_desc = E1000_RX_DESC(*rx_ring, i);
rx_desc->buffer_addr = cpu_to_le64(buffer_info->dma);
- if(unlikely((i & ~(E1000_RX_BUFFER_WRITE - 1)) == i)) {
- /* Force memory writes to complete before letting h/w
- * know there are new descriptors to fetch. (Only
- * applicable for weak-ordered memory model archs,
- * such as IA-64). */
- wmb();
- writel(i, adapter->hw.hw_addr + rx_ring->rdt);
- }
-
if(unlikely(++i == rx_ring->count)) i = 0;
buffer_info = &rx_ring->buffer_info[i];
}
- rx_ring->next_to_use = i;
+ if (rx_ring->next_to_use != i) {
+ rx_ring->next_to_use = i;
+ if(unlikely(i-- == 0)) i = (rx_ring->count - 1);
+
+ /* Force memory writes to complete before letting h/w
+ * know there are new descriptors to fetch. (Only
+ * applicable for weak-ordered memory model archs,
+ * such as IA-64). */
+ wmb();
+ writel(i, adapter->hw.hw_addr + rx_ring->rdt);
+ }
}
/**
@@ -3730,7 +3848,8 @@ e1000_alloc_rx_buffers(struct e1000_adap
static void
e1000_alloc_rx_buffers_ps(struct e1000_adapter *adapter,
- struct e1000_rx_ring *rx_ring)
+ struct e1000_rx_ring *rx_ring,
+ int cleaned_count)
{
struct net_device *netdev = adapter->netdev;
struct pci_dev *pdev = adapter->pdev;
@@ -3746,7 +3925,7 @@ e1000_alloc_rx_buffers_ps(struct e1000_a
ps_page = &rx_ring->ps_page[i];
ps_page_dma = &rx_ring->ps_page_dma[i];
- while(!buffer_info->skb) {
+ while (cleaned_count--) {
rx_desc = E1000_RX_DESC_PS(*rx_ring, i);
for(j = 0; j < PS_PAGE_BUFFERS; j++) {
@@ -3793,19 +3976,6 @@ e1000_alloc_rx_buffers_ps(struct e1000_a
rx_desc->read.buffer_addr[0] = cpu_to_le64(buffer_info->dma);
- if(unlikely((i & ~(E1000_RX_BUFFER_WRITE - 1)) == i)) {
- /* Force memory writes to complete before letting h/w
- * know there are new descriptors to fetch. (Only
- * applicable for weak-ordered memory model archs,
- * such as IA-64). */
- wmb();
- /* Hardware increments by 16 bytes, but packet split
- * descriptors are 32 bytes...so we increment tail
- * twice as much.
- */
- writel(i<<1, adapter->hw.hw_addr + rx_ring->rdt);
- }
-
if(unlikely(++i == rx_ring->count)) i = 0;
buffer_info = &rx_ring->buffer_info[i];
ps_page = &rx_ring->ps_page[i];
@@ -3813,7 +3976,21 @@
}
no_buffers:
- rx_ring->next_to_use = i;
+ if (rx_ring->next_to_use != i) {
+ rx_ring->next_to_use = i;
+ if(unlikely(i-- == 0)) i = (rx_ring->count - 1);
+
+ /* Force memory writes to complete before letting h/w
+ * know there are new descriptors to fetch. (Only
+ * applicable for weak-ordered memory model archs,
+ * such as IA-64). */
+ wmb();
+ /* Hardware increments by 16 bytes, but packet split
+ * descriptors are 32 bytes...so we increment tail
+ * twice as much.
+ */
+ writel(i<<1, adapter->hw.hw_addr + rx_ring->rdt);
+ }
}
/**