e1000 driver update

Signed-off-by: Jeff Kirsher <[EMAIL PROTECTED]>
Signed-off-by: John Ronciak <[EMAIL PROTECTED]>
Signed-off-by: Jesse Brandeburg <[EMAIL PROTECTED]>

2. Performance Enhancements
- aggressive prefetch of rx_desc and skb->data just like we do for 10gig
- align the prefetches to a dword to help speed them up
- copybreak for packets < 256 bytes; ideally we would like to modify ethtool
  to allow this value to be changed. This helps the small-MTU,
  many-reassemblies case (see the sketch below)
- Fixed RX buffer sizing when the MTU changes
- Fixed Jumbo frames and memory allocation
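
For reference, the copybreak path added to e1000_clean_rx_irq() boils down
to the simplified sketch below (descriptor bookkeeping and error handling
omitted; E1000_CB_LENGTH is the 256 byte threshold mentioned above):

	/* Small, single-descriptor packets are copied into a freshly
	 * allocated skb which is handed up the stack; the original
	 * (full-size) receive buffer stays in buffer_info so that
	 * e1000_alloc_rx_buffers() can re-map and reuse it instead of
	 * allocating a new one. */
	#define E1000_CB_LENGTH 256

	if (length < E1000_CB_LENGTH && !multi_descriptor) {
		struct sk_buff *new_skb = dev_alloc_skb(length + NET_IP_ALIGN);

		if (new_skb) {
			skb_reserve(new_skb, NET_IP_ALIGN);
			new_skb->dev = netdev;
			/* copy the alignment padding plus payload */
			memcpy(new_skb->data - NET_IP_ALIGN,
			       skb->data - NET_IP_ALIGN,
			       length + NET_IP_ALIGN);
			buffer_info->skb = skb;	/* recycle the big buffer */
			skb = new_skb;		/* the small copy goes up */
			skb_put(skb, length);
		}
	}

If ethtool support materializes, E1000_CB_LENGTH would become a per-adapter
tunable rather than a compile-time constant.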

diff -up linux-2.6/drivers/net/e1000/e1000.h linux-2.6.new/drivers/net/e1000/e1000.h
--- linux-2.6/drivers/net/e1000/e1000.h 2005-11-14 16:20:34.000000000 -0800
+++ linux-2.6.new/drivers/net/e1000/e1000.h     2005-11-04 01:23:40.000000000 -0800
@@ -216,6 +216,12 @@ struct e1000_rx_ring {
        struct e1000_ps_page *ps_page;
        struct e1000_ps_page_dma *ps_page_dma;
 
+       struct sk_buff *rx_skb_top;
+       struct sk_buff *rx_skb_prev;
+
+       /* cpu for rx queue */
+       int cpu;
+
        uint16_t rdh;
        uint16_t rdt;
        uint64_t pkt;
@@ -288,7 +288,8 @@ struct e1000_adapter {
                               struct e1000_rx_ring *rx_ring);
 #endif
        void (*alloc_rx_buf) (struct e1000_adapter *adapter,
-                             struct e1000_rx_ring *rx_ring);
+                             struct e1000_rx_ring *rx_ring,
+                               int cleaned_count);
        struct e1000_rx_ring *rx_ring;      /* One per active queue */
 #ifdef CONFIG_E1000_NAPI
        struct net_device *polling_netdev;  /* One per active queue */
diff -up linux-2.6/drivers/net/e1000/e1000_main.c linux-2.6.new/drivers/net/e1000/e1000_main.c
--- linux-2.6/drivers/net/e1000/e1000_main.c    2005-11-14 16:20:34.000000000 -0800
+++ linux-2.6.new/drivers/net/e1000/e1000_main.c        2005-11-04 01:23:40.000000000 -0800
@@ -171,9 +171,11 @@ static boolean_t e1000_clean_rx_irq_ps(s
                                        struct e1000_rx_ring *rx_ring);
 #endif
 static void e1000_alloc_rx_buffers(struct e1000_adapter *adapter,
-                                   struct e1000_rx_ring *rx_ring);
+                                   struct e1000_rx_ring *rx_ring,
+                                  int cleaned_count);
 static void e1000_alloc_rx_buffers_ps(struct e1000_adapter *adapter,
-                                      struct e1000_rx_ring *rx_ring);
+                                      struct e1000_rx_ring *rx_ring,
+                                     int cleaned_count);
 static int e1000_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd);
 static int e1000_mii_ioctl(struct net_device *netdev, struct ifreq *ifr,
                           int cmd);
@@ -344,7 +344,8 @@ e1000_up(struct e1000_adapter *adapter)
        e1000_setup_rctl(adapter);
        e1000_configure_rx(adapter);
        for (i = 0; i < adapter->num_queues; i++)
-               adapter->alloc_rx_buf(adapter, &adapter->rx_ring[i]);
+               adapter->alloc_rx_buf(adapter, &adapter->rx_ring[i],
+                                       adapter->rx_ring[i].count);
 
 #ifdef CONFIG_PCI_MSI
        if(adapter->hw.mac_type > e1000_82547_rev_2) {
@@ -1454,6 +1457,8 @@ setup_rx_desc_die:
 
        rxdr->next_to_clean = 0;
        rxdr->next_to_use = 0;
+       rxdr->rx_skb_top = NULL;
+       rxdr->rx_skb_prev = NULL;
 
        return 0;
 }
@@ -1527,23 +1532,8 @@ e1000_setup_rctl(struct e1000_adapter *a
                rctl |= adapter->rx_buffer_len << 0x11;
        } else {
                rctl &= ~E1000_RCTL_SZ_4096;
-               rctl |= E1000_RCTL_BSEX; 
-               switch (adapter->rx_buffer_len) {
-               case E1000_RXBUFFER_2048:
-               default:
-                       rctl |= E1000_RCTL_SZ_2048;
-                       rctl &= ~E1000_RCTL_BSEX;
-                       break;
-               case E1000_RXBUFFER_4096:
-                       rctl |= E1000_RCTL_SZ_4096;
-                       break;
-               case E1000_RXBUFFER_8192:
-                       rctl |= E1000_RCTL_SZ_8192;
-                       break;
-               case E1000_RXBUFFER_16384:
-                       rctl |= E1000_RCTL_SZ_16384;
-                       break;
-               }
+               rctl &= ~E1000_RCTL_BSEX;
+               rctl |= E1000_RCTL_SZ_2048;
        }
 
 #ifdef CONFIG_E1000_PACKET_SPLIT
@@ -1935,6 +1925,16 @@ e1000_clean_rx_ring(struct e1000_adapter
                }
        }
 
+       /* there also may be some cached data in our adapter */
+       if(rx_ring->rx_skb_top) {
+               dev_kfree_skb(rx_ring->rx_skb_top);
+
+               /* rx_skb_prev will be wiped out by rx_skb_top */
+               rx_ring->rx_skb_top = NULL;
+               rx_ring->rx_skb_prev = NULL;
+       }
+
+
        size = sizeof(struct e1000_buffer) * rx_ring->count;
        memset(rx_ring->buffer_info, 0, size);
        size = sizeof(struct e1000_ps_page) * rx_ring->count;
@@ -2005,7 +2005,8 @@ e1000_leave_82542_rst(struct e1000_adapt
 
        if(netif_running(netdev)) {
                e1000_configure_rx(adapter);
-               e1000_alloc_rx_buffers(adapter, &adapter->rx_ring[0]);
+               e1000_alloc_rx_buffers(adapter, &adapter->rx_ring[0],
+                                       adapter->rx_ring[0].count);
        }
 }
 
@@ -2903,29 +2904,30 @@ e1000_change_mtu(struct net_device *netd
                                    "on 82573\n");
                return -EINVAL;
        }
+       if(unlikely((adapter->hw.mac_type < e1000_82543) &&
+                   (max_frame > MAXIMUM_ETHERNET_FRAME_SIZE))) {
+               DPRINTK(PROBE, ERR, "Jumbo Frames not supported on 82542\n");
+               return -EINVAL;
+       }
+
+       /* since the driver code now supports splitting a packet across
+        * multiple descriptors, most of the fifo related limitations on
+        * jumbo frame traffic have gone away.
+        * simply use 2k descriptors for everything.
+        *
+        * NOTE: dev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
+        * means we reserve 2 more, this pushes us to allocate from the next
+        * larger slab size
+        * i.e. RXBUFFER_2048 --> size-4096 slab */
 
+       /* recent hardware supports 1KB granularity */
        if(adapter->hw.mac_type > e1000_82547_rev_2) {
-               adapter->rx_buffer_len = max_frame;
+               adapter->rx_buffer_len =
+                   ((max_frame < E1000_RXBUFFER_2048) ?
+                       max_frame : E1000_RXBUFFER_2048);
                E1000_ROUNDUP(adapter->rx_buffer_len, 1024);
-       } else {
-               if(unlikely((adapter->hw.mac_type < e1000_82543) &&
-                  (max_frame > MAXIMUM_ETHERNET_FRAME_SIZE))) {
-                       DPRINTK(PROBE, ERR, "Jumbo Frames not supported "
-                                           "on 82542\n");
-                       return -EINVAL;
-
-               } else {
-                       if(max_frame <= E1000_RXBUFFER_2048) {
-                               adapter->rx_buffer_len = E1000_RXBUFFER_2048;
-                       } else if(max_frame <= E1000_RXBUFFER_4096) {
-                               adapter->rx_buffer_len = E1000_RXBUFFER_4096;
-                       } else if(max_frame <= E1000_RXBUFFER_8192) {
-                               adapter->rx_buffer_len = E1000_RXBUFFER_8192;
-                       } else if(max_frame <= E1000_RXBUFFER_16384) {
-                               adapter->rx_buffer_len = E1000_RXBUFFER_16384;
-                       }
-               }
-       }
+       } else
+               adapter->rx_buffer_len = E1000_RXBUFFER_2048;
 
        netdev->mtu = new_mtu;
 
@@ -3049,8 +3188,8 @@ e1000_update_stats(struct e1000_adapter 
 
        adapter->net_stats.rx_errors = adapter->stats.rxerrc +
                adapter->stats.crcerrs + adapter->stats.algnerrc +
-               adapter->stats.rlec + adapter->stats.mpc + 
-               adapter->stats.cexterr;
+               adapter->stats.rlec + adapter->stats.cexterr;
+       adapter->net_stats.rx_dropped = 0;
        adapter->net_stats.rx_length_errors = adapter->stats.rlec;
        adapter->net_stats.rx_crc_errors = adapter->stats.crcerrs;
        adapter->net_stats.rx_frame_errors = adapter->stats.algnerrc;
@@ -3294,9 +3454,6 @@ e1000_clean_tx_irq(struct e1000_adapter 
                        E1000_STATUS_TXOFF)) {
 
                        /* detected Tx unit hang */
-                       i = tx_ring->next_to_clean;
-                       eop = tx_ring->buffer_info[i].next_to_watch;
-                       eop_desc = E1000_TX_DESC(*tx_ring, eop);
                        DPRINTK(DRV, ERR, "Detected Tx Unit Hang\n"
                                        "  TDH                  <%x>\n"
                                        "  TDT                  <%x>\n"
@@ -3303,7 +3454,6 @@ 
                                        "  next_to_use          <%x>\n"
                                        "  next_to_clean        <%x>\n"
                                        "buffer_info[next_to_clean]\n"
-                                       "  dma                  <%llx>\n"
                                        "  time_stamp           <%lx>\n"
                                        "  next_to_watch        <%x>\n"
                                        "  jiffies              <%lx>\n"
@@ -3311,9 +3477,8 @@ e1000_clean_tx_irq(struct e1000_adapter 
                                readl(adapter->hw.hw_addr + tx_ring->tdh),
                                readl(adapter->hw.hw_addr + tx_ring->tdt),
                                tx_ring->next_to_use,
-                               i,
-                               (unsigned long long)tx_ring->buffer_info[i].dma,
-                               tx_ring->buffer_info[i].time_stamp,
+                               tx_ring->next_to_clean,
+                               tx_ring->buffer_info[eop].time_stamp,
                                eop,
                                jiffies,
                                eop_desc->upper.fields.status);
@@ -3391,46 +3388,98 @@ e1000_clean_rx_irq(struct e1000_adapter 
 {
        struct net_device *netdev = adapter->netdev;
        struct pci_dev *pdev = adapter->pdev;
-       struct e1000_rx_desc *rx_desc;
-       struct e1000_buffer *buffer_info;
-       struct sk_buff *skb;
+       struct e1000_rx_desc *rx_desc, *next_rxd;
+       struct e1000_buffer *buffer_info, *next_buffer, *next2_buffer;
        unsigned long flags;
        uint32_t length;
        uint8_t last_byte;
-       unsigned int i;
-       boolean_t cleaned = FALSE;
+       unsigned int i, j;
+       int cleaned_count = 0; 
+       boolean_t cleaned = FALSE, multi_descriptor = FALSE;
 
        i = rx_ring->next_to_clean;
        rx_desc = E1000_RX_DESC(*rx_ring, i);
+       buffer_info = &rx_ring->buffer_info[i];
 
        while(rx_desc->status & E1000_RXD_STAT_DD) {
-               buffer_info = &rx_ring->buffer_info[i];
+               struct sk_buff *skb, *next_skb;
+               u8 status;
+
 #ifdef CONFIG_E1000_NAPI
                if(*work_done >= work_to_do)
                        break;
                (*work_done)++;
 #endif
-               cleaned = TRUE;
+               status = rx_desc->status;
+               skb = buffer_info->skb;
+               buffer_info->skb = NULL;
+
+               prefetch(skb->data - NET_IP_ALIGN);
 
+               if(++i == rx_ring->count) i = 0;
+               next_rxd = E1000_RX_DESC(*rx_ring, i);
+               prefetch(next_rxd);
+
+               if((j = i + 1) == rx_ring->count) j = 0;
+               next2_buffer = &rx_ring->buffer_info[j];
+               prefetch(next2_buffer);
+
+               next_buffer = &rx_ring->buffer_info[i];
+               next_skb = next_buffer->skb;
+               prefetch(next_skb);
+               prefetch(next_skb->data - NET_IP_ALIGN);
+
+               cleaned = TRUE;
+               cleaned_count++;
                pci_unmap_single(pdev,
                                 buffer_info->dma,
                                 buffer_info->length,
                                 PCI_DMA_FROMDEVICE);
 
-               skb = buffer_info->skb;
                length = le16_to_cpu(rx_desc->length);
 
-               if(unlikely(!(rx_desc->status & E1000_RXD_STAT_EOP))) {
-                       /* All receives must fit into a single buffer */
-                       E1000_DBG("%s: Receive packet consumed multiple"
-                                 " buffers\n", netdev->name);
-                       dev_kfree_skb_irq(skb);
+               if(!(status & E1000_RXD_STAT_EOP)) {
+                       skb_put(skb, length);
+                       if(!rx_ring->rx_skb_top) {
+                               rx_ring->rx_skb_top = skb;
+                               rx_ring->rx_skb_top->len = length;
+                               rx_ring->rx_skb_prev = skb;
+                       } else {
+                               if(skb_shinfo(rx_ring->rx_skb_top)->frag_list) {
+                                       rx_ring->rx_skb_prev->next = skb;
+                                       skb->prev = rx_ring->rx_skb_prev;
+                               } else {
+                                       skb_shinfo(rx_ring->rx_skb_top)->frag_list = skb;
+                               }
+                               rx_ring->rx_skb_prev = skb;
+                               rx_ring->rx_skb_top->data_len += length;
+                       }
                        goto next_desc;
+               } else {
+                       skb_put(skb, length);
+                       if (rx_ring->rx_skb_top) {
+                               if(skb_shinfo(rx_ring->rx_skb_top)
+                                                       ->frag_list) {
+                                       rx_ring->rx_skb_prev->next = skb;
+                                       skb->prev = rx_ring->rx_skb_prev;
+                               } else
+                                       skb_shinfo(rx_ring->rx_skb_top)
+                                                       ->frag_list = skb;
+
+                               rx_ring->rx_skb_top->data_len += length;
+                               rx_ring->rx_skb_top->len +=
+                                       rx_ring->rx_skb_top->data_len;
+
+                               skb = rx_ring->rx_skb_top;
+                               multi_descriptor = TRUE;
+                               rx_ring->rx_skb_top = NULL;
+                               rx_ring->rx_skb_prev = NULL;
+                       }
                }
 
                if(unlikely(rx_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK)) {
                        last_byte = *(skb->data + length - 1);
-                       if(TBI_ACCEPT(&adapter->hw, rx_desc->status,
+                       if(TBI_ACCEPT(&adapter->hw, status,
                                      rx_desc->errors, length, last_byte)) {
                                spin_lock_irqsave(&adapter->stats_lock, flags);
                                e1000_tbi_adjust_stats(&adapter->hw,
@@ -3445,18 +3494,41 @@ e1000_clean_rx_irq(struct e1000_adapter 
                        }
                }
 
-               /* Good Receive */
-               skb_put(skb, length - ETHERNET_FCS_SIZE);
+               /* code added for copybreak, this should improve
+                * performance for small packets with large amounts
+                * of reassembly being done in the stack */
+#define E1000_CB_LENGTH 256
+               if((length < E1000_CB_LENGTH) &&
+                  !rx_ring->rx_skb_top &&
+                  /* or maybe (status & E1000_RXD_STAT_EOP) && */
+                  !multi_descriptor) {
+                       struct sk_buff *new_skb =
+                           dev_alloc_skb(length + NET_IP_ALIGN);
+                       if(new_skb) {
+                               skb_reserve(new_skb, NET_IP_ALIGN);
+                               new_skb->dev = netdev;
+                               memcpy(new_skb->data - NET_IP_ALIGN,
+                                      skb->data - NET_IP_ALIGN,
+                                      length + NET_IP_ALIGN);
+                               /* save the skb in buffer_info as good */
+                               buffer_info->skb = skb;
+                               skb = new_skb;
+                               skb_put(skb, length);
+                       }
+               }
+
+               /* end copybreak code */
 
                /* Receive Checksum Offload */
                e1000_rx_checksum(adapter,
-                                 (uint32_t)(rx_desc->status) |
+                                 (uint32_t)(status) |
                                  ((uint32_t)(rx_desc->errors) << 24),
                                  rx_desc->csum, skb);
+
                skb->protocol = eth_type_trans(skb, netdev);
 #ifdef CONFIG_E1000_NAPI
                if(unlikely(adapter->vlgrp &&
-                           (rx_desc->status & E1000_RXD_STAT_VP))) {
+                           (status & E1000_RXD_STAT_VP))) {
                        vlan_hwaccel_receive_skb(skb, adapter->vlgrp,
                                                 le16_to_cpu(rx_desc->special) &
                                                 E1000_RXD_SPC_VLAN_MASK);
@@ -3465,7 +3537,7 @@ e1000_clean_rx_irq(struct e1000_adapter 
                }
 #else /* CONFIG_E1000_NAPI */
                if(unlikely(adapter->vlgrp &&
-                           (rx_desc->status & E1000_RXD_STAT_VP))) {
+                           (status & E1000_RXD_STAT_VP))) {
                        vlan_hwaccel_rx(skb, adapter->vlgrp,
                                        le16_to_cpu(rx_desc->special) &
                                        E1000_RXD_SPC_VLAN_MASK);
@@ -3478,13 +3646,22 @@ e1000_clean_rx_irq(struct e1000_adapter 
 
 next_desc:
                rx_desc->status = 0;
-               buffer_info->skb = NULL;
-               if(unlikely(++i == rx_ring->count)) i = 0;
 
-               rx_desc = E1000_RX_DESC(*rx_ring, i);
+               /* return some buffers to hardware, one at a time is too slow */
+               if (unlikely(cleaned_count >= E1000_RX_BUFFER_WRITE)) {
+                       adapter->alloc_rx_buf(adapter, rx_ring, cleaned_count);
+                       cleaned_count = 0;
+               }
+
+               /* use prefetched values */
+               rx_desc = next_rxd;
+               buffer_info = next_buffer;
        }
        rx_ring->next_to_clean = i;
-       adapter->alloc_rx_buf(adapter, rx_ring);
+
+       cleaned_count = E1000_DESC_UNUSED(rx_ring);
+       if (cleaned_count)
+               adapter->alloc_rx_buf(adapter, rx_ring, cleaned_count);
 
        return cleaned;
 }
@@ -3504,16 +3585,17 @@ e1000_clean_rx_irq_ps(struct e1000_adapt
                       struct e1000_rx_ring *rx_ring)
 #endif
 {
-       union e1000_rx_desc_packet_split *rx_desc;
+       union e1000_rx_desc_packet_split *rx_desc, *next_rxd;
        struct net_device *netdev = adapter->netdev;
        struct pci_dev *pdev = adapter->pdev;
-       struct e1000_buffer *buffer_info;
+       struct e1000_buffer *buffer_info, *next_buffer, *next2_buffer;
        struct e1000_ps_page *ps_page;
        struct e1000_ps_page_dma *ps_page_dma;
-       struct sk_buff *skb;
+       struct sk_buff *skb, *next_skb;
        unsigned int i, j;
        uint32_t length, staterr;
        boolean_t cleaned = FALSE;
+       int cleaned_count = 0;
 
        i = rx_ring->next_to_clean;
        rx_desc = E1000_RX_DESC_PS(*rx_ring, i);
@@ -3528,13 +3610,29 @@ e1000_clean_rx_irq_ps(struct e1000_adapt
                        break;
                (*work_done)++;
 #endif
+               skb = buffer_info->skb;
+
+               prefetch(skb->data - NET_IP_ALIGN);
+
+               if(++i == rx_ring->count) i = 0;
+               next_rxd = E1000_RX_DESC_PS(*rx_ring, i);
+               prefetch(next_rxd);
+
+               if((j = i + 1) == rx_ring->count) j = 0;
+               next2_buffer = &rx_ring->buffer_info[j];
+               prefetch(next2_buffer);
+
+               next_buffer = &rx_ring->buffer_info[i];
+               next_skb = next_buffer->skb;
+               prefetch(next_skb);
+               prefetch(next_skb->data - NET_IP_ALIGN);
+
                cleaned = TRUE;
+               cleaned_count++;
                pci_unmap_single(pdev, buffer_info->dma,
                                 buffer_info->length,
                                 PCI_DMA_FROMDEVICE);
 
-               skb = buffer_info->skb;
-
                if(unlikely(!(staterr & E1000_RXD_STAT_EOP))) {
                        E1000_DBG("%s: Packet Split buffers didn't pick up"
                                  " the full packet\n", netdev->name);
@@ -3610,13 +3780,24 @@ e1000_clean_rx_irq_ps(struct e1000_adapt
 next_desc:
                rx_desc->wb.middle.status_error &= ~0xFF;
                buffer_info->skb = NULL;
-               if(unlikely(++i == rx_ring->count)) i = 0;
 
-               rx_desc = E1000_RX_DESC_PS(*rx_ring, i);
+               /* return some buffers to hardware, one at a time is too slow */
+               if (unlikely(cleaned_count >= E1000_RX_BUFFER_WRITE)) {
+                       adapter->alloc_rx_buf(adapter, rx_ring, cleaned_count);
+                       cleaned_count = 0;
+               }
+
+               /* use prefetched values */
+               rx_desc = next_rxd;
+               buffer_info = next_buffer;
+
                staterr = le32_to_cpu(rx_desc->wb.middle.status_error);
        }
        rx_ring->next_to_clean = i;
-       adapter->alloc_rx_buf(adapter, rx_ring);
+
+       cleaned_count = E1000_DESC_UNUSED(rx_ring);
+       if (cleaned_count)
+               adapter->alloc_rx_buf(adapter, rx_ring, cleaned_count);
 
        return cleaned;
 }
@@ -3628,7 +3737,8 @@ next_desc:
 
 static void
 e1000_alloc_rx_buffers(struct e1000_adapter *adapter,
-                       struct e1000_rx_ring *rx_ring)
+                       struct e1000_rx_ring *rx_ring,
+                      int cleaned_count)
 {
        struct net_device *netdev = adapter->netdev;
        struct pci_dev *pdev = adapter->pdev;
@@ -3641,8 +3819,14 @@ e1000_alloc_rx_buffers(struct e1000_adap
        i = rx_ring->next_to_use;
        buffer_info = &rx_ring->buffer_info[i];
 
-       while(!buffer_info->skb) {
-               skb = dev_alloc_skb(bufsz);
+       while(cleaned_count--) {
+               if(!(skb = buffer_info->skb))
+                       skb = dev_alloc_skb(bufsz);
+               else {
+                       skb->tail = skb->head;
+                       skb->len = 0;
+                       goto map_skb;
+               }
 
                if(unlikely(!skb)) {
                        /* Better luck next round */
@@ -3682,6 +3861,7 @@ e1000_alloc_rx_buffers(struct e1000_adap
 
                buffer_info->skb = skb;
                buffer_info->length = adapter->rx_buffer_len;
+map_skb:
                buffer_info->dma = pci_map_single(pdev,
                                                  skb->data,
                                                  adapter->rx_buffer_len,
@@ -3707,20 +3824,21 @@ e1000_alloc_rx_buffers(struct e1000_adap
                rx_desc = E1000_RX_DESC(*rx_ring, i);
                rx_desc->buffer_addr = cpu_to_le64(buffer_info->dma);
 
-               if(unlikely((i & ~(E1000_RX_BUFFER_WRITE - 1)) == i)) {
-                       /* Force memory writes to complete before letting h/w
-                        * know there are new descriptors to fetch.  (Only
-                        * applicable for weak-ordered memory model archs,
-                        * such as IA-64). */
-                       wmb();
-                       writel(i, adapter->hw.hw_addr + rx_ring->rdt);
-               }
-
                if(unlikely(++i == rx_ring->count)) i = 0;
                buffer_info = &rx_ring->buffer_info[i];
        }
 
-       rx_ring->next_to_use = i;
+       if (rx_ring->next_to_use != i) {
+               rx_ring->next_to_use = i;
+               if(unlikely(i-- == 0)) i = (rx_ring->count - 1);
+
+               /* Force memory writes to complete before letting h/w
+                * know there are new descriptors to fetch.  (Only
+                * applicable for weak-ordered memory model archs,
+                * such as IA-64). */
+               wmb();
+               writel(i, adapter->hw.hw_addr + rx_ring->rdt);
+       }
 }
 
 /**
@@ -3730,7 +3848,8 @@ e1000_alloc_rx_buffers(struct e1000_adap
 
 static void
 e1000_alloc_rx_buffers_ps(struct e1000_adapter *adapter,
-                          struct e1000_rx_ring *rx_ring)
+                          struct e1000_rx_ring *rx_ring,
+                         int cleaned_count)
 {
        struct net_device *netdev = adapter->netdev;
        struct pci_dev *pdev = adapter->pdev;
@@ -3746,7 +3925,7 @@ e1000_alloc_rx_buffers_ps(struct e1000_a
        ps_page = &rx_ring->ps_page[i];
        ps_page_dma = &rx_ring->ps_page_dma[i];
 
-       while(!buffer_info->skb) {
+       while (cleaned_count--) {
                rx_desc = E1000_RX_DESC_PS(*rx_ring, i);
 
                for(j = 0; j < PS_PAGE_BUFFERS; j++) {
@@ -3793,19 +3976,6 @@ e1000_alloc_rx_buffers_ps(struct e1000_a
 
                rx_desc->read.buffer_addr[0] = cpu_to_le64(buffer_info->dma);
 
-               if(unlikely((i & ~(E1000_RX_BUFFER_WRITE - 1)) == i)) {
-                       /* Force memory writes to complete before letting h/w
-                        * know there are new descriptors to fetch.  (Only
-                        * applicable for weak-ordered memory model archs,
-                        * such as IA-64). */
-                       wmb();
-                       /* Hardware increments by 16 bytes, but packet split
-                        * descriptors are 32 bytes...so we increment tail
-                        * twice as much.
-                        */
-                       writel(i<<1, adapter->hw.hw_addr + rx_ring->rdt);
-               }
-
                if(unlikely(++i == rx_ring->count)) i = 0;
                buffer_info = &rx_ring->buffer_info[i];
                ps_page = &rx_ring->ps_page[i];
@@ -3813,7 +3976,21 @@ 
        }
 
 no_buffers:
-       rx_ring->next_to_use = i;
+       if (rx_ring->next_to_use != i) {
+               rx_ring->next_to_use = i;
+               if(unlikely(i-- == 0)) i = (rx_ring->count - 1);
+
+               /* Force memory writes to complete before letting h/w
+                * know there are new descriptors to fetch.  (Only
+                * applicable for weak-ordered memory model archs,
+                * such as IA-64). */
+               wmb();
+               /* Hardware increments by 16 bytes, but packet split
+                * descriptors are 32 bytes...so we increment tail
+                * twice as much.
+                */
+               writel(i<<1, adapter->hw.hw_addr + rx_ring->rdt);
+       }
 }
 
 /**

Cheers,
Jeff