In the xmit path, prev_tx and prev_tx_ctx are set on every iteration of
the descriptor fill loops, even though only the values from the last
iteration are used. It is enough to derive them once, after the loops,
from the current ring position. Long-duration testing shows better
throughput than before.
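
The idea, as a minimal stand-alone sketch (the ring/first/last/put names
below are illustrative only, not the driver's fields):

	#include <stdio.h>

	#define RING_SIZE 8

	int main(void)
	{
		int ring[RING_SIZE] = { 0 };
		int *first = &ring[0];
		int *last  = &ring[RING_SIZE - 1];
		int *put   = first;
		int *prev;
		int n = 5;

		/* The fill loop no longer tracks "prev" on every pass. */
		while (n--) {
			*put = n;
			put = (put == last) ? first : put + 1;
		}

		/* Derive the previous slot once, handling ring wrap-around. */
		prev = (put == first) ? last : put - 1;

		printf("last slot written: %d\n", *prev);
		return 0;
	}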

CC: Srinivas Eeda <srinivas.e...@oracle.com>
CC: Joe Jin <joe....@oracle.com>
CC: Junxiao Bi <junxiao...@oracle.com>
Signed-off-by: Zhu Yanjun <yanjun....@oracle.com>
---
 drivers/net/ethernet/nvidia/forcedeth.c | 28 ++++++++++++++++++++--------
 1 file changed, 20 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c
index 63a9e1e..22912e7 100644
--- a/drivers/net/ethernet/nvidia/forcedeth.c
+++ b/drivers/net/ethernet/nvidia/forcedeth.c
@@ -2218,8 +2218,6 @@ static netdev_tx_t nv_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
        /* setup the header buffer */
        do {
-               prev_tx = put_tx;
-               prev_tx_ctx = np->put_tx_ctx;
                bcnt = (size > NV_TX2_TSO_MAX_SIZE) ? NV_TX2_TSO_MAX_SIZE : size;
                np->put_tx_ctx->dma = dma_map_single(&np->pci_dev->dev,
                                                     skb->data + offset, bcnt,
@@ -2254,8 +2252,6 @@ static netdev_tx_t nv_start_xmit(struct sk_buff *skb, struct net_device *dev)
                offset = 0;
 
                do {
-                       prev_tx = put_tx;
-                       prev_tx_ctx = np->put_tx_ctx;
                        if (!start_tx_ctx)
                                start_tx_ctx = tmp_tx_ctx = np->put_tx_ctx;
 
@@ -2296,6 +2292,16 @@ static netdev_tx_t nv_start_xmit(struct sk_buff *skb, struct net_device *dev)
                } while (frag_size);
        }
 
+       if (unlikely(put_tx == np->first_tx.orig))
+               prev_tx = np->last_tx.orig;
+       else
+               prev_tx = put_tx - 1;
+
+       if (unlikely(np->put_tx_ctx == np->first_tx_ctx))
+               prev_tx_ctx = np->last_tx_ctx;
+       else
+               prev_tx_ctx = np->put_tx_ctx - 1;
+
        /* set last fragment flag  */
        prev_tx->flaglen |= cpu_to_le32(tx_flags_extra);
 
@@ -2368,8 +2374,6 @@ static netdev_tx_t nv_start_xmit_optimized(struct sk_buff *skb,
 
        /* setup the header buffer */
        do {
-               prev_tx = put_tx;
-               prev_tx_ctx = np->put_tx_ctx;
                bcnt = (size > NV_TX2_TSO_MAX_SIZE) ? NV_TX2_TSO_MAX_SIZE : size;
                np->put_tx_ctx->dma = dma_map_single(&np->pci_dev->dev,
                                                     skb->data + offset, bcnt,
@@ -2405,8 +2409,6 @@ static netdev_tx_t nv_start_xmit_optimized(struct sk_buff *skb,
                offset = 0;
 
                do {
-                       prev_tx = put_tx;
-                       prev_tx_ctx = np->put_tx_ctx;
                        bcnt = (frag_size > NV_TX2_TSO_MAX_SIZE) ? NV_TX2_TSO_MAX_SIZE : frag_size;
                        if (!start_tx_ctx)
                                start_tx_ctx = tmp_tx_ctx = np->put_tx_ctx;
@@ -2447,6 +2449,16 @@ static netdev_tx_t nv_start_xmit_optimized(struct sk_buff *skb,
                } while (frag_size);
        }
 
+       if (unlikely(put_tx == np->first_tx.ex))
+               prev_tx = np->last_tx.ex;
+       else
+               prev_tx = put_tx - 1;
+
+       if (unlikely(np->put_tx_ctx == np->first_tx_ctx))
+               prev_tx_ctx = np->last_tx_ctx;
+       else
+               prev_tx_ctx = np->put_tx_ctx - 1;
+
        /* set last fragment flag  */
        prev_tx->flaglen |= cpu_to_le32(NV_TX2_LASTPACKET);
 
-- 
2.7.4
