From: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>

Prefetching the initial bytes of mbuf structures earlier, and across two
cache lines instead of one, improves the performance of mlx4_rx_burst(),
which accesses the mbuf->next field that is not present in the first
128 bytes.

Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
 drivers/net/mlx4/mlx4.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index cb08ee8..4cf2f7d 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -2824,6 +2824,12 @@ mlx4_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
                assert(wr->num_sge == 1);
                assert(elts_head < rxq->elts_n);
                assert(rxq->elts_head < rxq->elts_n);
+               /*
+                * Fetch the first two cache lines of the packet descriptor
+                * early, while polling the CQ and allocating rep.
+                */
+               rte_prefetch0(seg);
+               rte_prefetch0(&seg->cacheline1);
                ret = rxq->if_cq->poll_length_flags(rxq->cq, NULL, NULL,
                                                    &flags);
                if (unlikely(ret < 0)) {
@@ -2861,11 +2867,6 @@ mlx4_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
                if (ret == 0)
                        break;
                len = ret;
-               /*
-                * Fetch initial bytes of packet descriptor into a
-                * cacheline while allocating rep.
-                */
-               rte_prefetch0(seg);
                rep = __rte_mbuf_raw_alloc(rxq->mp);
                if (unlikely(rep == NULL)) {
                        /*
-- 
2.1.0

Reply via email to