From: Nelio Laranjeiro <nelio.laranjeiro@6wind.com> Prefetching initial bytes of mbuf structures earlier and in two cache lines instead of one improves performance of mlx4_rx_burst(), which accesses the mbuf->next field, which is not present in the first 128 bytes.
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro at 6wind.com> --- drivers/net/mlx4/mlx4.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c index cb08ee8..4cf2f7d 100644 --- a/drivers/net/mlx4/mlx4.c +++ b/drivers/net/mlx4/mlx4.c @@ -2824,6 +2824,12 @@ mlx4_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) assert(wr->num_sge == 1); assert(elts_head < rxq->elts_n); assert(rxq->elts_head < rxq->elts_n); + /* + * Fetch initial bytes of packet descriptor into a + * cacheline while allocating rep. + */ + rte_prefetch0(seg); + rte_prefetch0(&seg->cacheline1); ret = rxq->if_cq->poll_length_flags(rxq->cq, NULL, NULL, &flags); if (unlikely(ret < 0)) { @@ -2861,11 +2867,6 @@ mlx4_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) if (ret == 0) break; len = ret; - /* - * Fetch initial bytes of packet descriptor into a - * cacheline while allocating rep. - */ - rte_prefetch0(seg); rep = __rte_mbuf_raw_alloc(rxq->mp); if (unlikely(rep == NULL)) { /* -- 2.1.0