Move prefetch code out of Tx coalesce path to allow prefetching for non-coalesced Tx packets, as well.
Cc: sta...@dpdk.org Fixes: bf89cbedd2d9 ("cxgbe: optimize forwarding performance for 40G") Signed-off-by: Rahul Lakkireddy <rahul.lakkire...@chelsio.com> --- v2: - No changes. drivers/net/cxgbe/cxgbe_ethdev.c | 9 +++++++-- drivers/net/cxgbe/sge.c | 1 - 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/cxgbe/cxgbe_ethdev.c b/drivers/net/cxgbe/cxgbe_ethdev.c index 7d7be69ed..5d74f8ba3 100644 --- a/drivers/net/cxgbe/cxgbe_ethdev.c +++ b/drivers/net/cxgbe/cxgbe_ethdev.c @@ -67,6 +67,7 @@ uint16_t cxgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, struct sge_eth_txq *txq = (struct sge_eth_txq *)tx_queue; uint16_t pkts_sent, pkts_remain; uint16_t total_sent = 0; + uint16_t idx = 0; int ret = 0; CXGBE_DEBUG_TX(adapter, "%s: txq = %p; tx_pkts = %p; nb_pkts = %d\n", @@ -75,12 +76,16 @@ uint16_t cxgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, t4_os_lock(&txq->txq_lock); /* free up desc from already completed tx */ reclaim_completed_tx(&txq->q); + rte_prefetch0(rte_pktmbuf_mtod(tx_pkts[0], volatile void *)); while (total_sent < nb_pkts) { pkts_remain = nb_pkts - total_sent; for (pkts_sent = 0; pkts_sent < pkts_remain; pkts_sent++) { - ret = t4_eth_xmit(txq, tx_pkts[total_sent + pkts_sent], - nb_pkts); + idx = total_sent + pkts_sent; + if ((idx + 1) < nb_pkts) + rte_prefetch0(rte_pktmbuf_mtod(tx_pkts[idx + 1], + volatile void *)); + ret = t4_eth_xmit(txq, tx_pkts[idx], nb_pkts); if (ret < 0) break; } diff --git a/drivers/net/cxgbe/sge.c b/drivers/net/cxgbe/sge.c index 641be9657..bf3190211 100644 --- a/drivers/net/cxgbe/sge.c +++ b/drivers/net/cxgbe/sge.c @@ -1154,7 +1154,6 @@ int t4_eth_xmit(struct sge_eth_txq *txq, struct rte_mbuf *mbuf, txq->stats.mapping_err++; goto out_free; } - rte_prefetch0((volatile void *)addr); return tx_do_packet_coalesce(txq, mbuf, cflits, adap, pi, addr, nb_pkts); } else { -- 2.18.0