Adds ethdev driver prefetch of variable structure to CPU cache 0 while calling into tx or rx device driver operation.
RFC 2544 test of NIC task test measurement points show improvement of lower latency and/or better packet throughput indicating clock cycles saved. Signed-off-by: Mike A. Polehn <mike.a.polehn at intel.com> --- lib/librte_ether/rte_ethdev.h | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h index 48a540d..f1c35de 100644 --- a/lib/librte_ether/rte_ethdev.h +++ b/lib/librte_ether/rte_ethdev.h @@ -2458,12 +2458,17 @@ rte_eth_rx_burst(uint8_t port_id, uint16_t queue_id, struct rte_mbuf **rx_pkts, const uint16_t nb_pkts) { struct rte_eth_dev *dev; + int16_t nb_rx; dev = &rte_eth_devices[port_id]; - int16_t nb_rx = (*dev->rx_pkt_burst)(dev->data->rx_queues[queue_id], - rx_pkts, nb_pkts); + { /* limit scope of rxq variable */ + /* rxq is going to be immediately used, prefetch it */ + void *rxq = dev->data->rx_queues[queue_id]; + rte_prefetch0(rxq); + nb_rx = (*dev->rx_pkt_burst)(rxq, rx_pkts, nb_pkts); + } #ifdef RTE_ETHDEV_RXTX_CALLBACKS struct rte_eth_rxtx_callback *cb = dev->post_rx_burst_cbs[queue_id]; @@ -2600,6 +2605,7 @@ rte_eth_tx_burst(uint8_t port_id, uint16_t queue_id, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) { struct rte_eth_dev *dev; + void *txq; dev = &rte_eth_devices[port_id]; @@ -2615,7 +2621,11 @@ rte_eth_tx_burst(uint8_t port_id, uint16_t queue_id, } #endif - return (*dev->tx_pkt_burst)(dev->data->tx_queues[queue_id], tx_pkts, nb_pkts); + /* txq is going to be immediately used, prefetch it */ + txq = dev->data->tx_queues[queue_id]; + rte_prefetch0(txq); + + return (*dev->tx_pkt_burst)(txq, tx_pkts, nb_pkts); } #endif -- 2.6.0