Add a prefetch of the ethdev rx/tx queue structure into the CPU cache
(rte_prefetch0) just before calling into the device driver's rx or tx
burst operation.

RFC 2544 tests at the NIC task measurement points show lower latency
and/or better packet throughput, indicating that clock cycles are
saved.

Signed-off-by: Mike A. Polehn <mike.a.polehn at intel.com>
---
 lib/librte_ether/rte_ethdev.h | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 48a540d..f1c35de 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -2458,12 +2458,17 @@ rte_eth_rx_burst(uint8_t port_id, uint16_t queue_id,
                 struct rte_mbuf **rx_pkts, const uint16_t nb_pkts)
 {
        struct rte_eth_dev *dev;
+       int16_t nb_rx;

        dev = &rte_eth_devices[port_id];

-       int16_t nb_rx = (*dev->rx_pkt_burst)(dev->data->rx_queues[queue_id],
-                       rx_pkts, nb_pkts);
+       { /* limit scope of rxq variable */
+               /* rxq is going to be immediately used, prefetch it */
+               void *rxq = dev->data->rx_queues[queue_id];
+               rte_prefetch0(rxq);

+               nb_rx = (*dev->rx_pkt_burst)(rxq, rx_pkts, nb_pkts);
+       }
 #ifdef RTE_ETHDEV_RXTX_CALLBACKS
        struct rte_eth_rxtx_callback *cb = dev->post_rx_burst_cbs[queue_id];

@@ -2600,6 +2605,7 @@ rte_eth_tx_burst(uint8_t port_id, uint16_t queue_id,
                 struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 {
        struct rte_eth_dev *dev;
+       void *txq;

        dev = &rte_eth_devices[port_id];

@@ -2615,7 +2621,11 @@ rte_eth_tx_burst(uint8_t port_id, uint16_t queue_id,
        }
 #endif

-       return (*dev->tx_pkt_burst)(dev->data->tx_queues[queue_id], tx_pkts, nb_pkts);
+       /* txq is going to be immediately used, prefetch it */
+       txq = dev->data->tx_queues[queue_id];
+       rte_prefetch0(txq);
+
+       return (*dev->tx_pkt_burst)(txq, tx_pkts, nb_pkts);
 }
 #endif

-- 
2.6.0

Reply via email to