Prefetch the Ethernet driver's rx/tx queue structure into the CPU cache
(via rte_prefetch0()) immediately before calling into the device driver's
rx or tx burst operation.

RFC 2544 tests, measured at the NIC task measurement points, show lower
latency and/or better packet throughput, indicating that clock cycles are saved.

Signed-off-by: Mike A. Polehn <mike.a.polehn at intel.com>

diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 8a8c82b..09f1069 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -2357,11 +2357,15 @@ rte_eth_rx_burst(uint8_t port_id, uint16_t queue_id,
                 struct rte_mbuf **rx_pkts, const uint16_t nb_pkts)
 {
        struct rte_eth_dev *dev;
+       void *rxq;

        dev = &rte_eth_devices[port_id];

-       int16_t nb_rx = (*dev->rx_pkt_burst)(dev->data->rx_queues[queue_id],
-                       rx_pkts, nb_pkts);
+       /* rxq is going to be immediately used, prefetch it */
+       rxq = dev->data->rx_queues[queue_id];
+       rte_prefetch0(rxq);
+
+       int16_t nb_rx = (*dev->rx_pkt_burst)(rxq, rx_pkts, nb_pkts);

 #ifdef RTE_ETHDEV_RXTX_CALLBACKS
        struct rte_eth_rxtx_callback *cb = dev->post_rx_burst_cbs[queue_id];
@@ -2499,6 +2503,7 @@ rte_eth_tx_burst(uint8_t port_id, uint16_t queue_id,
                 struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 {
        struct rte_eth_dev *dev;
+       void *txq;

        dev = &rte_eth_devices[port_id];

@@ -2514,7 +2519,11 @@ rte_eth_tx_burst(uint8_t port_id, uint16_t queue_id,
        }
 #endif

-       return (*dev->tx_pkt_burst)(dev->data->tx_queues[queue_id], tx_pkts, nb_pkts);
+       /* txq is going to be immediately used, prefetch it */
+       txq = dev->data->tx_queues[queue_id];
+       rte_prefetch0(txq);
+
+       return (*dev->tx_pkt_burst)(txq, tx_pkts, nb_pkts);
 }
 #endif

Reply via email to