Enable direct re-arm mode for the i40e driver, putting mbufs freed on
the Tx side directly back into the Rx descriptor ring. This patch
supports the case where the mapped Rx and Tx queues are polled by the
same single lcore.
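
As a minimal illustration (the setup path is not part of this patch, so
the direct field assignments below are hypothetical and only show which
fields the mode relies on):

    /* Assumed setup: Rx queue 0 of port 0 re-arms its descriptors
     * directly from mbufs freed on Tx queue 0 of port 1; both queues
     * are polled by the same lcore. "dev" is the Rx port's device.
     */
    struct i40e_rx_queue *rxq = dev->data->rx_queues[0];

    rxq->direct_rxrearm_port = 1;   /* Tx port to take freed mbufs from */
    rxq->direct_rxrearm_queue = 0;  /* Tx queue to take freed mbufs from */
    rxq->direct_rxrearm_enable = true;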

Suggested-by: Honnappa Nagarahalli <honnappa.nagaraha...@arm.com>
Signed-off-by: Feifei Wang <feifei.wa...@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.w...@arm.com>
---
 drivers/net/i40e/i40e_rxtx.h          |   4 +
 drivers/net/i40e/i40e_rxtx_vec_neon.c | 149 +++++++++++++++++++++++++-
 2 files changed, 151 insertions(+), 2 deletions(-)

diff --git a/drivers/net/i40e/i40e_rxtx.h b/drivers/net/i40e/i40e_rxtx.h
index 5e6eecc501..1fdf4305f4 100644
--- a/drivers/net/i40e/i40e_rxtx.h
+++ b/drivers/net/i40e/i40e_rxtx.h
@@ -102,6 +102,8 @@ struct i40e_rx_queue {
 
        uint16_t rxrearm_nb;    /**< number of remaining to be re-armed */
        uint16_t rxrearm_start; /**< the idx we start the re-arming from */
+       uint16_t direct_rxrearm_port; /**< device Tx port ID for direct re-arm mode */
+       uint16_t direct_rxrearm_queue; /**< Tx queue index for direct re-arm mode */
        uint64_t mbuf_initializer; /**< value to init mbufs */
 
        uint16_t port_id; /**< device port ID */
@@ -121,6 +123,8 @@ struct i40e_rx_queue {
        uint16_t rx_using_sse; /**<flag indicate the usage of vPMD for rx */
        uint8_t dcb_tc;         /**< Traffic class of rx queue */
        uint64_t offloads; /**< Rx offload flags of RTE_ETH_RX_OFFLOAD_* */
+       /** true if direct re-arm mode is enabled, false otherwise */
+       bool direct_rxrearm_enable;
        const struct rte_memzone *mz;
 };
 
diff --git a/drivers/net/i40e/i40e_rxtx_vec_neon.c b/drivers/net/i40e/i40e_rxtx_vec_neon.c
index b951ea2dc3..72bac3fb40 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_neon.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_neon.c
@@ -77,6 +77,147 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
        I40E_PCI_REG_WRITE_RELAXED(rxq->qrx_tail, rx_id);
 }
 
+static inline void
+i40e_rxq_rearm_direct_single(struct i40e_rx_queue *rxq)
+{
+       struct rte_eth_dev *dev;
+       struct i40e_tx_queue *txq;
+       volatile union i40e_rx_desc *rxdp;
+       struct i40e_tx_entry *txep;
+       struct i40e_rx_entry *rxep;
+       uint16_t tx_port_id, tx_queue_id;
+       uint16_t rx_id;
+       struct rte_mbuf *mb0, *mb1, *m;
+       uint64x2_t dma_addr0, dma_addr1;
+       uint64x2_t zero = vdupq_n_u64(0);
+       uint64_t paddr;
+       uint16_t i, n;
+       uint16_t nb_rearm = 0;
+
+       rxdp = rxq->rx_ring + rxq->rxrearm_start;
+       rxep = &rxq->sw_ring[rxq->rxrearm_start];
+
+       tx_port_id = rxq->direct_rxrearm_port;
+       tx_queue_id = rxq->direct_rxrearm_queue;
+       dev = &rte_eth_devices[tx_port_id];
+       txq = dev->data->tx_queues[tx_queue_id];
+
+       /* tx_rs_thresh must be equal to
+        * RTE_I40E_RXQ_REARM_THRESH in
+        * direct re-arm mode, because
+        * tx_next_dd is advanced by the
+        * number of buffers freed on
+        * each pass
+        */
+       n = RTE_I40E_RXQ_REARM_THRESH;
+
+       if (txq->nb_tx_free < txq->tx_free_thresh) {
+               /* check DD bits on threshold descriptor */
+               if ((txq->tx_ring[txq->tx_next_dd].cmd_type_offset_bsz &
+                               rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) !=
+                               rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE)) {
+                       goto mempool_bulk;
+               }
+
+               /* first buffer to free from S/W ring is at index
+                * tx_next_dd - (tx_rs_thresh-1)
+                */
+               txep = &txq->sw_ring[txq->tx_next_dd - (n - 1)];
+
+               if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
+                       /* directly put mbufs from Tx to Rx,
+                        * and initialize the mbufs in vector,
+                        * process 2 mbufs in one loop
+                        */
+                       for (i = 0; i < n; i += 2, rxep += 2, txep += 2) {
+                               rxep[0].mbuf = txep[0].mbuf;
+                               rxep[1].mbuf = txep[1].mbuf;
+
+                               /* Initialize rxdp descs */
+                               mb0 = txep[0].mbuf;
+                               mb1 = txep[1].mbuf;
+
+                               paddr = mb0->buf_iova + RTE_PKTMBUF_HEADROOM;
+                               dma_addr0 = vdupq_n_u64(paddr);
+                               /* flush desc with pa dma_addr */
+                               vst1q_u64((uint64_t *)&rxdp++->read, dma_addr0);
+
+                               paddr = mb1->buf_iova + RTE_PKTMBUF_HEADROOM;
+                               dma_addr1 = vdupq_n_u64(paddr);
+                               /* flush desc with pa dma_addr */
+                               vst1q_u64((uint64_t *)&rxdp++->read, dma_addr1);
+                       }
+               } else {
+                       for (i = 0; i < n; i++) {
+                               m = rte_pktmbuf_prefree_seg(txep[i].mbuf);
+                               if (m != NULL) {
+                                       rxep[nb_rearm].mbuf = m;
+
+                                       /* Initialize rxdp descs */
+                                       paddr = m->buf_iova + RTE_PKTMBUF_HEADROOM;
+                                       dma_addr0 = vdupq_n_u64(paddr);
+                                       /* flush desc with pa dma_addr */
+                                       vst1q_u64((uint64_t *)&rxdp++->read, dma_addr0);
+                                       nb_rearm++;
+                               }
+                       }
+                       n = nb_rearm;
+               }
+
+               /* update counters for Tx */
+               txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + RTE_I40E_RXQ_REARM_THRESH);
+               txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + RTE_I40E_RXQ_REARM_THRESH);
+               if (txq->tx_next_dd >= txq->nb_tx_desc)
+                       txq->tx_next_dd = (uint16_t)(RTE_I40E_RXQ_REARM_THRESH - 1);
+       } else {
+mempool_bulk:
+               /* if TX did not free bufs into Rx sw-ring,
+                * get new bufs from mempool
+                */
+               if (unlikely(rte_mempool_get_bulk(rxq->mp, (void *)rxep, n) < 0)) {
+                       if (rxq->rxrearm_nb + n >= rxq->nb_rx_desc) {
+                               for (i = 0; i < RTE_I40E_DESCS_PER_LOOP; i++) {
+                                       rxep[i].mbuf = &rxq->fake_mbuf;
+                                       vst1q_u64((uint64_t *)&rxdp[i].read, zero);
+                               }
+                       }
+                       rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed += n;
+                       return;
+               }
+
+               /* Initialize the mbufs in vector, process 2 mbufs in one loop */
+               for (i = 0; i < n; i += 2, rxep += 2) {
+                       mb0 = rxep[0].mbuf;
+                       mb1 = rxep[1].mbuf;
+
+                       paddr = mb0->buf_iova + RTE_PKTMBUF_HEADROOM;
+                       dma_addr0 = vdupq_n_u64(paddr);
+                       /* flush desc with pa dma_addr */
+                       vst1q_u64((uint64_t *)&rxdp++->read, dma_addr0);
+
+                       paddr = mb1->buf_iova + RTE_PKTMBUF_HEADROOM;
+                       dma_addr1 = vdupq_n_u64(paddr);
+                       /* flush desc with pa dma_addr */
+                       vst1q_u64((uint64_t *)&rxdp++->read, dma_addr1);
+               }
+       }
+
+       /* Update the descriptor initializer index */
+       rxq->rxrearm_start += n;
+       rx_id = rxq->rxrearm_start - 1;
+
+       if (unlikely(rxq->rxrearm_start >= rxq->nb_rx_desc)) {
+               rxq->rxrearm_start = 0;
+               rx_id = rxq->nb_rx_desc - 1;
+       }
+
+       rxq->rxrearm_nb -= n;
+
+       rte_io_wmb();
+       /* Update the tail pointer on the NIC */
+       I40E_PCI_REG_WRITE_RELAXED(rxq->qrx_tail, rx_id);
+}
+
 static inline void
 desc_to_olflags_v(struct i40e_rx_queue *rxq, uint64x2_t descs[4],
                  struct rte_mbuf **rx_pkts)
@@ -244,8 +385,12 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *__rte_restrict rxq,
        /* See if we need to rearm the RX queue - gives the prefetch a bit
         * of time to act
         */
-       if (rxq->rxrearm_nb > RTE_I40E_RXQ_REARM_THRESH)
-               i40e_rxq_rearm(rxq);
+       if (rxq->rxrearm_nb > RTE_I40E_RXQ_REARM_THRESH) {
+               if (rxq->direct_rxrearm_enable)
+                       i40e_rxq_rearm_direct_single(rxq);
+               else
+                       i40e_rxq_rearm(rxq);
+       }
 
        /* Before we start moving massive data around, check to see if
         * there is actually a packet available
-- 
2.25.1
