Enable direct re-arm mode for the i40e driver. This patch supports the case where the mapped Rx and Tx queues are handled by the same single lcore.
Suggested-by: Honnappa Nagarahalli <honnappa.nagaraha...@arm.com>
Signed-off-by: Feifei Wang <feifei.wa...@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.w...@arm.com>
Reviewed-by: Honnappa Nagarahalli <honnappa.nagaraha...@arm.com>
---
 drivers/net/i40e/i40e_ethdev.c        |   1 +
 drivers/net/i40e/i40e_ethdev.h        |   2 +
 drivers/net/i40e/i40e_rxtx.c          |  24 +++++
 drivers/net/i40e/i40e_rxtx.h          |   2 +
 drivers/net/i40e/i40e_rxtx_vec_neon.c | 119 ++++++++++++++++++++++++++
 5 files changed, 148 insertions(+)

diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
index 65e689df32..649ec06f31 100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -497,6 +497,7 @@ static const struct eth_dev_ops i40e_eth_dev_ops = {
 	.flow_ops_get = i40e_dev_flow_ops_get,
 	.rxq_info_get = i40e_rxq_info_get,
 	.txq_info_get = i40e_txq_info_get,
+	.txq_data_get = i40e_txq_data_get,
 	.rx_burst_mode_get = i40e_rx_burst_mode_get,
 	.tx_burst_mode_get = i40e_tx_burst_mode_get,
 	.timesync_enable = i40e_timesync_enable,
diff --git a/drivers/net/i40e/i40e_ethdev.h b/drivers/net/i40e/i40e_ethdev.h
index fe943a45ff..ee13730917 100644
--- a/drivers/net/i40e/i40e_ethdev.h
+++ b/drivers/net/i40e/i40e_ethdev.h
@@ -1352,6 +1352,8 @@ void i40e_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
 	struct rte_eth_rxq_info *qinfo);
 void i40e_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
 	struct rte_eth_txq_info *qinfo);
+void i40e_txq_data_get(struct rte_eth_dev *dev, uint16_t queue_id,
+	struct rte_eth_txq_data *txq_data);
 int i40e_rx_burst_mode_get(struct rte_eth_dev *dev, uint16_t queue_id,
 			   struct rte_eth_burst_mode *mode);
 int i40e_tx_burst_mode_get(struct rte_eth_dev *dev, uint16_t queue_id,
diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
index 788ffb51c2..be92ccd38a 100644
--- a/drivers/net/i40e/i40e_rxtx.c
+++ b/drivers/net/i40e/i40e_rxtx.c
@@ -3197,6 +3197,29 @@ i40e_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
 	qinfo->conf.offloads = txq->offloads;
 }
 
+/*
+ * Export the Tx queue state used by the Rx direct re-arm path. Ring
+ * pointers and thresholds are copied; offloads, tx_next_dd and nb_tx_free
+ * are exported as pointers to the fields inside the Tx queue.
+ */
+void
+i40e_txq_data_get(struct rte_eth_dev *dev, uint16_t queue_id,
+	struct rte_eth_txq_data *txq_data)
+{
+	struct i40e_tx_queue *txq;
+
+	txq = dev->data->tx_queues[queue_id];
+
+	txq_data->offloads = &txq->offloads;
+	txq_data->tx_sw_ring = txq->sw_ring;
+	txq_data->tx_ring = txq->tx_ring;
+	txq_data->tx_next_dd = &txq->tx_next_dd;
+	txq_data->nb_tx_free = &txq->nb_tx_free;
+	txq_data->nb_tx_desc = txq->nb_tx_desc;
+	txq_data->tx_rs_thresh = txq->tx_rs_thresh;
+	txq_data->tx_free_thresh = txq->tx_free_thresh;
+}
+
 #ifdef RTE_ARCH_X86
 static inline bool
 get_avx_supported(bool request_avx512)
@@ -3321,6 +3344,7 @@ i40e_set_rx_function(struct rte_eth_dev *dev)
 			PMD_INIT_LOG(DEBUG, "Using Vector Rx (port %d).",
 				     dev->data->port_id);
 			dev->rx_pkt_burst = i40e_recv_pkts_vec;
+			dev->rx_direct_rearm = i40e_direct_rearm_vec;
 		}
 #endif /* RTE_ARCH_X86 */
 	} else if (!dev->data->scattered_rx && ad->rx_bulk_alloc_allowed) {
diff --git a/drivers/net/i40e/i40e_rxtx.h b/drivers/net/i40e/i40e_rxtx.h
index 5e6eecc501..7a8fa2d1e8 100644
--- a/drivers/net/i40e/i40e_rxtx.h
+++ b/drivers/net/i40e/i40e_rxtx.h
@@ -216,6 +216,8 @@ uint16_t i40e_simple_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 		uint16_t nb_pkts);
 uint16_t i40e_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 		uint16_t nb_pkts);
+uint16_t i40e_direct_rearm_vec(void *rx_queue,
+		struct rte_eth_txq_data *txq_data);
 int i40e_tx_queue_init(struct i40e_tx_queue *txq);
 int i40e_rx_queue_init(struct i40e_rx_queue *rxq);
 void i40e_free_tx_resources(struct i40e_tx_queue *txq);
diff --git a/drivers/net/i40e/i40e_rxtx_vec_neon.c b/drivers/net/i40e/i40e_rxtx_vec_neon.c
index 12e6f1cbcb..84e0159e08 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_neon.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_neon.c
@@ -762,3 +762,122 @@ i40e_rx_vec_dev_conf_condition_check(struct rte_eth_dev *dev)
 {
 	return i40e_rx_vec_dev_conf_condition_check_default(dev);
 }
+
+/*
+ * Re-arm Rx descriptors directly with the mbufs of completed Tx descriptors.
+ *
+ * One batch of tx_rs_thresh Tx buffers is taken when the Rx queue has more
+ * than tx_rs_thresh descriptors to re-arm, the Tx queue has fewer than
+ * tx_free_thresh free descriptors, the batch fits before the end of the Rx
+ * ring and the DD bit is set on the Tx threshold descriptor.
+ *
+ * Returns the number of Rx descriptors re-armed, or 0 if none were.
+ *
+ * NOTE(review): mbufs are moved to the Rx ring without checking which
+ * mempool they belong to; confirm callers only pair compatible queues.
+ */
+uint16_t
+i40e_direct_rearm_vec(void *rx_queue, struct rte_eth_txq_data *txq_data)
+{
+	struct i40e_rx_queue *rxq = rx_queue;
+	struct i40e_rx_entry *rxep;
+	struct i40e_tx_entry *txep;
+	volatile union i40e_rx_desc *rxdp;
+	volatile struct i40e_tx_desc *tx_ring;
+	struct rte_mbuf *m;
+	uint16_t rx_id;
+	uint64x2_t dma_addr;
+	uint64_t paddr;
+	uint16_t i, n;
+	uint16_t nb_rearm;
+
+	if (rxq->rxrearm_nb <= txq_data->tx_rs_thresh ||
+	    *txq_data->nb_tx_free >= txq_data->tx_free_thresh)
+		return 0;
+
+	n = txq_data->tx_rs_thresh;
+
+	/* descriptors are written linearly below, so the whole batch must
+	 * fit before the end of the Rx ring
+	 */
+	if (rxq->rxrearm_start + n > rxq->nb_rx_desc)
+		return 0;
+
+	tx_ring = txq_data->tx_ring;
+	/* check DD bits on threshold descriptor */
+	if ((tx_ring[*txq_data->tx_next_dd].cmd_type_offset_bsz &
+			rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) !=
+			rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE))
+		return 0;
+
+	/* first buffer to free from S/W ring is at index
+	 * tx_next_dd - (tx_rs_thresh-1)
+	 */
+	txep = txq_data->tx_sw_ring;
+	txep += *txq_data->tx_next_dd - (txq_data->tx_rs_thresh - 1);
+	rxep = &rxq->sw_ring[rxq->rxrearm_start];
+	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+
+	if (*txq_data->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
+		/* directly put mbufs from Tx to Rx,
+		 * and initialize the mbufs in vector
+		 */
+		for (i = 0; i < n; i++, rxep++, txep++) {
+			rxep[0].mbuf = txep[0].mbuf;
+
+			/* Initialize rxdp descs */
+			m = txep[0].mbuf;
+			paddr = m->buf_iova + RTE_PKTMBUF_HEADROOM;
+			dma_addr = vdupq_n_u64(paddr);
+			/* flush desc with pa dma_addr */
+			vst1q_u64((uint64_t *)&rxdp++->read, dma_addr);
+		}
+	} else {
+		for (i = 0, nb_rearm = 0; i < n; i++) {
+			m = rte_pktmbuf_prefree_seg(txep[i].mbuf);
+			if (m == NULL)
+				continue;
+
+			/* index by nb_rearm, not i, so that the S/W ring
+			 * stays in step with rxdp when an mbuf is skipped
+			 */
+			rxep[nb_rearm].mbuf = m;
+
+			/* Initialize rxdp descs */
+			paddr = m->buf_iova + RTE_PKTMBUF_HEADROOM;
+			dma_addr = vdupq_n_u64(paddr);
+			/* flush desc with pa dma_addr */
+			vst1q_u64((uint64_t *)&rxdp++->read, dma_addr);
+			nb_rearm++;
+		}
+		n = nb_rearm;
+	}
+
+	/* update counters for Tx */
+	*txq_data->nb_tx_free = *txq_data->nb_tx_free + txq_data->tx_rs_thresh;
+	*txq_data->tx_next_dd = *txq_data->tx_next_dd + txq_data->tx_rs_thresh;
+	if (*txq_data->tx_next_dd >= txq_data->nb_tx_desc)
+		*txq_data->tx_next_dd = txq_data->tx_rs_thresh - 1;
+
+	/* no mbuf could be reused: leave the Rx ring and tail untouched */
+	if (n == 0)
+		return 0;
+
+	/* Update the descriptor initializer index */
+	rxq->rxrearm_start += n;
+	rx_id = rxq->rxrearm_start - 1;
+
+	if (unlikely(rxq->rxrearm_start >= rxq->nb_rx_desc)) {
+		rxq->rxrearm_start = rxq->rxrearm_start - rxq->nb_rx_desc;
+		if (!rxq->rxrearm_start)
+			rx_id = rxq->nb_rx_desc - 1;
+		else
+			rx_id = rxq->rxrearm_start - 1;
+	}
+	rxq->rxrearm_nb -= n;
+
+	rte_io_wmb();
+	/* Update the tail pointer on the NIC */
+	I40E_PCI_REG_WRITE_RELAXED(rxq->qrx_tail, rx_id);
+	return n;
+}
-- 
2.25.1