> -----邮件原件-----
> 发件人: Konstantin Ananyev <konstantin.v.anan...@yandex.ru>
> 发送时间: Thursday, February 2, 2023 10:38 PM
> 收件人: Feifei Wang <feifei.wa...@arm.com>; Yuying Zhang
> <yuying.zh...@intel.com>; Beilei Xing <beilei.x...@intel.com>; Ruifeng
> Wang <ruifeng.w...@arm.com>
> 抄送: dev@dpdk.org; nd <n...@arm.com>; Honnappa Nagarahalli
> <honnappa.nagaraha...@arm.com>
> 主题: Re: [PATCH v3 2/3] net/i40e: enable direct rearm with separate API
> 
> 04/01/2023 07:30, Feifei Wang пишет:
> > Add internal API to separate direct rearm operations between Rx and
> > Tx.
> >
> > Suggested-by: Honnappa Nagarahalli <honnappa.nagaraha...@arm.com>
> > Signed-off-by: Feifei Wang <feifei.wa...@arm.com>
> > Reviewed-by: Ruifeng Wang <ruifeng.w...@arm.com>
> > Reviewed-by: Honnappa Nagarahalli <honnappa.nagaraha...@arm.com>
> > ---
> >   drivers/net/i40e/i40e_ethdev.c          |  1 +
> >   drivers/net/i40e/i40e_ethdev.h          |  2 +
> >   drivers/net/i40e/i40e_rxtx.c            | 19 +++++++++
> >   drivers/net/i40e/i40e_rxtx.h            |  4 ++
> >   drivers/net/i40e/i40e_rxtx_vec_common.h | 54
> +++++++++++++++++++++++++
> >   drivers/net/i40e/i40e_rxtx_vec_neon.c   | 42 +++++++++++++++++++
> >   6 files changed, 122 insertions(+)
> >
> > diff --git a/drivers/net/i40e/i40e_ethdev.c
> > b/drivers/net/i40e/i40e_ethdev.c index 7726a89d99..29c1ce2470 100644
> > --- a/drivers/net/i40e/i40e_ethdev.c
> > +++ b/drivers/net/i40e/i40e_ethdev.c
> > @@ -497,6 +497,7 @@ static const struct eth_dev_ops i40e_eth_dev_ops
> = {
> >     .flow_ops_get                 = i40e_dev_flow_ops_get,
> >     .rxq_info_get                 = i40e_rxq_info_get,
> >     .txq_info_get                 = i40e_txq_info_get,
> > +   .rxq_rearm_data_get           = i40e_rxq_rearm_data_get,
> >     .rx_burst_mode_get            = i40e_rx_burst_mode_get,
> >     .tx_burst_mode_get            = i40e_tx_burst_mode_get,
> >     .timesync_enable              = i40e_timesync_enable,
> > diff --git a/drivers/net/i40e/i40e_ethdev.h
> > b/drivers/net/i40e/i40e_ethdev.h index fe943a45ff..6a6a2a6d3c 100644
> > --- a/drivers/net/i40e/i40e_ethdev.h
> > +++ b/drivers/net/i40e/i40e_ethdev.h
> > @@ -1352,6 +1352,8 @@ void i40e_rxq_info_get(struct rte_eth_dev *dev,
> uint16_t queue_id,
> >     struct rte_eth_rxq_info *qinfo);
> >   void i40e_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
> >     struct rte_eth_txq_info *qinfo);
> > +void i40e_rxq_rearm_data_get(struct rte_eth_dev *dev, uint16_t
> queue_id,
> > +   struct rte_eth_rxq_rearm_data *rxq_rearm_data);
> >   int i40e_rx_burst_mode_get(struct rte_eth_dev *dev, uint16_t queue_id,
> >                        struct rte_eth_burst_mode *mode);
> >   int i40e_tx_burst_mode_get(struct rte_eth_dev *dev, uint16_t
> > queue_id, diff --git a/drivers/net/i40e/i40e_rxtx.c
> > b/drivers/net/i40e/i40e_rxtx.c index 788ffb51c2..d8d801acaf 100644
> > --- a/drivers/net/i40e/i40e_rxtx.c
> > +++ b/drivers/net/i40e/i40e_rxtx.c
> > @@ -3197,6 +3197,19 @@ i40e_txq_info_get(struct rte_eth_dev *dev,
> uint16_t queue_id,
> >     qinfo->conf.offloads = txq->offloads;
> >   }
> >
> > +void
> > +i40e_rxq_rearm_data_get(struct rte_eth_dev *dev, uint16_t queue_id,
> > +   struct rte_eth_rxq_rearm_data *rxq_rearm_data) {
> > +   struct i40e_rx_queue *rxq;
> > +
> > +   rxq = dev->data->rx_queues[queue_id];
> > +
> > +   rxq_rearm_data->rx_sw_ring = rxq->sw_ring;
> > +   rxq_rearm_data->rearm_start = &rxq->rxrearm_start;
> > +   rxq_rearm_data->rearm_nb = &rxq->rxrearm_nb; }
> > +
> >   #ifdef RTE_ARCH_X86
> >   static inline bool
> >   get_avx_supported(bool request_avx512) @@ -3321,6 +3334,9 @@
> > i40e_set_rx_function(struct rte_eth_dev *dev)
> >                     PMD_INIT_LOG(DEBUG, "Using Vector Rx (port %d).",
> >                                  dev->data->port_id);
> >                     dev->rx_pkt_burst = i40e_recv_pkts_vec;
> > +#ifdef RTE_ARCH_ARM64
> > +                   dev->rx_flush_descriptor = i40e_rx_flush_descriptor_vec;
> > +#endif
> >             }
> >   #endif /* RTE_ARCH_X86 */
> >     } else if (!dev->data->scattered_rx && ad->rx_bulk_alloc_allowed) {
> > @@ -3484,6 +3500,9 @@ i40e_set_tx_function(struct rte_eth_dev *dev)
> >                     PMD_INIT_LOG(DEBUG, "Using Vector Tx (port %d).",
> >                                  dev->data->port_id);
> >                     dev->tx_pkt_burst = i40e_xmit_pkts_vec;
> > +#ifdef RTE_ARCH_ARM64
> > +                   dev->tx_fill_sw_ring = i40e_tx_fill_sw_ring;
> > +#endif
> 
> As far as I can see, tx_fill_sw_ring() is not ARM-specific; is there any
> reason to guard it with #ifdef ARM?
> Actually, the same question applies to rx_flush_descriptor() - can we have
> a generic version too?

Here we assumed that direct rearm would not be enabled on other architectures.
I agree that we need a generic version to avoid this guard; I will update it in
the next version.
 
> 
> >   #endif /* RTE_ARCH_X86 */
> >             } else {
> >                     PMD_INIT_LOG(DEBUG, "Simple tx finally be used.");
> diff --git
> > a/drivers/net/i40e/i40e_rxtx.h b/drivers/net/i40e/i40e_rxtx.h index
> > 5e6eecc501..8a29bd89df 100644
> > --- a/drivers/net/i40e/i40e_rxtx.h
> > +++ b/drivers/net/i40e/i40e_rxtx.h
> > @@ -233,6 +233,10 @@ uint32_t i40e_dev_rx_queue_count(void
> *rx_queue);
> >   int i40e_dev_rx_descriptor_status(void *rx_queue, uint16_t offset);
> >   int i40e_dev_tx_descriptor_status(void *tx_queue, uint16_t offset);
> >
> > +int i40e_tx_fill_sw_ring(void *tx_queue,
> > +           struct rte_eth_rxq_rearm_data *rxq_rearm_data); int
> > +i40e_rx_flush_descriptor_vec(void *rx_queue, uint16_t nb_rearm);
> > +
> >   uint16_t i40e_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
> >                         uint16_t nb_pkts);
> >   uint16_t i40e_recv_scattered_pkts_vec(void *rx_queue, diff --git
> > a/drivers/net/i40e/i40e_rxtx_vec_common.h
> > b/drivers/net/i40e/i40e_rxtx_vec_common.h
> > index fe1a6ec75e..eb96301a43 100644
> > --- a/drivers/net/i40e/i40e_rxtx_vec_common.h
> > +++ b/drivers/net/i40e/i40e_rxtx_vec_common.h
> > @@ -146,6 +146,60 @@ i40e_tx_free_bufs(struct i40e_tx_queue *txq)
> >     return txq->tx_rs_thresh;
> >   }
> >
> > +int
> > +i40e_tx_fill_sw_ring(void *tx_queue,
> > +           struct rte_eth_rxq_rearm_data *rxq_rearm_data) {
> > +   struct i40e_tx_queue *txq = tx_queue;
> > +   struct i40e_tx_entry *txep;
> > +   void **rxep;
> > +   struct rte_mbuf *m;
> > +   int i, n;
> > +   int nb_rearm = 0;
> > +
> > +   if (*rxq_rearm_data->rearm_nb < txq->tx_rs_thresh ||
> > +                   txq->nb_tx_free > txq->tx_free_thresh)
> > +           return 0;
> > +
> > +   /* check DD bits on threshold descriptor */
> > +   if ((txq->tx_ring[txq->tx_next_dd].cmd_type_offset_bsz &
> > +                   rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) !=
> > +
>       rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE))
> > +           return 0;
> > +
> > +   n = txq->tx_rs_thresh;
> > +
> > +   /* first buffer to free from S/W ring is at index
> > +    * tx_next_dd - (tx_rs_thresh-1)
> > +    */
> > +   txep = &txq->sw_ring[txq->tx_next_dd - (n - 1)];
> > +   rxep = rxq_rearm_data->rx_sw_ring;
> > +   rxep += *rxq_rearm_data->rearm_start;
> > +
> > +   if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
> > +           /* directly put mbufs from Tx to Rx */
> > +           for (i = 0; i < n; i++, rxep++, txep++)
> > +                   *rxep = txep[0].mbuf;
> > +   } else {
> > +           for (i = 0; i < n; i++, rxep++) {
> > +                   m = rte_pktmbuf_prefree_seg(txep[i].mbuf);
> > +                   if (m != NULL) {
> > +                           *rxep = m;
> > +                           nb_rearm++;
> > +                   }
> > +           }
> > +           n = nb_rearm;
> > +   }
> > +
> > +   /* update counters for Tx */
> > +   txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
> > +   txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
> > +   if (txq->tx_next_dd >= txq->nb_tx_desc)
> > +           txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
> > +
> > +   return n;
> > +}
> > +
> >   static __rte_always_inline void
> >   tx_backlog_entry(struct i40e_tx_entry *txep,
> >              struct rte_mbuf **tx_pkts, uint16_t nb_pkts) diff --git
> > a/drivers/net/i40e/i40e_rxtx_vec_neon.c
> > b/drivers/net/i40e/i40e_rxtx_vec_neon.c
> > index 12e6f1cbcb..1509d3223b 100644
> > --- a/drivers/net/i40e/i40e_rxtx_vec_neon.c
> > +++ b/drivers/net/i40e/i40e_rxtx_vec_neon.c
> > @@ -739,6 +739,48 @@ i40e_xmit_fixed_burst_vec(void *__rte_restrict
> tx_queue,
> >     return nb_pkts;
> >   }
> >
> > +int
> > +i40e_rx_flush_descriptor_vec(void *rx_queue, uint16_t nb_rearm) {
> > +   struct i40e_rx_queue *rxq = rx_queue;
> > +   struct i40e_rx_entry *rxep;
> > +   volatile union i40e_rx_desc *rxdp;
> > +   uint16_t rx_id;
> > +   uint64x2_t dma_addr;
> > +   uint64_t paddr;
> > +   uint16_t i;
> > +
> > +   rxdp = rxq->rx_ring + rxq->rxrearm_start;
> > +   rxep = &rxq->sw_ring[rxq->rxrearm_start];
> > +
> > +   for (i = 0; i < nb_rearm; i++) {
> > +           /* Initialize rxdp descs */
> > +           paddr = (rxep[i].mbuf)->buf_iova +
> RTE_PKTMBUF_HEADROOM;
> > +           dma_addr = vdupq_n_u64(paddr);
> > +           /* flush desc with pa dma_addr */
> > +           vst1q_u64((uint64_t *)&rxdp++->read, dma_addr);
> > +   }
> > +
> > +   /* Update the descriptor initializer index */
> > +   rxq->rxrearm_start += nb_rearm;
> > +   rx_id = rxq->rxrearm_start - 1;
> > +
> > +   if (unlikely(rxq->rxrearm_start >= rxq->nb_rx_desc)) {
> > +           rxq->rxrearm_start = rxq->rxrearm_start - rxq->nb_rx_desc;
> > +           if (!rxq->rxrearm_start)
> > +                   rx_id = rxq->nb_rx_desc - 1;
> > +           else
> > +                   rx_id = rxq->rxrearm_start - 1;
> > +   }
> > +   rxq->rxrearm_nb -= nb_rearm;
> > +
> > +   rte_io_wmb();
> > +   /* Update the tail pointer on the NIC */
> > +   I40E_PCI_REG_WRITE_RELAXED(rxq->qrx_tail, rx_id);
> > +
> > +   return 0;
> > +}
> > +
> >   void __rte_cold
> >   i40e_rx_queue_release_mbufs_vec(struct i40e_rx_queue *rxq)
> >   {

Reply via email to