> -----Original Message-----
> From: Zhang, Qi Z <qi.z.zh...@intel.com>
> Sent: Wednesday, September 13, 2023 3:46 PM
> To: Wu, Wenjun1 <wenjun1...@intel.com>; Su, Simei <simei...@intel.com>;
> Wu, Jingjing <jingjing...@intel.com>; Xing, Beilei <beilei.x...@intel.com>
> Cc: dev@dpdk.org
> Subject: RE: [PATCH v3] common/idpf: refactor single queue Tx function
> 
> 
> 
> > -----Original Message-----
> > From: Wu, Wenjun1 <wenjun1...@intel.com>
> > Sent: Wednesday, September 13, 2023 1:57 PM
> > To: Su, Simei <simei...@intel.com>; Wu, Jingjing
> > <jingjing...@intel.com>; Xing, Beilei <beilei.x...@intel.com>; Zhang,
> > Qi Z <qi.z.zh...@intel.com>
> > Cc: dev@dpdk.org
> > Subject: RE: [PATCH v3] common/idpf: refactor single queue Tx function
> >
> >
> >
> > > -----Original Message-----
> > > From: Su, Simei <simei...@intel.com>
> > > Sent: Friday, September 8, 2023 6:28 PM
> > > To: Wu, Jingjing <jingjing...@intel.com>; Xing, Beilei
> > > <beilei.x...@intel.com>; Zhang, Qi Z <qi.z.zh...@intel.com>
> > > Cc: dev@dpdk.org; Wu, Wenjun1 <wenjun1...@intel.com>; Su, Simei
> > > <simei...@intel.com>
> > > Subject: [PATCH v3] common/idpf: refactor single queue Tx function
> > >
> > > This patch replaces flex Tx descriptor with base Tx descriptor to
> > > align with kernel driver practice.
> > >
> > > Signed-off-by: Simei Su <simei...@intel.com>
> > > ---
> > > v3:
> > > * Change context TSO descriptor from base mode to flex mode.
> > >
> > > v2:
> > > * Refine commit title and commit log.
> > > * Remove redundant definition.
> > > * Modify base mode context TSO descriptor.
> > >
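[For anyone following the refactor: the base Tx descriptor carries a single
little-endian qword (qw1) that packs DTYPE, command bits, offsets and buffer
size, where the flex descriptor used separate 16-bit cmd_dtype and buf_size
fields. Below is a minimal sketch of that packing, assuming the usual Intel
base-descriptor shift values; it is illustrative only, the driver's
IDPF_TXD_QW1_* macros in the hunks below are authoritative.

    #include <stdint.h>

    /* Assumed shift values, mirroring the common Intel base Tx layout. */
    #define SKETCH_TXD_QW1_DTYPE_S      0   /* 4-bit descriptor type        */
    #define SKETCH_TXD_QW1_CMD_S        4   /* command bits (EOP, RS, ...)  */
    #define SKETCH_TXD_QW1_OFFSET_S     16  /* MACLEN/IPLEN/L4LEN offsets   */
    #define SKETCH_TXD_QW1_TX_BUF_SZ_S  34  /* 14-bit data buffer size      */

    /* Build the single qword that replaces the flex descriptor's
     * separate cmd_dtype and buf_size writes. */
    static inline uint64_t
    sketch_build_qw1(uint64_t dtype, uint64_t cmd, uint64_t offset,
                     uint64_t buf_sz)
    {
            return dtype << SKETCH_TXD_QW1_DTYPE_S |
                   cmd << SKETCH_TXD_QW1_CMD_S |
                   offset << SKETCH_TXD_QW1_OFFSET_S |
                   buf_sz << SKETCH_TXD_QW1_TX_BUF_SZ_S;
    }

    /* Usage mirrors the data-descriptor write in the patch, e.g.
     * txd->qw1 = rte_cpu_to_le_64(sketch_build_qw1(dtype, td_cmd,
     *                                              td_offset, slen)); */
]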
> > >  drivers/common/idpf/idpf_common_rxtx.c        | 39 +++++++++----------
> > >  drivers/common/idpf/idpf_common_rxtx.h        |  2 +-
> > >  drivers/common/idpf/idpf_common_rxtx_avx512.c | 37 +++++++++---------
> > >  drivers/net/idpf/idpf_rxtx.c                  |  2 +-
> > >  4 files changed, 39 insertions(+), 41 deletions(-)
> > >
> > > diff --git a/drivers/common/idpf/idpf_common_rxtx.c
> > > b/drivers/common/idpf/idpf_common_rxtx.c
> > > index fc87e3e243..e6d2486272 100644
> > > --- a/drivers/common/idpf/idpf_common_rxtx.c
> > > +++ b/drivers/common/idpf/idpf_common_rxtx.c
> > > @@ -276,14 +276,14 @@ idpf_qc_single_tx_queue_reset(struct idpf_tx_queue *txq)
> > >   }
> > >
> > >   txe = txq->sw_ring;
> > > - size = sizeof(struct idpf_flex_tx_desc) * txq->nb_tx_desc;
> > > + size = sizeof(struct idpf_base_tx_desc) * txq->nb_tx_desc;
> > >   for (i = 0; i < size; i++)
> > >           ((volatile char *)txq->tx_ring)[i] = 0;
> > >
> > >   prev = (uint16_t)(txq->nb_tx_desc - 1);
> > >   for (i = 0; i < txq->nb_tx_desc; i++) {
> > > -         txq->tx_ring[i].qw1.cmd_dtype =
> > > -                 rte_cpu_to_le_16(IDPF_TX_DESC_DTYPE_DESC_DONE);
> > > +         txq->tx_ring[i].qw1 =
> > > +                 rte_cpu_to_le_64(IDPF_TX_DESC_DTYPE_DESC_DONE);
> > >           txe[i].mbuf =  NULL;
> > >           txe[i].last_id = i;
> > >           txe[prev].next_id = i;
> > > @@ -1307,17 +1307,16 @@ idpf_xmit_cleanup(struct idpf_tx_queue *txq)
> > >   uint16_t nb_tx_to_clean;
> > >   uint16_t i;
> > >
> > > - volatile struct idpf_flex_tx_desc *txd = txq->tx_ring;
> > > + volatile struct idpf_base_tx_desc *txd = txq->tx_ring;
> > >
> > >   desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->rs_thresh);
> > >   if (desc_to_clean_to >= nb_tx_desc)
> > >           desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
> > >
> > >   desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
> > > - /* In the writeback Tx desccriptor, the only significant fields are the 4-bit DTYPE */
> > > - if ((txd[desc_to_clean_to].qw1.cmd_dtype &
> > > -      rte_cpu_to_le_16(IDPF_TXD_QW1_DTYPE_M)) !=
> > > -     rte_cpu_to_le_16(IDPF_TX_DESC_DTYPE_DESC_DONE)) {
> > > + if ((txd[desc_to_clean_to].qw1 &
> > > +      rte_cpu_to_le_64(IDPF_TXD_QW1_DTYPE_M)) !=
> > > +     rte_cpu_to_le_64(IDPF_TX_DESC_DTYPE_DESC_DONE)) {
> > >           TX_LOG(DEBUG, "TX descriptor %4u is not done "
> > >                  "(port=%d queue=%d)", desc_to_clean_to,
> > >                  txq->port_id, txq->queue_id);
> > > @@ -1331,10 +1330,7 @@ idpf_xmit_cleanup(struct idpf_tx_queue *txq)
> > >           nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
> > >                                       last_desc_cleaned);
> > >
> > > - txd[desc_to_clean_to].qw1.cmd_dtype = 0;
> > > - txd[desc_to_clean_to].qw1.buf_size = 0;
> > > - for (i = 0; i < RTE_DIM(txd[desc_to_clean_to].qw1.flex.raw); i++)
> > > -         txd[desc_to_clean_to].qw1.flex.raw[i] = 0;
> > > + txd[desc_to_clean_to].qw1 = 0;
> > >
> > >   txq->last_desc_cleaned = desc_to_clean_to;
> > >   txq->nb_free = (uint16_t)(txq->nb_free + nb_tx_to_clean);
> > > @@ -1347,8 +1343,8 @@ uint16_t
> > >  idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
> > >                     uint16_t nb_pkts)
> > >  {
> > > - volatile struct idpf_flex_tx_desc *txd;
> > > - volatile struct idpf_flex_tx_desc *txr;
> > > + volatile struct idpf_base_tx_desc *txd;
> > > + volatile struct idpf_base_tx_desc *txr;
> > >   union idpf_tx_offload tx_offload = {0};
> > >   struct idpf_tx_entry *txe, *txn;
> > >   struct idpf_tx_entry *sw_ring;
> > > @@ -1356,6 +1352,7 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
> > >   struct rte_mbuf *tx_pkt;
> > >   struct rte_mbuf *m_seg;
> > >   uint64_t buf_dma_addr;
> > > + uint32_t td_offset;
> > >   uint64_t ol_flags;
> > >   uint16_t tx_last;
> > >   uint16_t nb_used;
> > > @@ -1382,6 +1379,7 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
> > >
> > >   for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
> > >           td_cmd = 0;
> > > +         td_offset = 0;
> > >
> > >           tx_pkt = *tx_pkts++;
> > >           RTE_MBUF_PREFETCH_TO_FREE(txe->mbuf);
> > > @@ -1462,9 +1460,10 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
> > >                   slen = m_seg->data_len;
> > >                   buf_dma_addr = rte_mbuf_data_iova(m_seg);
> > >                   txd->buf_addr = rte_cpu_to_le_64(buf_dma_addr);
> > > -                 txd->qw1.buf_size = slen;
> > > -                 txd->qw1.cmd_dtype = rte_cpu_to_le_16(IDPF_TX_DESC_DTYPE_FLEX_DATA <<
> > > -                                       IDPF_FLEX_TXD_QW1_DTYPE_S);
> > > +                 txd->qw1 = rte_cpu_to_le_64(IDPF_TX_DESC_DTYPE_DATA |
> > > +                         ((uint64_t)td_cmd  << IDPF_TXD_QW1_CMD_S) |
> > > +                         ((uint64_t)td_offset << IDPF_TXD_QW1_OFFSET_S) |
> > > +                         ((uint64_t)slen << IDPF_TXD_QW1_TX_BUF_SZ_S));
> > >
> > >                   txe->last_id = tx_last;
> > >                   tx_id = txe->next_id;
> > > @@ -1473,7 +1472,7 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
> > >           } while (m_seg);
> > >
> > >           /* The last packet data descriptor needs End Of Packet (EOP) */
> > > -         td_cmd |= IDPF_TX_FLEX_DESC_CMD_EOP;
> > > +         td_cmd |= IDPF_TX_DESC_CMD_EOP;
> > >           txq->nb_used = (uint16_t)(txq->nb_used + nb_used);
> > >           txq->nb_free = (uint16_t)(txq->nb_free - nb_used);
> > >
> > > @@ -1482,7 +1481,7 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
> > >                          "%4u (port=%d queue=%d)",
> > >                          tx_last, txq->port_id, txq->queue_id);
> > >
> > > -                 td_cmd |= IDPF_TX_FLEX_DESC_CMD_RS;
> > > +                 td_cmd |= IDPF_TX_DESC_CMD_RS;
> > >
> > >                   /* Update txq RS bit counters */
> > >                   txq->nb_used = 0;
> > > @@ -1491,7 +1490,7 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
> > >           if (ol_flags & IDPF_TX_CKSUM_OFFLOAD_MASK)
> > >                   td_cmd |= IDPF_TX_FLEX_DESC_CMD_CS_EN;
> > >
> > > -         txd->qw1.cmd_dtype |= rte_cpu_to_le_16(td_cmd << IDPF_FLEX_TXD_QW1_CMD_S);
> > > +         txd->qw1 |= rte_cpu_to_le_16(td_cmd << IDPF_TXD_QW1_CMD_S);
> > >   }
> > >
> > >  end_of_tx:
> > > diff --git a/drivers/common/idpf/idpf_common_rxtx.h
> > > b/drivers/common/idpf/idpf_common_rxtx.h
> > > index 6cb83fc0a6..b49b1ed737 100644
> > > --- a/drivers/common/idpf/idpf_common_rxtx.h
> > > +++ b/drivers/common/idpf/idpf_common_rxtx.h
> > > @@ -157,7 +157,7 @@ struct idpf_tx_entry {
> > >  /* Structure associated with each TX queue. */
> > >  struct idpf_tx_queue {
> > >   const struct rte_memzone *mz;           /* memzone for Tx ring */
> > > - volatile struct idpf_flex_tx_desc *tx_ring;     /* Tx ring virtual address */
> > > + volatile struct idpf_base_tx_desc *tx_ring;     /* Tx ring virtual address */
> > >   volatile union {
> > >           struct idpf_flex_tx_sched_desc *desc_ring;
> > >           struct idpf_splitq_tx_compl_desc *compl_ring;
> > > diff --git a/drivers/common/idpf/idpf_common_rxtx_avx512.c
> > > b/drivers/common/idpf/idpf_common_rxtx_avx512.c
> > > index 81312617cc..afb0014a13 100644
> > > --- a/drivers/common/idpf/idpf_common_rxtx_avx512.c
> > > +++ b/drivers/common/idpf/idpf_common_rxtx_avx512.c
> > > @@ -1005,7 +1005,7 @@ idpf_tx_singleq_free_bufs_avx512(struct idpf_tx_queue *txq)
> > >   struct rte_mbuf *m, *free[txq->rs_thresh];
> > >
> > >   /* check DD bits on threshold descriptor */
> > > - if ((txq->tx_ring[txq->next_dd].qw1.cmd_dtype &
> > > + if ((txq->tx_ring[txq->next_dd].qw1 &
> > >                   rte_cpu_to_le_64(IDPF_TXD_QW1_DTYPE_M)) !=
> > >                   rte_cpu_to_le_64(IDPF_TX_DESC_DTYPE_DESC_DONE))
> > >           return 0;
> > > @@ -1113,15 +1113,14 @@ tx_backlog_entry_avx512(struct idpf_tx_vec_entry *txep,
> > >           txep[i].mbuf = tx_pkts[i];
> > >  }
> > >
> > > -#define IDPF_FLEX_TXD_QW1_BUF_SZ_S 48
> > >  static __rte_always_inline void
> > > -idpf_singleq_vtx1(volatile struct idpf_flex_tx_desc *txdp,
> > > +idpf_singleq_vtx1(volatile struct idpf_base_tx_desc *txdp,
> > >     struct rte_mbuf *pkt, uint64_t flags)
> > >  {
> > >   uint64_t high_qw =
> > > -         (IDPF_TX_DESC_DTYPE_FLEX_DATA << IDPF_FLEX_TXD_QW1_DTYPE_S |
> > > -          ((uint64_t)flags  << IDPF_FLEX_TXD_QW1_CMD_S) |
> > > -          ((uint64_t)pkt->data_len << IDPF_FLEX_TXD_QW1_BUF_SZ_S));
> > > +         (IDPF_TX_DESC_DTYPE_DATA |
> > > +          ((uint64_t)flags  << IDPF_TXD_QW1_CMD_S) |
> > > +          ((uint64_t)pkt->data_len << IDPF_TXD_QW1_TX_BUF_SZ_S));
> > >
> > >   __m128i descriptor = _mm_set_epi64x(high_qw,
> > >                                       pkt->buf_iova + pkt->data_off);
> > > @@ -1131,11 +1130,11 @@ idpf_singleq_vtx1(volatile struct idpf_flex_tx_desc *txdp,
> > >  #define IDPF_TX_LEN_MASK 0xAA
> > >  #define IDPF_TX_OFF_MASK 0x55
> > >  static __rte_always_inline void
> > > -idpf_singleq_vtx(volatile struct idpf_flex_tx_desc *txdp,
> > > +idpf_singleq_vtx(volatile struct idpf_base_tx_desc *txdp,
> > >    struct rte_mbuf **pkt, uint16_t nb_pkts,  uint64_t flags)
> > >  {
> > > - const uint64_t hi_qw_tmpl = (IDPF_TX_DESC_DTYPE_FLEX_DATA  |
> > > -                 ((uint64_t)flags  << IDPF_FLEX_TXD_QW1_CMD_S));
> > > + const uint64_t hi_qw_tmpl = (IDPF_TX_DESC_DTYPE_DATA  |
> > > +                 ((uint64_t)flags  << IDPF_TXD_QW1_CMD_S));
> > >
> > >   /* if unaligned on 32-bit boundary, do one to align */
> > >   if (((uintptr_t)txdp & 0x1F) != 0 && nb_pkts != 0) {
> > > @@ -1148,19 +1147,19 @@ idpf_singleq_vtx(volatile struct idpf_flex_tx_desc *txdp,
> > >           uint64_t hi_qw3 =
> > >                   hi_qw_tmpl |
> > >                   ((uint64_t)pkt[3]->data_len <<
> > > -                  IDPF_FLEX_TXD_QW1_BUF_SZ_S);
> > > +                  IDPF_TXD_QW1_TX_BUF_SZ_S);
> > >           uint64_t hi_qw2 =
> > >                   hi_qw_tmpl |
> > >                   ((uint64_t)pkt[2]->data_len <<
> > > -                  IDPF_FLEX_TXD_QW1_BUF_SZ_S);
> > > +                  IDPF_TXD_QW1_TX_BUF_SZ_S);
> > >           uint64_t hi_qw1 =
> > >                   hi_qw_tmpl |
> > >                   ((uint64_t)pkt[1]->data_len <<
> > > -                  IDPF_FLEX_TXD_QW1_BUF_SZ_S);
> > > +                  IDPF_TXD_QW1_TX_BUF_SZ_S);
> > >           uint64_t hi_qw0 =
> > >                   hi_qw_tmpl |
> > >                   ((uint64_t)pkt[0]->data_len <<
> > > -                  IDPF_FLEX_TXD_QW1_BUF_SZ_S);
> > > +                  IDPF_TXD_QW1_TX_BUF_SZ_S);
> > >
> > >           __m512i desc0_3 =
> > >                   _mm512_set_epi64
> > > @@ -1187,11 +1186,11 @@
> > idpf_singleq_xmit_fixed_burst_vec_avx512(void
> > > *tx_queue, struct rte_mbuf **tx_pk
> > >                                    uint16_t nb_pkts)
> > >  {
> > >   struct idpf_tx_queue *txq = tx_queue;
> > > - volatile struct idpf_flex_tx_desc *txdp;
> > > + volatile struct idpf_base_tx_desc *txdp;
> > >   struct idpf_tx_vec_entry *txep;
> > >   uint16_t n, nb_commit, tx_id;
> > > - uint64_t flags = IDPF_TX_FLEX_DESC_CMD_EOP;
> > > - uint64_t rs = IDPF_TX_FLEX_DESC_CMD_RS | flags;
> > > + uint64_t flags = IDPF_TX_DESC_CMD_EOP;
> > > + uint64_t rs = IDPF_TX_DESC_CMD_RS | flags;
> > >
> > >   /* cross rx_thresh boundary is not allowed */
> > >   nb_pkts = RTE_MIN(nb_pkts, txq->rs_thresh);
> > > @@ -1238,9 +1237,9 @@ idpf_singleq_xmit_fixed_burst_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pk
> > >
> > >   tx_id = (uint16_t)(tx_id + nb_commit);
> > >   if (tx_id > txq->next_rs) {
> > > -         txq->tx_ring[txq->next_rs].qw1.cmd_dtype |=
> > > -                 rte_cpu_to_le_64(((uint64_t)IDPF_TX_FLEX_DESC_CMD_RS) <<
> > > -                                  IDPF_FLEX_TXD_QW1_CMD_S);
> > > +         txq->tx_ring[txq->next_rs].qw1 |=
> > > +                 rte_cpu_to_le_64(((uint64_t)IDPF_TX_DESC_CMD_RS) <<
> > > +                                  IDPF_TXD_QW1_CMD_S);
> > >           txq->next_rs =
> > >                   (uint16_t)(txq->next_rs + txq->rs_thresh);
> > >   }
> > > diff --git a/drivers/net/idpf/idpf_rxtx.c
> > > b/drivers/net/idpf/idpf_rxtx.c
> > > index 3e3d81ca6d..64f2235580 100644
> > > --- a/drivers/net/idpf/idpf_rxtx.c
> > > +++ b/drivers/net/idpf/idpf_rxtx.c
> > > @@ -74,7 +74,7 @@ idpf_dma_zone_reserve(struct rte_eth_dev *dev, uint16_t queue_idx,
> > >                   ring_size = RTE_ALIGN(len * sizeof(struct idpf_flex_tx_sched_desc),
> > >                                         IDPF_DMA_MEM_ALIGN);
> > >           else
> > > -                 ring_size = RTE_ALIGN(len * sizeof(struct idpf_flex_tx_desc),
> > > +                 ring_size = RTE_ALIGN(len * sizeof(struct idpf_base_tx_desc),
> > >                                         IDPF_DMA_MEM_ALIGN);
> > >           rte_memcpy(ring_name, "idpf Tx ring", sizeof("idpf Tx ring"));
> > >           break;
> > > --
> > > 2.25.1
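
One pattern worth noting from the hunks above: completion detection keeps
the same logic, only the access moves from the 16-bit cmd_dtype field to the
full 64-bit qw1. A minimal sketch of that check, with assumed values for the
mask and the writeback DTYPE (the driver's IDPF_TXD_QW1_DTYPE_M and
IDPF_TX_DESC_DTYPE_DESC_DONE macros are authoritative; the endianness
conversion done via rte_cpu_to_le_64() in the patch is omitted for brevity):

    #include <stdint.h>

    #define SKETCH_QW1_DTYPE_M      0xFULL  /* assumed: low 4 bits hold DTYPE */
    #define SKETCH_DTYPE_DESC_DONE  15ULL   /* assumed writeback value */

    /* Returns nonzero once hardware has written the descriptor back;
     * mirrors the qw1 test in idpf_xmit_cleanup() and in the AVX-512
     * free-bufs path above. */
    static inline int
    sketch_tx_desc_done(uint64_t qw1)
    {
            return (qw1 & SKETCH_QW1_DTYPE_M) == SKETCH_DTYPE_DESC_DONE;
    }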
> >
> > Acked-by: Wenjun Wu <wenjun1...@intel.com>
> 
> Applied to dpdk-next-net-intel.

Reverted due to new change request.

