> -----Original Message-----
> From: Nicolau, Radu <radu.nico...@intel.com>
> Sent: Tuesday, October 26, 2021 9:57 PM
> To: Wu, Jingjing <jingjing...@intel.com>; Xing, Beilei
> <beilei.x...@intel.com>;
> Richardson, Bruce <bruce.richard...@intel.com>; Ananyev, Konstantin
> <konstantin.anan...@intel.com>
> Cc: dev@dpdk.org; Doherty, Declan <declan.dohe...@intel.com>; Sinha,
> Abhijit <abhijit.si...@intel.com>; Zhang, Qi Z <qi.z.zh...@intel.com>;
> Nicolau,
> Radu <radu.nico...@intel.com>
> Subject: [PATCH v12 2/7] net/iavf: rework tx path
>
> Rework the TX path and TX descriptor usage in order to allow for better use of
> offload flags and to facilitate enabling of the inline crypto offload feature.
>
> Signed-off-by: Declan Doherty <declan.dohe...@intel.com>
> Signed-off-by: Abhijit Sinha <abhijit.si...@intel.com>
> Signed-off-by: Radu Nicolau <radu.nico...@intel.com>
> Acked-by: Jingjing Wu <jingjing...@intel.com>
> ---
> drivers/net/iavf/iavf_rxtx.c | 538 ++++++++++++++++-----------
> drivers/net/iavf/iavf_rxtx.h | 117 +++++-
> drivers/net/iavf/iavf_rxtx_vec_sse.c | 10 +-
> 3 files changed, 431 insertions(+), 234 deletions(-)
>
> diff --git a/drivers/net/iavf/iavf_rxtx.c b/drivers/net/iavf/iavf_rxtx.c index
> 52d919ca1b..128691aaf1 100644
> --- a/drivers/net/iavf/iavf_rxtx.c
> +++ b/drivers/net/iavf/iavf_rxtx.c
> @@ -1054,27 +1054,31 @@ iavf_rxd_to_vlan_tci(struct rte_mbuf *mb,
> volatile union iavf_rx_desc *rxdp)
>
> static inline void
> iavf_flex_rxd_to_vlan_tci(struct rte_mbuf *mb,
> - volatile union iavf_rx_flex_desc *rxdp,
> - uint8_t rx_flags)
> + volatile union iavf_rx_flex_desc *rxdp)
> {
> - uint16_t vlan_tci = 0;
> -
> - if (rx_flags & IAVF_RX_FLAGS_VLAN_TAG_LOC_L2TAG1 &&
> - rte_le_to_cpu_64(rxdp->wb.status_error0) &
> - (1 << IAVF_RX_FLEX_DESC_STATUS0_L2TAG1P_S))
> - vlan_tci = rte_le_to_cpu_16(rxdp->wb.l2tag1);
> + if (rte_le_to_cpu_64(rxdp->wb.status_error0) &
> + (1 << IAVF_RX_FLEX_DESC_STATUS0_L2TAG1P_S)) {
> + mb->ol_flags |= RTE_MBUF_F_RX_VLAN |
> RTE_MBUF_F_RX_VLAN_STRIPPED;
> + mb->vlan_tci =
> + rte_le_to_cpu_16(rxdp->wb.l2tag1);
> + } else {
> + mb->vlan_tci = 0;
> + }
>
> #ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
> - if (rx_flags & IAVF_RX_FLAGS_VLAN_TAG_LOC_L2TAG2_2 &&
> - rte_le_to_cpu_16(rxdp->wb.status_error1) &
> - (1 << IAVF_RX_FLEX_DESC_STATUS1_L2TAG2P_S))
> - vlan_tci = rte_le_to_cpu_16(rxdp->wb.l2tag2_2nd);
> -#endif
> -
> - if (vlan_tci) {
> - mb->ol_flags |= RTE_MBUF_F_RX_VLAN |
> RTE_MBUF_F_RX_VLAN_STRIPPED;
> - mb->vlan_tci = vlan_tci;
> + if (rte_le_to_cpu_16(rxdp->wb.status_error1) &
> + (1 << IAVF_RX_FLEX_DESC_STATUS1_L2TAG2P_S)) {
> + mb->ol_flags |= RTE_MBUF_F_RX_QINQ_STRIPPED |
> RTE_MBUF_F_RX_QINQ |
> + RTE_MBUF_F_RX_VLAN_STRIPPED |
> RTE_MBUF_F_RX_VLAN;
> + mb->vlan_tci_outer = mb->vlan_tci;
> + mb->vlan_tci = rte_le_to_cpu_16(rxdp->wb.l2tag2_2nd);
> + PMD_RX_LOG(DEBUG, "Descriptor l2tag2_1: %u, l2tag2_2: %u",
> + rte_le_to_cpu_16(rxdp->wb.l2tag2_1st),
> + rte_le_to_cpu_16(rxdp->wb.l2tag2_2nd));
> + } else {
> + mb->vlan_tci_outer = 0;
> }
> +#endif
> }
>
> /* Translate the rx descriptor status and error fields to pkt flags */ @@
> -1394,7 +1398,7 @@ iavf_recv_pkts_flex_rxd(void *rx_queue,
> rxm->ol_flags = 0;
> rxm->packet_type = ptype_tbl[IAVF_RX_FLEX_DESC_PTYPE_M &
> rte_le_to_cpu_16(rxd.wb.ptype_flex_flags0)];
> - iavf_flex_rxd_to_vlan_tci(rxm, &rxd, rxq->rx_flags);
> + iavf_flex_rxd_to_vlan_tci(rxm, &rxd);
> rxq->rxd_to_pkt_fields(rxq, rxm, &rxd);
> pkt_flags = iavf_flex_rxd_error_to_pkt_flags(rx_stat_err0);
> rxm->ol_flags |= pkt_flags;
> @@ -1536,7 +1540,7 @@ iavf_recv_scattered_pkts_flex_rxd(void *rx_queue,
> struct rte_mbuf **rx_pkts,
> first_seg->ol_flags = 0;
> first_seg->packet_type = ptype_tbl[IAVF_RX_FLEX_DESC_PTYPE_M
> &
> rte_le_to_cpu_16(rxd.wb.ptype_flex_flags0)];
> - iavf_flex_rxd_to_vlan_tci(first_seg, &rxd, rxq->rx_flags);
> + iavf_flex_rxd_to_vlan_tci(first_seg, &rxd);
> rxq->rxd_to_pkt_fields(rxq, first_seg, &rxd);
> pkt_flags = iavf_flex_rxd_error_to_pkt_flags(rx_stat_err0);
>
> @@ -1774,7 +1778,7 @@ iavf_rx_scan_hw_ring_flex_rxd(struct
> iavf_rx_queue *rxq)
>
> mb->packet_type = ptype_tbl[IAVF_RX_FLEX_DESC_PTYPE_M &
> rte_le_to_cpu_16(rxdp[j].wb.ptype_flex_flags0)];
> - iavf_flex_rxd_to_vlan_tci(mb, &rxdp[j], rxq->rx_flags);
> + iavf_flex_rxd_to_vlan_tci(mb, &rxdp[j]);
> rxq->rxd_to_pkt_fields(rxq, mb, &rxdp[j]);
> stat_err0 = rte_le_to_cpu_16(rxdp[j].wb.status_error0);
> pkt_flags = iavf_flex_rxd_error_to_pkt_flags(stat_err0);
> @@ -2068,190 +2072,302 @@ iavf_xmit_cleanup(struct iavf_tx_queue *txq)
> return 0;
> }
>
> -/* Check if the context descriptor is needed for TX offloading */
> +
> +
> +static inline void
> +iavf_fill_ctx_desc_cmd_field(volatile uint64_t *field, struct rte_mbuf
> +*m) {
> + uint64_t cmd = 0;
> +
> + /* TSO enabled */
> + if (m->ol_flags & (RTE_MBUF_F_TX_TCP_SEG |
> RTE_MBUF_F_TX_UDP_SEG))
> + cmd = IAVF_TX_CTX_DESC_TSO <<
> IAVF_TXD_DATA_QW1_CMD_SHIFT;
> +
> + /* Time Sync - Currently not supported */
> +
> + /* Outer L2 TAG 2 Insertion - Currently not supported */
> + /* Inner L2 TAG 2 Insertion - Currently not supported */
> +
> + *field |= cmd;
> +}
> +
> +static inline void
> +iavf_fill_ctx_desc_tunnelling_field(volatile uint64_t *qw0,
> + const struct rte_mbuf *m)
> +{
> + uint64_t eip_typ = IAVF_TX_CTX_DESC_EIPT_NONE;
> + uint64_t eip_len = 0;
> + uint64_t eip_noinc = 0;
> + /* Default - IP_ID is increment in each segment of LSO */
> +
> + switch (m->ol_flags & (RTE_MBUF_F_TX_OUTER_IPV4 |
> RTE_MBUF_F_TX_OUTER_IPV6 |
> + RTE_MBUF_F_TX_OUTER_IP_CKSUM)) {
> + case RTE_MBUF_F_TX_OUTER_IPV4:
> + eip_typ =
> IAVF_TX_CTX_DESC_EIPT_IPV4_NO_CHECKSUM_OFFLOAD;
> + eip_len = m->outer_l3_len >> 2;
> + break;
> + case RTE_MBUF_F_TX_OUTER_IPV4 |
> RTE_MBUF_F_TX_OUTER_IP_CKSUM:
> + eip_typ = IAVF_TX_CTX_DESC_EIPT_IPV4_CHECKSUM_OFFLOAD;
> + eip_len = m->outer_l3_len >> 2;
> + break;
> + case RTE_MBUF_F_TX_OUTER_IPV6:
> + eip_typ = IAVF_TX_CTX_DESC_EIPT_IPV6;
> + eip_len = m->outer_l3_len >> 2;
> + break;
> + }
> +
> + *qw0 = eip_typ << IAVF_TXD_CTX_QW0_TUN_PARAMS_EIPT_SHIFT |
> + eip_len << IAVF_TXD_CTX_QW0_TUN_PARAMS_EIPLEN_SHIFT |
> + eip_noinc <<
> IAVF_TXD_CTX_QW0_TUN_PARAMS_EIP_NOINC_SHIFT;
> +}
> +
> static inline uint16_t
> -iavf_calc_context_desc(uint64_t flags, uint8_t vlan_flag)
> +iavf_fill_ctx_desc_segmentation_field(volatile uint64_t *field,
> + struct rte_mbuf *m)
> {
> - if (flags & RTE_MBUF_F_TX_TCP_SEG)
> - return 1;
> - if (flags & RTE_MBUF_F_TX_VLAN &&
> - vlan_flag & IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG2)
> - return 1;
> - return 0;
> + uint64_t segmentation_field = 0;
> + uint64_t total_length = 0;
> +
> + total_length = m->pkt_len - (m->l2_len + m->l3_len + m->l4_len);
> +
> + if (m->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)
> + total_length -= m->outer_l3_len;
> +
> +#ifdef RTE_LIBRTE_IAVF_DEBUG_TX
> + if (!m->l4_len || !m->tso_segsz)
> + PMD_TX_LOG(DEBUG, "L4 length %d, LSO Segment size %d",
> + m->l4_len, m->tso_segsz);
> + if (m->tso_segsz < 88)
> + PMD_TX_LOG(DEBUG, "LSO Segment size %d is less than
> minimum %d",
> + m->tso_segsz, 88);
> +#endif
> + segmentation_field =
> + (((uint64_t)total_length << IAVF_TXD_CTX_QW1_TSO_LEN_SHIFT) &
> + IAVF_TXD_CTX_QW1_TSO_LEN_MASK) |
> + (((uint64_t)m->tso_segsz << IAVF_TXD_CTX_QW1_MSS_SHIFT) &
> + IAVF_TXD_CTX_QW1_MSS_MASK);
> +
> + *field |= segmentation_field;
> +
> + return total_length;
> }
>
> +
> +struct iavf_tx_context_desc_qws {
> + __le64 qw0;
> + __le64 qw1;
> +};
> +
> static inline void
> -iavf_txd_enable_checksum(uint64_t ol_flags,
> - uint32_t *td_cmd,
> - uint32_t *td_offset,
> - union iavf_tx_offload tx_offload)
> +iavf_fill_context_desc(volatile struct iavf_tx_context_desc *desc,
> + struct rte_mbuf *m, uint16_t *tlen)
> {
> + volatile struct iavf_tx_context_desc_qws *desc_qws =
> + (volatile struct iavf_tx_context_desc_qws *)desc;
> + /* fill descriptor type field */
> + desc_qws->qw1 = IAVF_TX_DESC_DTYPE_CONTEXT;
> +
> + /* fill command field */
> + iavf_fill_ctx_desc_cmd_field(&desc_qws->qw1, m);
> +
> + /* fill segmentation field */
> + if (m->ol_flags & (RTE_MBUF_F_TX_TCP_SEG |
> RTE_MBUF_F_TX_UDP_SEG)) {
> + *tlen = iavf_fill_ctx_desc_segmentation_field(&desc_qws->qw1,
> + m);
> + }
> +
> + /* fill tunnelling field */
> + if (m->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)
> + iavf_fill_ctx_desc_tunnelling_field(&desc_qws->qw0, m);
> + else
> + desc_qws->qw0 = 0;
> +
> + desc_qws->qw0 = rte_cpu_to_le_64(desc_qws->qw0);
> + desc_qws->qw1 = rte_cpu_to_le_64(desc_qws->qw1); }
> +
> +
> +static inline void
> +iavf_build_data_desc_cmd_offset_fields(volatile uint64_t *qw1,
> + struct rte_mbuf *m)
> +{
> + uint64_t command = 0;
> + uint64_t offset = 0;
> + uint64_t l2tag1 = 0;
> +
> + *qw1 = IAVF_TX_DESC_DTYPE_DATA;
> +
> + command = (uint64_t)IAVF_TX_DESC_CMD_ICRC;
> +
> + /* Descriptor based VLAN insertion */
> + if (m->ol_flags & RTE_MBUF_F_TX_VLAN) {
> + command |= (uint64_t)IAVF_TX_DESC_CMD_IL2TAG1;
> + l2tag1 |= m->vlan_tci;
> + }
> +
> /* Set MACLEN */
> - *td_offset |= (tx_offload.l2_len >> 1) <<
> - IAVF_TX_DESC_LENGTH_MACLEN_SHIFT;
> -
> - /* Enable L3 checksum offloads */
> - if (ol_flags & RTE_MBUF_F_TX_IP_CKSUM) {
> - *td_cmd |= IAVF_TX_DESC_CMD_IIPT_IPV4_CSUM;
> - *td_offset |= (tx_offload.l3_len >> 2) <<
> - IAVF_TX_DESC_LENGTH_IPLEN_SHIFT;
> - } else if (ol_flags & RTE_MBUF_F_TX_IPV4) {
> - *td_cmd |= IAVF_TX_DESC_CMD_IIPT_IPV4;
> - *td_offset |= (tx_offload.l3_len >> 2) <<
> - IAVF_TX_DESC_LENGTH_IPLEN_SHIFT;
> - } else if (ol_flags & RTE_MBUF_F_TX_IPV6) {
> - *td_cmd |= IAVF_TX_DESC_CMD_IIPT_IPV6;
> - *td_offset |= (tx_offload.l3_len >> 2) <<
> - IAVF_TX_DESC_LENGTH_IPLEN_SHIFT;
> - }
> -
> - if (ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
> - *td_cmd |= IAVF_TX_DESC_CMD_L4T_EOFT_TCP;
> - *td_offset |= (tx_offload.l4_len >> 2) <<
> + offset |= (m->l2_len >> 1) << IAVF_TX_DESC_LENGTH_MACLEN_SHIFT;
> +
> + /* Enable L3 checksum offloading inner */
> + if (m->ol_flags & (RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_IPV4))
> {
> + command |= IAVF_TX_DESC_CMD_IIPT_IPV4_CSUM;
> + offset |= (m->l3_len >> 2) << IAVF_TX_DESC_LENGTH_IPLEN_SHIFT;
> + } else if (m->ol_flags & RTE_MBUF_F_TX_IPV4) {
> + command |= IAVF_TX_DESC_CMD_IIPT_IPV4;
> + offset |= (m->l3_len >> 2) << IAVF_TX_DESC_LENGTH_IPLEN_SHIFT;
> + } else if (m->ol_flags & RTE_MBUF_F_TX_IPV6) {
> + command |= IAVF_TX_DESC_CMD_IIPT_IPV6;
> + offset |= (m->l3_len >> 2) << IAVF_TX_DESC_LENGTH_IPLEN_SHIFT;
> + }
> +
> + if (m->ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
> + command |= IAVF_TX_DESC_CMD_L4T_EOFT_TCP;
> + offset |= (m->l4_len >> 2) <<
> IAVF_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
> - return;
> }
>
> /* Enable L4 checksum offloads */
> - switch (ol_flags & RTE_MBUF_F_TX_L4_MASK) {
> + switch (m->ol_flags & RTE_MBUF_F_TX_L4_MASK) {
> case RTE_MBUF_F_TX_TCP_CKSUM:
> - *td_cmd |= IAVF_TX_DESC_CMD_L4T_EOFT_TCP;
> - *td_offset |= (sizeof(struct rte_tcp_hdr) >> 2) <<
> - IAVF_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
> + command |= IAVF_TX_DESC_CMD_L4T_EOFT_TCP;
> + offset |= (sizeof(struct rte_tcp_hdr) >> 2) <<
> + IAVF_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
> break;
> case RTE_MBUF_F_TX_SCTP_CKSUM:
> - *td_cmd |= IAVF_TX_DESC_CMD_L4T_EOFT_SCTP;
> - *td_offset |= (sizeof(struct rte_sctp_hdr) >> 2) <<
> - IAVF_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
> + command |= IAVF_TX_DESC_CMD_L4T_EOFT_SCTP;
> + offset |= (sizeof(struct rte_sctp_hdr) >> 2) <<
> + IAVF_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
> break;
> case RTE_MBUF_F_TX_UDP_CKSUM:
> - *td_cmd |= IAVF_TX_DESC_CMD_L4T_EOFT_UDP;
> - *td_offset |= (sizeof(struct rte_udp_hdr) >> 2) <<
> - IAVF_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
> - break;
> - default:
> + command |= IAVF_TX_DESC_CMD_L4T_EOFT_UDP;
> + offset |= (sizeof(struct rte_udp_hdr) >> 2) <<
> + IAVF_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
> break;
> }
> +
> + *qw1 = rte_cpu_to_le_64((((uint64_t)command <<
> + IAVF_TXD_DATA_QW1_CMD_SHIFT) &
> IAVF_TXD_DATA_QW1_CMD_MASK) |
> + (((uint64_t)offset << IAVF_TXD_DATA_QW1_OFFSET_SHIFT) &
> + IAVF_TXD_DATA_QW1_OFFSET_MASK) |
> + ((uint64_t)l2tag1 << IAVF_TXD_DATA_QW1_L2TAG1_SHIFT));
> }
>
> -/* set TSO context descriptor
> - * support IP -> L4 and IP -> IP -> L4
> - */
> -static inline uint64_t
> -iavf_set_tso_ctx(struct rte_mbuf *mbuf, union iavf_tx_offload tx_offload)
> +static inline void
> +iavf_fill_data_desc_buffer_sz_field(volatile uint64_t *field, uint16_t
> +value)
The static function above (iavf_fill_data_desc_buffer_sz_field) is never used; it was removed during merge.
> {
> - uint64_t ctx_desc = 0;
> - uint32_t cd_cmd, hdr_len, cd_tso_len;
> -
> - if (!tx_offload.l4_len) {
> - PMD_TX_LOG(DEBUG, "L4 length set to 0");
> - return ctx_desc;
> + *field |= (((uint64_t)value << IAVF_TXD_DATA_QW1_TX_BUF_SZ_SHIFT) &
> + IAVF_TXD_DATA_QW1_TX_BUF_SZ_MASK);
> }
>
> - hdr_len = tx_offload.l2_len +
> - tx_offload.l3_len +
> - tx_offload.l4_len;
> +static inline void
> +iavf_fill_data_desc(volatile struct iavf_tx_desc *desc,
> + struct rte_mbuf *m, uint64_t desc_template,
> + uint16_t tlen, uint16_t ipseclen)
> +{
> + uint32_t hdrlen = m->l2_len;
> + uint32_t bufsz = 0;
>
> - cd_cmd = IAVF_TX_CTX_DESC_TSO;
> - cd_tso_len = mbuf->pkt_len - hdr_len;
> - ctx_desc |= ((uint64_t)cd_cmd << IAVF_TXD_CTX_QW1_CMD_SHIFT) |
> - ((uint64_t)cd_tso_len <<
> IAVF_TXD_CTX_QW1_TSO_LEN_SHIFT) |
> - ((uint64_t)mbuf->tso_segsz <<
> IAVF_TXD_CTX_QW1_MSS_SHIFT);
> + /* fill data descriptor qw1 from template */
> + desc->cmd_type_offset_bsz = desc_template;
>
> - return ctx_desc;
> -}
> + /* set data buffer address */
> + desc->buffer_addr = rte_mbuf_data_iova(m);
>
> -/* Construct the tx flags */
> -static inline uint64_t
> -iavf_build_ctob(uint32_t td_cmd, uint32_t td_offset, unsigned int size,
> - uint32_t td_tag)
> -{
> - return rte_cpu_to_le_64(IAVF_TX_DESC_DTYPE_DATA |
> - ((uint64_t)td_cmd << IAVF_TXD_QW1_CMD_SHIFT) |
> - ((uint64_t)td_offset <<
> - IAVF_TXD_QW1_OFFSET_SHIFT) |
> - ((uint64_t)size <<
> - IAVF_TXD_QW1_TX_BUF_SZ_SHIFT) |
> - ((uint64_t)td_tag <<
> - IAVF_TXD_QW1_L2TAG1_SHIFT));
> + /* calculate data buffer size less set header lengths */
> + if ((m->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) &&
> + (m->ol_flags & (RTE_MBUF_F_TX_TCP_SEG |
> RTE_MBUF_F_TX_UDP_SEG))) {
> + hdrlen += m->outer_l3_len;
> + if (m->ol_flags & RTE_MBUF_F_TX_L4_MASK)
> + hdrlen += m->l3_len + m->l4_len;
> + else
> + hdrlen += m->l3_len;
> + if (m->ol_flags & RTE_MBUF_F_TX_SEC_OFFLOAD)
> + hdrlen += ipseclen;
> + bufsz = hdrlen + tlen;
> + } else {
> + bufsz = m->data_len;
> + }
> +
> + /* set data buffer size */
> + desc->cmd_type_offset_bsz |=
> + (((uint64_t)bufsz << IAVF_TXD_DATA_QW1_TX_BUF_SZ_SHIFT) &
> + IAVF_TXD_DATA_QW1_TX_BUF_SZ_MASK);
> +
> + desc->buffer_addr = rte_cpu_to_le_64(desc->buffer_addr);
> + desc->cmd_type_offset_bsz =
> +rte_cpu_to_le_64(desc->cmd_type_offset_bsz);
> }
>
> +
> /* TX function */
> uint16_t
> iavf_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
> {
> - volatile struct iavf_tx_desc *txd;
> - volatile struct iavf_tx_desc *txr;
> - struct iavf_tx_queue *txq;
> - struct iavf_tx_entry *sw_ring;
> + struct iavf_tx_queue *txq = tx_queue;
> + volatile struct iavf_tx_desc *txr = txq->tx_ring;
> + struct iavf_tx_entry *txe_ring = txq->sw_ring;
> struct iavf_tx_entry *txe, *txn;
> - struct rte_mbuf *tx_pkt;
> - struct rte_mbuf *m_seg;
> - uint16_t tx_id;
> - uint16_t nb_tx;
> - uint32_t td_cmd;
> - uint32_t td_offset;
> - uint32_t td_tag;
> - uint64_t ol_flags;
> - uint16_t nb_used;
> - uint16_t nb_ctx;
> - uint16_t tx_last;
> - uint16_t slen;
> - uint64_t buf_dma_addr;
> - uint16_t cd_l2tag2 = 0;
> - union iavf_tx_offload tx_offload = {0};
> -
> - txq = tx_queue;
> - sw_ring = txq->sw_ring;
> - txr = txq->tx_ring;
> - tx_id = txq->tx_tail;
> - txe = &sw_ring[tx_id];
> + struct rte_mbuf *mb, *mb_seg;
> + uint16_t desc_idx, desc_idx_last;
> + uint16_t idx;
> +
>
> /* Check if the descriptor ring needs to be cleaned. */
> if (txq->nb_free < txq->free_thresh)
> - (void)iavf_xmit_cleanup(txq);
> + iavf_xmit_cleanup(txq);
> +
> + desc_idx = txq->tx_tail;
> + txe = &txe_ring[desc_idx];
> +
> +#ifdef RTE_LIBRTE_IAVF_DEBUG_TX_DESC_RING
> + iavf_dump_tx_entry_ring(txq);
> + iavf_dump_tx_desc_ring(txq);
> +#endif
> +
>
> - for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
> - td_cmd = 0;
> - td_tag = 0;
> - td_offset = 0;
> + for (idx = 0; idx < nb_pkts; idx++) {
> + volatile struct iavf_tx_desc *ddesc;
> + uint16_t nb_desc_ctx;
> + uint16_t nb_desc_data, nb_desc_required;
> + uint16_t tlen = 0, ipseclen = 0;
> + uint64_t ddesc_template = 0;
> + uint64_t ddesc_cmd = 0;
> +
> + mb = tx_pkts[idx];
>
> - tx_pkt = *tx_pkts++;
> RTE_MBUF_PREFETCH_TO_FREE(txe->mbuf);
>
> - ol_flags = tx_pkt->ol_flags;
> - tx_offload.l2_len = tx_pkt->l2_len;
> - tx_offload.l3_len = tx_pkt->l3_len;
> - tx_offload.l4_len = tx_pkt->l4_len;
> - tx_offload.tso_segsz = tx_pkt->tso_segsz;
> - /* Calculate the number of context descriptors needed. */
> - nb_ctx = iavf_calc_context_desc(ol_flags, txq->vlan_flag);
> + nb_desc_data = mb->nb_segs;
> + nb_desc_ctx = !!(mb->ol_flags &
> + (RTE_MBUF_F_TX_TCP_SEG | RTE_MBUF_F_TX_UDP_SEG |
> +RTE_MBUF_F_TX_TUNNEL_MASK));
>
> - /* The number of descriptors that must be allocated for
> + /**
> + * The number of descriptors that must be allocated for
> * a packet equals to the number of the segments of that
> - * packet plus 1 context descriptor if needed.
> + * packet plus the context and ipsec descriptors if needed.
> */
> - nb_used = (uint16_t)(tx_pkt->nb_segs + nb_ctx);
> - tx_last = (uint16_t)(tx_id + nb_used - 1);
> + nb_desc_required = nb_desc_data + nb_desc_ctx;
> +
> + desc_idx_last = (uint16_t)(desc_idx + nb_desc_required - 1);
>
> - /* Circular ring */
> - if (tx_last >= txq->nb_tx_desc)
> - tx_last = (uint16_t)(tx_last - txq->nb_tx_desc);
> + /* wrap descriptor ring */
> + if (desc_idx_last >= txq->nb_tx_desc)
> + desc_idx_last =
> + (uint16_t)(desc_idx_last - txq->nb_tx_desc);
>
> - PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u"
> - " tx_first=%u tx_last=%u",
> - txq->port_id, txq->queue_id, tx_id, tx_last);
> + PMD_TX_LOG(DEBUG,
> + "port_id=%u queue_id=%u tx_first=%u tx_last=%u",
> + txq->port_id, txq->queue_id, desc_idx, desc_idx_last);
>
> - if (nb_used > txq->nb_free) {
> + if (nb_desc_required > txq->nb_free) {
> if (iavf_xmit_cleanup(txq)) {
> - if (nb_tx == 0)
> + if (idx == 0)
> return 0;
> goto end_of_tx;
> }
> - if (unlikely(nb_used > txq->rs_thresh)) {
> - while (nb_used > txq->nb_free) {
> + if (unlikely(nb_desc_required > txq->rs_thresh)) {
> + while (nb_desc_required > txq->nb_free) {
> if (iavf_xmit_cleanup(txq)) {
> - if (nb_tx == 0)
> + if (idx == 0)
> return 0;
> goto end_of_tx;
> }
> @@ -2259,122 +2375,94 @@ iavf_xmit_pkts(void *tx_queue, struct rte_mbuf
> **tx_pkts, uint16_t nb_pkts)
> }
> }
>
> - /* Descriptor based VLAN insertion */
> - if (ol_flags & RTE_MBUF_F_TX_VLAN &&
> - txq->vlan_flag & IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG1) {
> - td_cmd |= IAVF_TX_DESC_CMD_IL2TAG1;
> - td_tag = tx_pkt->vlan_tci;
> - }
> -
> - /* According to datasheet, the bit2 is reserved and must be
> - * set to 1.
> - */
> - td_cmd |= 0x04;
> -
> - /* Enable checksum offloading */
> - if (ol_flags & IAVF_TX_CKSUM_OFFLOAD_MASK)
> - iavf_txd_enable_checksum(ol_flags, &td_cmd,
> - &td_offset, tx_offload);
> + iavf_build_data_desc_cmd_offset_fields(&ddesc_template, mb);
>
> - if (nb_ctx) {
> /* Setup TX context descriptor if required */
> - uint64_t cd_type_cmd_tso_mss =
> - IAVF_TX_DESC_DTYPE_CONTEXT;
> - volatile struct iavf_tx_context_desc *ctx_txd =
> + if (nb_desc_ctx) {
> + volatile struct iavf_tx_context_desc *ctx_desc =
> (volatile struct iavf_tx_context_desc *)
> - &txr[tx_id];
> + &txr[desc_idx];
>
> /* clear QW0 or the previous writeback value
> * may impact next write
> */
> - *(volatile uint64_t *)ctx_txd = 0;
> + *(volatile uint64_t *)ctx_desc = 0;
>
> - txn = &sw_ring[txe->next_id];
> + txn = &txe_ring[txe->next_id];
> RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf);
> +
> if (txe->mbuf) {
> rte_pktmbuf_free_seg(txe->mbuf);
> txe->mbuf = NULL;
> }
>
> - /* TSO enabled */
> - if (ol_flags & RTE_MBUF_F_TX_TCP_SEG)
> - cd_type_cmd_tso_mss |=
> - iavf_set_tso_ctx(tx_pkt, tx_offload);
> + iavf_fill_context_desc(ctx_desc, mb, &tlen);
> + IAVF_DUMP_TX_DESC(txq, ctx_desc, desc_idx);
>
> - if (ol_flags & RTE_MBUF_F_TX_VLAN &&
> - txq->vlan_flag & IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG2)
> {
> - cd_type_cmd_tso_mss |= IAVF_TX_CTX_DESC_IL2TAG2
> - << IAVF_TXD_CTX_QW1_CMD_SHIFT;
> - cd_l2tag2 = tx_pkt->vlan_tci;
> + txe->last_id = desc_idx_last;
> + desc_idx = txe->next_id;
> + txe = txn;
> }
>
> - ctx_txd->type_cmd_tso_mss =
> - rte_cpu_to_le_64(cd_type_cmd_tso_mss);
> - ctx_txd->l2tag2 = rte_cpu_to_le_16(cd_l2tag2);
>
> - IAVF_DUMP_TX_DESC(txq, &txr[tx_id], tx_id);
> - txe->last_id = tx_last;
> - tx_id = txe->next_id;
> - txe = txn;
> - }
>
> - m_seg = tx_pkt;
> + mb_seg = mb;
> +
> do {
> - txd = &txr[tx_id];
> - txn = &sw_ring[txe->next_id];
> + ddesc = (volatile struct iavf_tx_desc *)
> + &txr[desc_idx];
> +
> + txn = &txe_ring[txe->next_id];
> + RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf);
>
> if (txe->mbuf)
> rte_pktmbuf_free_seg(txe->mbuf);
> - txe->mbuf = m_seg;
> -
> - /* Setup TX Descriptor */
> - slen = m_seg->data_len;
> - buf_dma_addr = rte_mbuf_data_iova(m_seg);
> - txd->buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
> - txd->cmd_type_offset_bsz = iavf_build_ctob(td_cmd,
> - td_offset,
> - slen,
> - td_tag);
> -
> - IAVF_DUMP_TX_DESC(txq, txd, tx_id);
> - txe->last_id = tx_last;
> - tx_id = txe->next_id;
> +
> + txe->mbuf = mb_seg;
> + iavf_fill_data_desc(ddesc, mb_seg,
> + ddesc_template, tlen, ipseclen);
> +
> + IAVF_DUMP_TX_DESC(txq, ddesc, desc_idx);
> +
> + txe->last_id = desc_idx_last;
> + desc_idx = txe->next_id;
> txe = txn;
> - m_seg = m_seg->next;
> - } while (m_seg);
> + mb_seg = mb_seg->next;
> + } while (mb_seg);
>
> /* The last packet data descriptor needs End Of Packet (EOP) */
> - td_cmd |= IAVF_TX_DESC_CMD_EOP;
> - txq->nb_used = (uint16_t)(txq->nb_used + nb_used);
> - txq->nb_free = (uint16_t)(txq->nb_free - nb_used);
> + ddesc_cmd = IAVF_TX_DESC_CMD_EOP;
> +
> + txq->nb_used = (uint16_t)(txq->nb_used + nb_desc_required);
> + txq->nb_free = (uint16_t)(txq->nb_free - nb_desc_required);
>
> if (txq->nb_used >= txq->rs_thresh) {
> PMD_TX_LOG(DEBUG, "Setting RS bit on TXD id="
> "%4u (port=%d queue=%d)",
> - tx_last, txq->port_id, txq->queue_id);
> + desc_idx_last, txq->port_id, txq->queue_id);
>
> - td_cmd |= IAVF_TX_DESC_CMD_RS;
> + ddesc_cmd |= IAVF_TX_DESC_CMD_RS;
>
> /* Update txq RS bit counters */
> txq->nb_used = 0;
> }
>
> - txd->cmd_type_offset_bsz |=
> - rte_cpu_to_le_64(((uint64_t)td_cmd) <<
> - IAVF_TXD_QW1_CMD_SHIFT);
> - IAVF_DUMP_TX_DESC(txq, txd, tx_id);
> + ddesc->cmd_type_offset_bsz |= rte_cpu_to_le_64(ddesc_cmd <<
> + IAVF_TXD_DATA_QW1_CMD_SHIFT);
> +
> + IAVF_DUMP_TX_DESC(txq, ddesc, desc_idx - 1);
> }
>
> end_of_tx:
> rte_wmb();
>
> PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
> - txq->port_id, txq->queue_id, tx_id, nb_tx);
> + txq->port_id, txq->queue_id, desc_idx, idx);
>
> - IAVF_PCI_REG_WC_WRITE_RELAXED(txq->qtx_tail, tx_id);
> - txq->tx_tail = tx_id;
> + IAVF_PCI_REG_WRITE_RELAXED(txq->qtx_tail, desc_idx);
> + txq->tx_tail = desc_idx;
>
> - return nb_tx;
> + return idx;
> }
>
> /* Check if the packet with vlan user priority is transmitted in the diff
> --git
> a/drivers/net/iavf/iavf_rxtx.h b/drivers/net/iavf/iavf_rxtx.h index
> 84351011f1..1da1278452 100644
> --- a/drivers/net/iavf/iavf_rxtx.h
> +++ b/drivers/net/iavf/iavf_rxtx.h
> @@ -403,6 +403,112 @@ enum iavf_rx_flex_desc_status_error_1_bits {
> IAVF_RX_FLEX_DESC_STATUS1_LAST /* this entry must be last!!! */ };
>
> +
> +#define IAVF_TXD_DATA_QW1_DTYPE_SHIFT (0)
> +#define IAVF_TXD_DATA_QW1_DTYPE_MASK (0xFUL <<
> IAVF_TXD_QW1_DTYPE_SHIFT)
> +
> +#define IAVF_TXD_DATA_QW1_CMD_SHIFT (4)
> +#define IAVF_TXD_DATA_QW1_CMD_MASK (0x3FFUL <<
> IAVF_TXD_DATA_QW1_CMD_SHIFT)
> +
> +#define IAVF_TXD_DATA_QW1_OFFSET_SHIFT (16)
> +#define IAVF_TXD_DATA_QW1_OFFSET_MASK (0x3FFFFULL << \
> + IAVF_TXD_DATA_QW1_OFFSET_SHIFT)
> +
> +#define IAVF_TXD_DATA_QW1_OFFSET_MACLEN_SHIFT
> (IAVF_TXD_DATA_QW1_OFFSET_SHIFT)
> +#define IAVF_TXD_DATA_QW1_OFFSET_MACLEN_MASK \
> + (0x7FUL << IAVF_TXD_DATA_QW1_OFFSET_MACLEN_SHIFT)
> +
> +#define IAVF_TXD_DATA_QW1_OFFSET_IPLEN_SHIFT \
> + (IAVF_TXD_DATA_QW1_OFFSET_SHIFT +
> IAVF_TX_DESC_LENGTH_IPLEN_SHIFT)
> +#define IAVF_TXD_DATA_QW1_OFFSET_IPLEN_MASK \
> + (0x7FUL << IAVF_TXD_DATA_QW1_OFFSET_IPLEN_SHIFT)
> +
> +#define IAVF_TXD_DATA_QW1_OFFSET_L4LEN_SHIFT \
> + (IAVF_TXD_DATA_QW1_OFFSET_SHIFT +
> IAVF_TX_DESC_LENGTH_L4_FC_LEN_SHIFT)
> +#define IAVF_TXD_DATA_QW1_OFFSET_L4LEN_MASK \
> + (0xFUL << IAVF_TXD_DATA_QW1_OFFSET_L4LEN_SHIFT)
> +
> +#define IAVF_TXD_DATA_QW1_MACLEN_MASK \
> + (0x7FUL << IAVF_TX_DESC_LENGTH_MACLEN_SHIFT)
> +#define IAVF_TXD_DATA_QW1_IPLEN_MASK \
> + (0x7FUL << IAVF_TX_DESC_LENGTH_IPLEN_SHIFT)
> +#define IAVF_TXD_DATA_QW1_L4LEN_MASK \
> + (0xFUL << IAVF_TX_DESC_LENGTH_L4_FC_LEN_SHIFT)
> +#define IAVF_TXD_DATA_QW1_FCLEN_MASK \
> + (0xFUL << IAVF_TX_DESC_LENGTH_L4_FC_LEN_SHIFT)
> +
> +#define IAVF_TXD_DATA_QW1_TX_BUF_SZ_SHIFT (34)
> +#define IAVF_TXD_DATA_QW1_TX_BUF_SZ_MASK \
> + (0x3FFFULL << IAVF_TXD_DATA_QW1_TX_BUF_SZ_SHIFT)
> +
> +#define IAVF_TXD_DATA_QW1_L2TAG1_SHIFT (48)
> +#define IAVF_TXD_DATA_QW1_L2TAG1_MASK \
> + (0xFFFFULL << IAVF_TXD_DATA_QW1_L2TAG1_SHIFT)
> +
> +#define IAVF_TXD_CTX_QW1_IPSEC_PARAMS_CIPHERBLK_SHIFT (11)
> +#define IAVF_TXD_CTX_QW1_IPSEC_PARAMS_CIPHERBLK_MASK \
> + (0x7UL << IAVF_TXD_CTX_QW1_IPSEC_PARAMS_CIPHERBLK_SHIFT)
> +
> +#define IAVF_TXD_CTX_QW1_IPSEC_PARAMS_ICVLEN_SHIFT (14)
> +#define IAVF_TXD_CTX_QW1_IPSEC_PARAMS_ICVLEN_MASK \
> + (0xFUL << IAVF_TXD_CTX_QW1_IPSEC_PARAMS_ICVLEN_SHIFT)
> +
> +#define IAVF_TXD_CTX_QW1_SEG_PARAMS_TLEN_SHIFT (30)
> +#define IAVF_TXD_CTX_QW1_SEG_PARAMS_TLEN_MASK \
> + (0x3FFFFUL << IAVF_TXD_CTX_QW1_SEG_PARAMS_TLEN_SHIFT)
> +
> +#define IAVF_TXD_CTX_QW1_TSYNC_PARAMS_TLEN_SHIFT (30)
> +#define IAVF_TXD_CTX_QW1_TSYNC_PARAMS_TLEN_MASK \
> + (0x3FUL << IAVF_TXD_CTX_QW1_SEG_PARAMS_TLEN_SHIFT)
> +
> +#define IAVF_TXD_CTX_QW1_SEG_PARAMS_MSS_SHIFT (50)
> +#define IAVF_TXD_CTX_QW1_SEG_PARAMS_MSS_MASK \
> + (0x3FFFUL << IAVF_TXD_CTX_QW1_SEG_PARAMS_MSS_SHIFT)
> +
> +#define IAVF_TXD_CTX_QW0_TUN_PARAMS_EIPT_SHIFT (0)
> +#define IAVF_TXD_CTX_QW0_TUN_PARAMS_EIPT_MASK (0x3UL)
> +
> +enum iavf_tx_ctx_desc_tunnel_external_ip_type {
> + IAVF_TX_CTX_DESC_EIPT_NONE,
> + IAVF_TX_CTX_DESC_EIPT_IPV6,
> + IAVF_TX_CTX_DESC_EIPT_IPV4_NO_CHECKSUM_OFFLOAD,
> + IAVF_TX_CTX_DESC_EIPT_IPV4_CHECKSUM_OFFLOAD
> +};
> +
> +#define IAVF_TXD_CTX_QW0_TUN_PARAMS_EIPLEN_SHIFT (2)
> +#define IAVF_TXD_CTX_QW0_TUN_PARAMS_EIPLEN_MASK (0x7FUL)
> +
> +#define IAVF_TXD_CTX_QW0_TUN_PARAMS_L4TUNT_SHIFT (9)
> +#define IAVF_TXD_CTX_QW0_TUN_PARAMS_L4TUNT_MASK (0x3UL)
> +
> +enum iavf_tx_ctx_desc_tunnel_l4_tunnel_type {
> + IAVF_TX_CTX_DESC_L4_TUN_TYP_NO_UDP_GRE,
> + IAVF_TX_CTX_DESC_L4_TUN_TYP_UDP,
> + IAVF_TX_CTX_DESC_L4_TUN_TYP_GRE
> +};
> +
> +#define IAVF_TXD_CTX_QW0_TUN_PARAMS_EIP_NOINC_SHIFT (11)
> +#define IAVF_TXD_CTX_QW0_TUN_PARAMS_EIP_NOINC_MASK (0x1UL)
> +
> +#define IAVF_TXD_CTX_QW0_TUN_PARAMS_L4TUNLEN_SHIFT (12)
> +#define IAVF_TXD_CTX_QW0_TUN_PARAMS_L4TUNLEN_MASK (0x7FUL)
> +
> +#define IAVF_TXD_CTX_QW0_TUN_PARAMS_DECTTL_SHIFT (19)
> +#define IAVF_TXD_CTX_QW0_TUN_PARAMS_DECTTL_MASK (0xFUL)
> +
> +#define IAVF_TXD_CTX_QW0_TUN_PARAMS_L4T_CS_SHIFT (23)
> +#define IAVF_TXD_CTX_QW0_TUN_PARAMS_L4T_CS_MASK (0x1UL)
> +
> +#define IAVF_TXD_CTX_QW0_L2TAG2_PARAM (32)
> +#define IAVF_TXD_CTX_QW0_L2TAG2_MASK (0xFFFFUL)
> +
> +
> +#define IAVF_RX_FLEX_DESC_IPSEC_CRYPTO_SAID_MASK (0xFFFFF)
> +
> +/* for iavf_32b_rx_flex_desc.ptype_flex_flags0 member */
> +#define IAVF_RX_FLEX_DESC_PTYPE_M (0x3FF) /* 10-bits */
> +
> +
> /* for iavf_32b_rx_flex_desc.ptype_flex_flags0 member */
> #define IAVF_RX_FLEX_DESC_PTYPE_M (0x3FF) /* 10-bits */
>
> @@ -553,9 +659,10 @@ void iavf_dump_tx_descriptor(const struct
> iavf_tx_queue *txq,
> const volatile struct iavf_tx_desc *tx_desc = desc;
> enum iavf_tx_desc_dtype_value type;
>
> - type = (enum iavf_tx_desc_dtype_value)rte_le_to_cpu_64(
> - tx_desc->cmd_type_offset_bsz &
> - rte_cpu_to_le_64(IAVF_TXD_QW1_DTYPE_MASK));
> +
> + type = (enum iavf_tx_desc_dtype_value)
> + rte_le_to_cpu_64(tx_desc->cmd_type_offset_bsz &
> + rte_cpu_to_le_64(IAVF_TXD_DATA_QW1_DTYPE_MASK));
> switch (type) {
> case IAVF_TX_DESC_DTYPE_DATA:
> name = "Tx_data_desc";
> @@ -569,8 +676,8 @@ void iavf_dump_tx_descriptor(const struct
> iavf_tx_queue *txq,
> }
>
> printf("Queue %d %s %d: QW0: 0x%016"PRIx64" QW1:
> 0x%016"PRIx64"\n",
> - txq->queue_id, name, tx_id, tx_desc->buffer_addr,
> - tx_desc->cmd_type_offset_bsz);
> + txq->queue_id, name, tx_id, tx_desc->buffer_addr,
> + tx_desc->cmd_type_offset_bsz);
> }
>
> #define FDIR_PROC_ENABLE_PER_QUEUE(ad, on) do { \ diff --git
> a/drivers/net/iavf/iavf_rxtx_vec_sse.c b/drivers/net/iavf/iavf_rxtx_vec_sse.c
> index d4f4d705b7..6d42ae9373 100644
> --- a/drivers/net/iavf/iavf_rxtx_vec_sse.c
> +++ b/drivers/net/iavf/iavf_rxtx_vec_sse.c
> @@ -363,10 +363,12 @@ static inline void flex_desc_to_ptype_v(__m128i
> descs[4], struct rte_mbuf **rx_pkts,
> const uint32_t *type_table)
> {
> - const __m128i ptype_mask = _mm_set_epi16(0,
> IAVF_RX_FLEX_DESC_PTYPE_M,
> - 0, IAVF_RX_FLEX_DESC_PTYPE_M,
> - 0, IAVF_RX_FLEX_DESC_PTYPE_M,
> - 0, IAVF_RX_FLEX_DESC_PTYPE_M);
> + const __m128i ptype_mask = _mm_set_epi16(
> + IAVF_RX_FLEX_DESC_PTYPE_M, 0x0,
> + IAVF_RX_FLEX_DESC_PTYPE_M, 0x0,
> + IAVF_RX_FLEX_DESC_PTYPE_M, 0x0,
> + IAVF_RX_FLEX_DESC_PTYPE_M, 0x0);
> +
> __m128i ptype_01 = _mm_unpacklo_epi32(descs[0], descs[1]);
> __m128i ptype_23 = _mm_unpacklo_epi32(descs[2], descs[3]);
> __m128i ptype_all = _mm_unpacklo_epi64(ptype_01, ptype_23);
> --
> 2.25.1