From: Nithin Dabilpuram <ndabilpu...@marvell.com>

Add multi-seg support for Inline IPsec.
Also, in reassembly, FI_PAD is not required to compute the pointer to
the fragment info, because the fragment info is located at
CPT_PARSE_HDR_S + FI_OFFSET * 8, which is always 8B aligned.

Signed-off-by: Nithin Dabilpuram <ndabilpu...@marvell.com>
Signed-off-by: Rahul Bhansali <rbhans...@marvell.com>
---
 drivers/net/cnxk/cn10k_rx.h |  40 +++++---
 drivers/net/cnxk/cn10k_tx.h | 181 ++++++++++++++++++++++++++----------
 2 files changed, 159 insertions(+), 62 deletions(-)

diff --git a/drivers/net/cnxk/cn10k_rx.h b/drivers/net/cnxk/cn10k_rx.h
index 5ecb20f038..8501ae9439 100644
--- a/drivers/net/cnxk/cn10k_rx.h
+++ b/drivers/net/cnxk/cn10k_rx.h
@@ -171,7 +171,7 @@ nix_sec_attach_frags(const struct cpt_parse_hdr_s *hdr,
 
        /* offset of 0 implies 256B, otherwise it implies offset*8B */
        offset = (((offset - 1) & 0x1f) + 1) * 8;
-       finfo = RTE_PTR_ADD(hdr, offset + hdr->w2.fi_pad);
+       finfo = RTE_PTR_ADD(hdr, offset);
 
        /* Frag-0: */
        wqe = (uint64_t *)(rte_be_to_cpu_64(hdr->wqe_ptr));
@@ -300,7 +300,7 @@ nix_sec_reassemble_frags(const struct cpt_parse_hdr_s *hdr, 
uint64_t cq_w1,
 
        /* offset of 0 implies 256B, otherwise it implies offset*8B */
        offset = (((offset - 1) & 0x1f) + 1) * 8;
-       finfo = RTE_PTR_ADD(hdr, offset + hdr->w2.fi_pad);
+       finfo = RTE_PTR_ADD(hdr, offset);
 
        /* Frag-0: */
        wqe = (uint64_t *)rte_be_to_cpu_64(hdr->wqe_ptr);
@@ -685,20 +685,32 @@ nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, 
struct rte_mbuf *mbuf,
        struct rte_mbuf *head;
        const rte_iova_t *eol;
        uint8_t nb_segs;
+       uint64_t cq_w1;
+       int64_t len;
        uint64_t sg;
 
+       cq_w1 = *(const uint64_t *)rx;
+       /* Use inner rx parse for meta pkts sg list */
+       if (cq_w1 & BIT(11) && flags & NIX_RX_OFFLOAD_SECURITY_F) {
+               const uint64_t *wqe = (const uint64_t *)(mbuf + 1);
+               rx = (const union nix_rx_parse_u *)(wqe + 1);
+       }
+
        sg = *(const uint64_t *)(rx + 1);
        nb_segs = (sg >> 48) & 0x3;
 
-       if (nb_segs == 1 && !(flags & NIX_RX_SEC_REASSEMBLY_F)) {
-               mbuf->next = NULL;
+       if (nb_segs == 1)
                return;
-       }
 
-       mbuf->pkt_len = (rx->pkt_lenm1 + 1) - (flags & NIX_RX_OFFLOAD_TSTAMP_F ?
-                                              CNXK_NIX_TIMESYNC_RX_OFFSET : 0);
-       mbuf->data_len = (sg & 0xFFFF) - (flags & NIX_RX_OFFLOAD_TSTAMP_F ?
-                                         CNXK_NIX_TIMESYNC_RX_OFFSET : 0);
+       /* For security we have already updated right pkt_len */
+       if (cq_w1 & BIT(11) && flags & NIX_RX_OFFLOAD_SECURITY_F)
+               len = mbuf->pkt_len;
+       else
+               len = rx->pkt_lenm1 + 1;
+       mbuf->pkt_len = len - (flags & NIX_RX_OFFLOAD_TSTAMP_F ? 
CNXK_NIX_TIMESYNC_RX_OFFSET : 0);
+       mbuf->data_len =
+               (sg & 0xFFFF) - (flags & NIX_RX_OFFLOAD_TSTAMP_F ? 
CNXK_NIX_TIMESYNC_RX_OFFSET : 0);
+       len -= mbuf->data_len;
        mbuf->nb_segs = nb_segs;
        sg = sg >> 16;
 
@@ -717,6 +729,7 @@ nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct 
rte_mbuf *mbuf,
                RTE_MEMPOOL_CHECK_COOKIES(mbuf->pool, (void **)&mbuf, 1, 1);
 
                mbuf->data_len = sg & 0xFFFF;
+               len -= sg & 0XFFFF;
                sg = sg >> 16;
                *(uint64_t *)(&mbuf->rearm_data) = rearm;
                nb_segs--;
@@ -729,7 +742,10 @@ nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct 
rte_mbuf *mbuf,
                        iova_list = (const rte_iova_t *)(iova_list + 1);
                }
        }
-       mbuf->next = NULL;
+
+       /* Adjust last mbuf data length with negative offset for security pkts 
if needed */
+       if (cq_w1 & BIT(11) && flags & NIX_RX_OFFLOAD_SECURITY_F && len < 0)
+               mbuf->data_len += len;
 }
 
 static __rte_always_inline void
@@ -787,9 +803,9 @@ cn10k_nix_cqe_to_mbuf(const struct nix_cqe_hdr_s *cq, const 
uint32_t tag,
                 * For multi segment packets, mbuf length correction according
                 * to Rx timestamp length will be handled later during
                 * timestamp data process.
-                * Hence, flag argument is not required.
+                * Hence, timestamp flag argument is not required.
                 */
-               nix_cqe_xtract_mseg(rx, mbuf, val, 0);
+               nix_cqe_xtract_mseg(rx, mbuf, val, flag & 
~NIX_RX_OFFLOAD_TSTAMP_F);
 }
 
 static inline uint16_t
diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h
index ea13866b20..2be5ecdf5e 100644
--- a/drivers/net/cnxk/cn10k_tx.h
+++ b/drivers/net/cnxk/cn10k_tx.h
@@ -282,7 +282,7 @@ cn10k_nix_prep_sec_vec(struct rte_mbuf *m, uint64x2_t 
*cmd0, uint64x2_t *cmd1,
        uint8_t l2_len, l3_len;
        uintptr_t dptr, nixtx;
        uint64_t ucode_cmd[4];
-       uint64_t *laddr;
+       uint64_t *laddr, w0;
        uint16_t tag;
        uint64_t sa;
 
@@ -329,30 +329,57 @@ cn10k_nix_prep_sec_vec(struct rte_mbuf *m, uint64x2_t 
*cmd0, uint64x2_t *cmd1,
 
        /* Update send descriptors. Security is single segment only */
        *cmd0 = vsetq_lane_u16(pkt_len + dlen_adj, *cmd0, 0);
-       *cmd1 = vsetq_lane_u16(pkt_len + dlen_adj, *cmd1, 0);
 
-       /* Get area where NIX descriptor needs to be stored */
-       nixtx = dptr + pkt_len + dlen_adj;
-       nixtx += BIT_ULL(7);
-       nixtx = (nixtx - 1) & ~(BIT_ULL(7) - 1);
+       /* CPT word 5 and word 6 */
+       w0 = 0;
+       ucode_cmd[2] = 0;
+       if (flags & NIX_TX_MULTI_SEG_F && m->nb_segs > 1) {
+               struct rte_mbuf *last = rte_pktmbuf_lastseg(m);
+
+               /* Get area where NIX descriptor needs to be stored */
+               nixtx = rte_pktmbuf_mtod_offset(last, uintptr_t, last->data_len 
+ dlen_adj);
+               nixtx += BIT_ULL(7);
+               nixtx = (nixtx - 1) & ~(BIT_ULL(7) - 1);
+               nixtx += 16;
+
+               dptr = nixtx + ((flags & NIX_TX_NEED_EXT_HDR) ? 32 : 16);
+
+               /* Set l2 length as data offset */
+               w0 = (uint64_t)l2_len << 16;
+               w0 |= cn10k_nix_tx_ext_subs(flags) + 
NIX_NB_SEGS_TO_SEGDW(m->nb_segs);
+               ucode_cmd[1] = dptr | ((uint64_t)m->nb_segs << 60);
+       } else {
+               /* Get area where NIX descriptor needs to be stored */
+               nixtx = dptr + pkt_len + dlen_adj;
+               nixtx += BIT_ULL(7);
+               nixtx = (nixtx - 1) & ~(BIT_ULL(7) - 1);
+               nixtx += 16;
+
+               w0 |= cn10k_nix_tx_ext_subs(flags) + 1;
+               dptr += l2_len;
+               ucode_cmd[1] = dptr;
+               *cmd1 = vsetq_lane_u16(pkt_len + dlen_adj, *cmd1, 0);
+               /* DLEN passed is excluding L2 HDR */
+               pkt_len -= l2_len;
+       }
+       w0 |= nixtx;
+       /* CPT word 0 and 1 */
+       cmd01 = vdupq_n_u64(0);
+       cmd01 = vsetq_lane_u64(w0, cmd01, 0);
+       /* CPT_RES_S is 16B above NIXTX */
+       cmd01 = vsetq_lane_u64(nixtx - 16, cmd01, 1);
 
        /* Return nixtx addr */
-       *nixtx_addr = (nixtx + 16);
+       *nixtx_addr = nixtx;
 
-       /* DLEN passed is excluding L2HDR */
-       pkt_len -= l2_len;
+       /* CPT Word 4 and Word 7 */
        tag = sa_base & 0xFFFFUL;
        sa_base &= ~0xFFFFUL;
        sa = (uintptr_t)roc_nix_inl_ot_ipsec_outb_sa(sa_base, sess_priv.sa_idx);
        ucode_cmd[3] = (ROC_CPT_DFLT_ENG_GRP_SE_IE << 61 | 1UL << 60 | sa);
-       ucode_cmd[0] = (ROC_IE_OT_MAJOR_OP_PROCESS_OUTBOUND_IPSEC << 48 |
-                       ((uint64_t)sess_priv.chksum) << 32 |
-                       ((uint64_t)sess_priv.dec_ttl) << 34 | pkt_len);
-
-       /* CPT Word 0 and Word 1 */
-       cmd01 = vdupq_n_u64((nixtx + 16) | (cn10k_nix_tx_ext_subs(flags) + 1));
-       /* CPT_RES_S is 16B above NIXTX */
-       cmd01 = vsetq_lane_u8(nixtx & BIT_ULL(7), cmd01, 8);
+       ucode_cmd[0] = (ROC_IE_OT_MAJOR_OP_PROCESS_OUTBOUND_IPSEC << 48 | 1UL 
<< 54 |
+                       ((uint64_t)sess_priv.chksum) << 32 | 
((uint64_t)sess_priv.dec_ttl) << 34 |
+                       pkt_len);
 
        /* CPT word 2 and 3 */
        cmd23 = vdupq_n_u64(0);
@@ -371,9 +398,6 @@ cn10k_nix_prep_sec_vec(struct rte_mbuf *m, uint64x2_t 
*cmd0, uint64x2_t *cmd1,
                                rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6);
        }
 
-       ucode_cmd[1] = dptr;
-       ucode_cmd[2] = dptr;
-
        /* Move to our line */
        laddr = LMT_OFF(lbase, *lnum, *loff ? 64 : 0);
 
@@ -404,7 +428,7 @@ cn10k_nix_prep_sec(struct rte_mbuf *m, uint64_t *cmd, 
uintptr_t *nixtx_addr,
        uint8_t l2_len, l3_len;
        uintptr_t dptr, nixtx;
        uint64_t ucode_cmd[4];
-       uint64_t *laddr;
+       uint64_t *laddr, w0;
        uint16_t tag;
        uint64_t sa;
 
@@ -457,30 +481,56 @@ cn10k_nix_prep_sec(struct rte_mbuf *m, uint64_t *cmd, 
uintptr_t *nixtx_addr,
 
        /* Update send descriptors. Security is single segment only */
        send_hdr->w0.total = pkt_len + dlen_adj;
-       sg->seg1_size = pkt_len + dlen_adj;
 
-       /* Get area where NIX descriptor needs to be stored */
-       nixtx = dptr + pkt_len + dlen_adj;
-       nixtx += BIT_ULL(7);
-       nixtx = (nixtx - 1) & ~(BIT_ULL(7) - 1);
+       /* CPT word 5 and word 6 */
+       w0 = 0;
+       ucode_cmd[2] = 0;
+       if (flags & NIX_TX_MULTI_SEG_F && m->nb_segs > 1) {
+               struct rte_mbuf *last = rte_pktmbuf_lastseg(m);
+
+               /* Get area where NIX descriptor needs to be stored */
+               nixtx = rte_pktmbuf_mtod_offset(last, uintptr_t, last->data_len 
+ dlen_adj);
+               nixtx += BIT_ULL(7);
+               nixtx = (nixtx - 1) & ~(BIT_ULL(7) - 1);
+               nixtx += 16;
+
+               dptr = nixtx + ((flags & NIX_TX_NEED_EXT_HDR) ? 32 : 16);
+
+               /* Set l2 length as data offset */
+               w0 = (uint64_t)l2_len << 16;
+               w0 |= cn10k_nix_tx_ext_subs(flags) + 
NIX_NB_SEGS_TO_SEGDW(m->nb_segs);
+               ucode_cmd[1] = dptr | ((uint64_t)m->nb_segs << 60);
+       } else {
+               /* Get area where NIX descriptor needs to be stored */
+               nixtx = dptr + pkt_len + dlen_adj;
+               nixtx += BIT_ULL(7);
+               nixtx = (nixtx - 1) & ~(BIT_ULL(7) - 1);
+               nixtx += 16;
+
+               w0 |= cn10k_nix_tx_ext_subs(flags) + 1;
+               dptr += l2_len;
+               ucode_cmd[1] = dptr;
+               sg->seg1_size = pkt_len + dlen_adj;
+               pkt_len -= l2_len;
+       }
+       w0 |= nixtx;
+       /* CPT word 0 and 1 */
+       cmd01 = vdupq_n_u64(0);
+       cmd01 = vsetq_lane_u64(w0, cmd01, 0);
+       /* CPT_RES_S is 16B above NIXTX */
+       cmd01 = vsetq_lane_u64(nixtx - 16, cmd01, 1);
 
        /* Return nixtx addr */
-       *nixtx_addr = (nixtx + 16);
+       *nixtx_addr = nixtx;
 
-       /* DLEN passed is excluding L2HDR */
-       pkt_len -= l2_len;
+       /* CPT Word 4 and Word 7 */
        tag = sa_base & 0xFFFFUL;
        sa_base &= ~0xFFFFUL;
        sa = (uintptr_t)roc_nix_inl_ot_ipsec_outb_sa(sa_base, sess_priv.sa_idx);
        ucode_cmd[3] = (ROC_CPT_DFLT_ENG_GRP_SE_IE << 61 | 1UL << 60 | sa);
-       ucode_cmd[0] = (ROC_IE_OT_MAJOR_OP_PROCESS_OUTBOUND_IPSEC << 48 |
-                       ((uint64_t)sess_priv.chksum) << 32 |
-                       ((uint64_t)sess_priv.dec_ttl) << 34 | pkt_len);
-
-       /* CPT Word 0 and Word 1. Assume no multi-seg support */
-       cmd01 = vdupq_n_u64((nixtx + 16) | (cn10k_nix_tx_ext_subs(flags) + 1));
-       /* CPT_RES_S is 16B above NIXTX */
-       cmd01 = vsetq_lane_u8(nixtx & BIT_ULL(7), cmd01, 8);
+       ucode_cmd[0] = (ROC_IE_OT_MAJOR_OP_PROCESS_OUTBOUND_IPSEC << 48 | 1UL 
<< 54 |
+                       ((uint64_t)sess_priv.chksum) << 32 | 
((uint64_t)sess_priv.dec_ttl) << 34 |
+                       pkt_len);
 
        /* CPT word 2 and 3 */
        cmd23 = vdupq_n_u64(0);
@@ -498,8 +548,6 @@ cn10k_nix_prep_sec(struct rte_mbuf *m, uint64_t *cmd, 
uintptr_t *nixtx_addr,
                        *((uint16_t *)(dptr - 2)) =
                                rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6);
        }
-       ucode_cmd[1] = dptr;
-       ucode_cmd[2] = dptr;
 
        /* Move to our line */
        laddr = LMT_OFF(lbase, *lnum, *loff ? 64 : 0);
@@ -858,6 +906,8 @@ cn10k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, 
const uint16_t flags)
        union nix_send_sg_s *sg;
        struct rte_mbuf *m_next;
        uint64_t *slist, sg_u;
+       uint64_t len, dlen;
+       uint64_t ol_flags;
        uint64_t nb_segs;
        uint64_t segdw;
        uint8_t off, i;
@@ -870,10 +920,14 @@ cn10k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, 
const uint16_t flags)
                off = 0;
 
        sg = (union nix_send_sg_s *)&cmd[2 + off];
+       len = send_hdr->w0.total;
+       if (flags & NIX_TX_OFFLOAD_SECURITY_F)
+               ol_flags = m->ol_flags;
 
        /* Start from second segment, first segment is already there */
        i = 1;
        sg_u = sg->u;
+       len -= sg_u & 0xFFFF;
        nb_segs = m->nb_segs - 1;
        m_next = m->next;
        slist = &cmd[3 + off + 1];
@@ -888,6 +942,7 @@ cn10k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, 
const uint16_t flags)
                RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
        rte_io_wmb();
 #endif
+       m->next = NULL;
        m = m_next;
        if (!m)
                goto done;
@@ -895,7 +950,9 @@ cn10k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, 
const uint16_t flags)
        /* Fill mbuf segments */
        do {
                m_next = m->next;
-               sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
+               dlen = m->data_len;
+               len -= dlen;
+               sg_u = sg_u | ((uint64_t)dlen << (i << 4));
                *slist = rte_mbuf_data_iova(m);
                /* Set invert df if buffer is not to be freed by H/W */
                if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
@@ -919,10 +976,20 @@ cn10k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, 
const uint16_t flags)
                        sg_u = sg->u;
                        slist++;
                }
+               m->next = NULL;
                m = m_next;
        } while (nb_segs);
 
 done:
+       /* Add remaining bytes of security data to last seg */
+       if (flags & NIX_TX_OFFLOAD_SECURITY_F && ol_flags & 
RTE_MBUF_F_TX_SEC_OFFLOAD && len) {
+               uint8_t shft = ((i - 1) << 4);
+
+               dlen = ((sg_u >> shft) & 0xFFFFULL) + len;
+               sg_u = sg_u & ~(0xFFFFULL << shft);
+               sg_u |= dlen << shft;
+       }
+
        sg->u = sg_u;
        sg->segs = i;
        segdw = (uint64_t *)slist - (uint64_t *)&cmd[2 + off];
@@ -1266,17 +1333,26 @@ cn10k_nix_prepare_mseg_vec_list(struct rte_mbuf *m, 
uint64_t *cmd,
                                union nix_send_sg_s *sg, const uint32_t flags)
 {
        struct rte_mbuf *m_next;
+       uint64_t ol_flags, len;
        uint64_t *slist, sg_u;
        uint16_t nb_segs;
+       uint64_t dlen;
        int i = 1;
 
-       sh->total = m->pkt_len;
+       len = m->pkt_len;
+       ol_flags = m->ol_flags;
+       /* For security we would have already populated the right length */
+       if (flags & NIX_TX_OFFLOAD_SECURITY_F && ol_flags & 
RTE_MBUF_F_TX_SEC_OFFLOAD)
+               len = sh->total;
+       sh->total = len;
        /* Clear sg->u header before use */
        sg->u &= 0xFC00000000000000;
        sg_u = sg->u;
        slist = &cmd[0];
 
-       sg_u = sg_u | ((uint64_t)m->data_len);
+       dlen = m->data_len;
+       len -= dlen;
+       sg_u = sg_u | ((uint64_t)dlen);
 
        nb_segs = m->nb_segs - 1;
        m_next = m->next;
@@ -1291,11 +1367,14 @@ cn10k_nix_prepare_mseg_vec_list(struct rte_mbuf *m, 
uint64_t *cmd,
        rte_io_wmb();
 #endif
 
+       m->next = NULL;
        m = m_next;
        /* Fill mbuf segments */
        do {
                m_next = m->next;
-               sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
+               dlen = m->data_len;
+               len -= dlen;
+               sg_u = sg_u | ((uint64_t)dlen << (i << 4));
                *slist = rte_mbuf_data_iova(m);
                /* Set invert df if buffer is not to be freed by H/W */
                if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
@@ -1320,9 +1399,18 @@ cn10k_nix_prepare_mseg_vec_list(struct rte_mbuf *m, 
uint64_t *cmd,
                        sg_u = sg->u;
                        slist++;
                }
+               m->next = NULL;
                m = m_next;
        } while (nb_segs);
 
+       /* Add remaining bytes of security data to last seg */
+       if (flags & NIX_TX_OFFLOAD_SECURITY_F && ol_flags & 
RTE_MBUF_F_TX_SEC_OFFLOAD && len) {
+               uint8_t shft = ((i - 1) << 4);
+
+               dlen = ((sg_u >> shft) & 0xFFFF) + len;
+               sg_u = sg_u & ~(0xFFFFULL << shft);
+               sg_u |= dlen << shft;
+       }
        sg->u = sg_u;
        sg->segs = i;
 }
@@ -2689,13 +2777,6 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,
                        lnum += 1;
                }
 
-               if (flags & NIX_TX_MULTI_SEG_F) {
-                       tx_pkts[0]->next = NULL;
-                       tx_pkts[1]->next = NULL;
-                       tx_pkts[2]->next = NULL;
-                       tx_pkts[3]->next = NULL;
-               }
-
                tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP;
        }
 
-- 
2.25.1

Reply via email to