On Tue, Aug 30, 2022 at 4:45 PM Rahul Bhansali <rbhans...@marvell.com> wrote:
>
> From: Nithin Dabilpuram <ndabilpu...@marvell.com>
>
> Add multi-seg support for Inline IPsec. Also, in reassembly, FI_PAD is
> not required to compute the pointer to the fragment info: the fragment
> info is located at CPT_PARSE_HDR_S + FI_OFFSET * 8, which is always
> 8B aligned.
>
> Signed-off-by: Nithin Dabilpuram <ndabilpu...@marvell.com>
> Signed-off-by: Rahul Bhansali <rbhans...@marvell.com>

Applied to dpdk-next-net-mrvl/for-next-net. Thanks
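
A note for readers following the FI_PAD change: judging by the 0x1f mask,
FI_OFFSET appears to be a 5-bit field encoding the distance from
CPT_PARSE_HDR_S in 8B units, with 0 meaning 256B. A minimal standalone
sketch of the decode used in the hunks below (the mask and multiplier come
from the patch itself; the helper name is illustrative):

    #include <stdio.h>
    #include <stdint.h>

    /* Decode FI_OFFSET: 0 => 256B, otherwise offset * 8B. */
    static uint32_t fi_offset_bytes(uint32_t offset)
    {
        return (((offset - 1) & 0x1f) + 1) * 8;
    }

    int main(void)
    {
        printf("%u %u %u\n", fi_offset_bytes(0),    /* 256 */
               fi_offset_bytes(1),                  /* 8   */
               fi_offset_bytes(31));                /* 248 */
        return 0;
    }

Every decoded value is a multiple of 8, so the result is already 8B
aligned and the extra hdr->w2.fi_pad term can be dropped.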
> ---
>  drivers/net/cnxk/cn10k_rx.h |  40 +++++---
>  drivers/net/cnxk/cn10k_tx.h | 181 ++++++++++++++++++++++++++----------
>  2 files changed, 159 insertions(+), 62 deletions(-)
>
> diff --git a/drivers/net/cnxk/cn10k_rx.h b/drivers/net/cnxk/cn10k_rx.h
> index 5ecb20f038..8501ae9439 100644
> --- a/drivers/net/cnxk/cn10k_rx.h
> +++ b/drivers/net/cnxk/cn10k_rx.h
> @@ -171,7 +171,7 @@ nix_sec_attach_frags(const struct cpt_parse_hdr_s *hdr,
>
>          /* offset of 0 implies 256B, otherwise it implies offset*8B */
>          offset = (((offset - 1) & 0x1f) + 1) * 8;
> -        finfo = RTE_PTR_ADD(hdr, offset + hdr->w2.fi_pad);
> +        finfo = RTE_PTR_ADD(hdr, offset);
>
>          /* Frag-0: */
>          wqe = (uint64_t *)(rte_be_to_cpu_64(hdr->wqe_ptr));
> @@ -300,7 +300,7 @@ nix_sec_reassemble_frags(const struct cpt_parse_hdr_s *hdr, uint64_t cq_w1,
>
>          /* offset of 0 implies 256B, otherwise it implies offset*8B */
>          offset = (((offset - 1) & 0x1f) + 1) * 8;
> -        finfo = RTE_PTR_ADD(hdr, offset + hdr->w2.fi_pad);
> +        finfo = RTE_PTR_ADD(hdr, offset);
>
>          /* Frag-0: */
>          wqe = (uint64_t *)rte_be_to_cpu_64(hdr->wqe_ptr);
> @@ -685,20 +685,32 @@ nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf,
>          struct rte_mbuf *head;
>          const rte_iova_t *eol;
>          uint8_t nb_segs;
> +        uint64_t cq_w1;
> +        int64_t len;
>          uint64_t sg;
>
> +        cq_w1 = *(const uint64_t *)rx;
> +        /* Use inner rx parse for meta pkts sg list */
> +        if (cq_w1 & BIT(11) && flags & NIX_RX_OFFLOAD_SECURITY_F) {
> +                const uint64_t *wqe = (const uint64_t *)(mbuf + 1);
> +                rx = (const union nix_rx_parse_u *)(wqe + 1);
> +        }
> +
>          sg = *(const uint64_t *)(rx + 1);
>          nb_segs = (sg >> 48) & 0x3;
>
> -        if (nb_segs == 1 && !(flags & NIX_RX_SEC_REASSEMBLY_F)) {
> -                mbuf->next = NULL;
> +        if (nb_segs == 1)
>                  return;
> -        }
>
> -        mbuf->pkt_len = (rx->pkt_lenm1 + 1) - (flags & NIX_RX_OFFLOAD_TSTAMP_F ?
> -                                               CNXK_NIX_TIMESYNC_RX_OFFSET : 0);
> -        mbuf->data_len = (sg & 0xFFFF) - (flags & NIX_RX_OFFLOAD_TSTAMP_F ?
> -                                          CNXK_NIX_TIMESYNC_RX_OFFSET : 0);
> +        /* For security we have already updated right pkt_len */
> +        if (cq_w1 & BIT(11) && flags & NIX_RX_OFFLOAD_SECURITY_F)
> +                len = mbuf->pkt_len;
> +        else
> +                len = rx->pkt_lenm1 + 1;
> +        mbuf->pkt_len = len - (flags & NIX_RX_OFFLOAD_TSTAMP_F ? CNXK_NIX_TIMESYNC_RX_OFFSET : 0);
> +        mbuf->data_len =
> +                (sg & 0xFFFF) - (flags & NIX_RX_OFFLOAD_TSTAMP_F ? CNXK_NIX_TIMESYNC_RX_OFFSET : 0);
> +        len -= mbuf->data_len;
>          mbuf->nb_segs = nb_segs;
>          sg = sg >> 16;
>
> @@ -717,6 +729,7 @@ nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf,
>          RTE_MEMPOOL_CHECK_COOKIES(mbuf->pool, (void **)&mbuf, 1, 1);
>
>          mbuf->data_len = sg & 0xFFFF;
> +        len -= sg & 0XFFFF;
>          sg = sg >> 16;
>          *(uint64_t *)(&mbuf->rearm_data) = rearm;
>          nb_segs--;
> @@ -729,7 +742,10 @@ nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf,
>                  iova_list = (const rte_iova_t *)(iova_list + 1);
>                  }
>          }
> -        mbuf->next = NULL;
> +
> +        /* Adjust last mbuf data length with negative offset for security pkts if needed */
> +        if (cq_w1 & BIT(11) && flags & NIX_RX_OFFLOAD_SECURITY_F && len < 0)
> +                mbuf->data_len += len;
>  }
>
>  static __rte_always_inline void
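
On the Rx side, the signed `len` bookkeeping above is the core of the
multi-seg security handling: for processed packets mbuf->pkt_len already
reflects the post-processing length, while the hardware SG list still
describes the original (possibly larger) buffers, so the running remainder
can go negative and the tail segment gets trimmed. A self-contained sketch
of that flow, with made-up segment sizes (values are hypothetical):

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        /* Hypothetical HW SG sizes vs. post-processing pkt_len. */
        uint16_t seg_len[3] = {1448, 1448, 120};  /* buffers total 3016B */
        int64_t len = 2990;                       /* pkt_len after processing */

        for (int i = 0; i < 3; i++)
            len -= seg_len[i];                    /* ends at -26 */

        if (len < 0)
            seg_len[2] += len;                    /* trim tail to 94B */
        printf("last seg = %d\n", seg_len[2]);
        return 0;
    }

This mirrors the len -= mbuf->data_len / mbuf->data_len += len flow in
nix_cqe_xtract_mseg.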
> @@ -787,9 +803,9 @@ cn10k_nix_cqe_to_mbuf(const struct nix_cqe_hdr_s *cq, const uint32_t tag,
>           * For multi segment packets, mbuf length correction according
>           * to Rx timestamp length will be handled later during
>           * timestamp data process.
> -         * Hence, flag argument is not required.
> +         * Hence, timestamp flag argument is not required.
>           */
> -        nix_cqe_xtract_mseg(rx, mbuf, val, 0);
> +        nix_cqe_xtract_mseg(rx, mbuf, val, flag & ~NIX_RX_OFFLOAD_TSTAMP_F);
>  }
>
>  static inline uint16_t
> diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h
> index ea13866b20..2be5ecdf5e 100644
> --- a/drivers/net/cnxk/cn10k_tx.h
> +++ b/drivers/net/cnxk/cn10k_tx.h
> @@ -282,7 +282,7 @@ cn10k_nix_prep_sec_vec(struct rte_mbuf *m, uint64x2_t *cmd0, uint64x2_t *cmd1,
>          uint8_t l2_len, l3_len;
>          uintptr_t dptr, nixtx;
>          uint64_t ucode_cmd[4];
> -        uint64_t *laddr;
> +        uint64_t *laddr, w0;
>          uint16_t tag;
>          uint64_t sa;
>
> @@ -329,30 +329,57 @@ cn10k_nix_prep_sec_vec(struct rte_mbuf *m, uint64x2_t *cmd0, uint64x2_t *cmd1,
>
>          /* Update send descriptors. Security is single segment only */
>          *cmd0 = vsetq_lane_u16(pkt_len + dlen_adj, *cmd0, 0);
> -        *cmd1 = vsetq_lane_u16(pkt_len + dlen_adj, *cmd1, 0);
>
> -        /* Get area where NIX descriptor needs to be stored */
> -        nixtx = dptr + pkt_len + dlen_adj;
> -        nixtx += BIT_ULL(7);
> -        nixtx = (nixtx - 1) & ~(BIT_ULL(7) - 1);
> +        /* CPT word 5 and word 6 */
> +        w0 = 0;
> +        ucode_cmd[2] = 0;
> +        if (flags & NIX_TX_MULTI_SEG_F && m->nb_segs > 1) {
> +                struct rte_mbuf *last = rte_pktmbuf_lastseg(m);
> +
> +                /* Get area where NIX descriptor needs to be stored */
> +                nixtx = rte_pktmbuf_mtod_offset(last, uintptr_t, last->data_len + dlen_adj);
> +                nixtx += BIT_ULL(7);
> +                nixtx = (nixtx - 1) & ~(BIT_ULL(7) - 1);
> +                nixtx += 16;
> +
> +                dptr = nixtx + ((flags & NIX_TX_NEED_EXT_HDR) ? 32 : 16);
> +
> +                /* Set l2 length as data offset */
> +                w0 = (uint64_t)l2_len << 16;
> +                w0 |= cn10k_nix_tx_ext_subs(flags) + NIX_NB_SEGS_TO_SEGDW(m->nb_segs);
> +                ucode_cmd[1] = dptr | ((uint64_t)m->nb_segs << 60);
> +        } else {
> +                /* Get area where NIX descriptor needs to be stored */
> +                nixtx = dptr + pkt_len + dlen_adj;
> +                nixtx += BIT_ULL(7);
> +                nixtx = (nixtx - 1) & ~(BIT_ULL(7) - 1);
> +                nixtx += 16;
> +
> +                w0 |= cn10k_nix_tx_ext_subs(flags) + 1;
> +                dptr += l2_len;
> +                ucode_cmd[1] = dptr;
> +                *cmd1 = vsetq_lane_u16(pkt_len + dlen_adj, *cmd1, 0);
> +                /* DLEN passed is excluding L2 HDR */
> +                pkt_len -= l2_len;
> +        }
> +        w0 |= nixtx;
> +        /* CPT word 0 and 1 */
> +        cmd01 = vdupq_n_u64(0);
> +        cmd01 = vsetq_lane_u64(w0, cmd01, 0);
> +        /* CPT_RES_S is 16B above NIXTX */
> +        cmd01 = vsetq_lane_u64(nixtx - 16, cmd01, 1);
>
>          /* Return nixtx addr */
> -        *nixtx_addr = (nixtx + 16);
> +        *nixtx_addr = nixtx;
>
> -        /* DLEN passed is excluding L2HDR */
> -        pkt_len -= l2_len;
> +        /* CPT Word 4 and Word 7 */
>          tag = sa_base & 0xFFFFUL;
>          sa_base &= ~0xFFFFUL;
>          sa = (uintptr_t)roc_nix_inl_ot_ipsec_outb_sa(sa_base, sess_priv.sa_idx);
>          ucode_cmd[3] = (ROC_CPT_DFLT_ENG_GRP_SE_IE << 61 | 1UL << 60 | sa);
> -        ucode_cmd[0] = (ROC_IE_OT_MAJOR_OP_PROCESS_OUTBOUND_IPSEC << 48 |
> -                        ((uint64_t)sess_priv.chksum) << 32 |
> -                        ((uint64_t)sess_priv.dec_ttl) << 34 | pkt_len);
> -
> -        /* CPT Word 0 and Word 1 */
> -        cmd01 = vdupq_n_u64((nixtx + 16) | (cn10k_nix_tx_ext_subs(flags) + 1));
> -        /* CPT_RES_S is 16B above NIXTX */
> -        cmd01 = vsetq_lane_u8(nixtx & BIT_ULL(7), cmd01, 8);
> +        ucode_cmd[0] = (ROC_IE_OT_MAJOR_OP_PROCESS_OUTBOUND_IPSEC << 48 | 1UL << 54 |
> +                        ((uint64_t)sess_priv.chksum) << 32 | ((uint64_t)sess_priv.dec_ttl) << 34 |
> +                        pkt_len);
>
>          /* CPT word 2 and 3 */
>          cmd23 = vdupq_n_u64(0);
> @@ -371,9 +398,6 @@ cn10k_nix_prep_sec_vec(struct rte_mbuf *m, uint64x2_t *cmd0, uint64x2_t *cmd1,
>                          rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6);
>          }
>
> -        ucode_cmd[1] = dptr;
> -        ucode_cmd[2] = dptr;
> -
>          /* Move to our line */
>          laddr = LMT_OFF(lbase, *lnum, *loff ? 64 : 0);
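
Both Tx branches above place the NIX descriptor just past the end of
packet data, rounded up to a 128B line, and keep CPT_RES_S in the 16B
immediately below the returned address (cmd01 lane 1 is nixtx - 16). A
standalone sketch of that alignment math, using a hypothetical
end-of-data address:

    #include <stdio.h>
    #include <stdint.h>
    #include <inttypes.h>

    int main(void)
    {
        uintptr_t end = 0x10000123;   /* hypothetical end of packet data */

        /* Round up to the next 128B line (no-op when already aligned). */
        uintptr_t nixtx = end + (1ULL << 7);
        nixtx = (nixtx - 1) & ~(uintptr_t)((1ULL << 7) - 1);
        nixtx += 16;                  /* the 16B below hold CPT_RES_S */

        printf("nixtx=%#" PRIxPTR " cpt_res=%#" PRIxPTR "\n",
               nixtx, nixtx - 16);   /* 0x10000190, 0x10000180 */
        return 0;
    }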
>
> @@ -404,7 +428,7 @@ cn10k_nix_prep_sec(struct rte_mbuf *m, uint64_t *cmd, uintptr_t *nixtx_addr,
>          uint8_t l2_len, l3_len;
>          uintptr_t dptr, nixtx;
>          uint64_t ucode_cmd[4];
> -        uint64_t *laddr;
> +        uint64_t *laddr, w0;
>          uint16_t tag;
>          uint64_t sa;
>
> @@ -457,30 +481,56 @@ cn10k_nix_prep_sec(struct rte_mbuf *m, uint64_t *cmd, uintptr_t *nixtx_addr,
>
>          /* Update send descriptors. Security is single segment only */
>          send_hdr->w0.total = pkt_len + dlen_adj;
> -        sg->seg1_size = pkt_len + dlen_adj;
>
> -        /* Get area where NIX descriptor needs to be stored */
> -        nixtx = dptr + pkt_len + dlen_adj;
> -        nixtx += BIT_ULL(7);
> -        nixtx = (nixtx - 1) & ~(BIT_ULL(7) - 1);
> +        /* CPT word 5 and word 6 */
> +        w0 = 0;
> +        ucode_cmd[2] = 0;
> +        if (flags & NIX_TX_MULTI_SEG_F && m->nb_segs > 1) {
> +                struct rte_mbuf *last = rte_pktmbuf_lastseg(m);
> +
> +                /* Get area where NIX descriptor needs to be stored */
> +                nixtx = rte_pktmbuf_mtod_offset(last, uintptr_t, last->data_len + dlen_adj);
> +                nixtx += BIT_ULL(7);
> +                nixtx = (nixtx - 1) & ~(BIT_ULL(7) - 1);
> +                nixtx += 16;
> +
> +                dptr = nixtx + ((flags & NIX_TX_NEED_EXT_HDR) ? 32 : 16);
> +
> +                /* Set l2 length as data offset */
> +                w0 = (uint64_t)l2_len << 16;
> +                w0 |= cn10k_nix_tx_ext_subs(flags) + NIX_NB_SEGS_TO_SEGDW(m->nb_segs);
> +                ucode_cmd[1] = dptr | ((uint64_t)m->nb_segs << 60);
> +        } else {
> +                /* Get area where NIX descriptor needs to be stored */
> +                nixtx = dptr + pkt_len + dlen_adj;
> +                nixtx += BIT_ULL(7);
> +                nixtx = (nixtx - 1) & ~(BIT_ULL(7) - 1);
> +                nixtx += 16;
> +
> +                w0 |= cn10k_nix_tx_ext_subs(flags) + 1;
> +                dptr += l2_len;
> +                ucode_cmd[1] = dptr;
> +                sg->seg1_size = pkt_len + dlen_adj;
> +                pkt_len -= l2_len;
> +        }
> +        w0 |= nixtx;
> +        /* CPT word 0 and 1 */
> +        cmd01 = vdupq_n_u64(0);
> +        cmd01 = vsetq_lane_u64(w0, cmd01, 0);
> +        /* CPT_RES_S is 16B above NIXTX */
> +        cmd01 = vsetq_lane_u64(nixtx - 16, cmd01, 1);
>
>          /* Return nixtx addr */
> -        *nixtx_addr = (nixtx + 16);
> +        *nixtx_addr = nixtx;
>
> -        /* DLEN passed is excluding L2HDR */
> -        pkt_len -= l2_len;
> +        /* CPT Word 4 and Word 7 */
>          tag = sa_base & 0xFFFFUL;
>          sa_base &= ~0xFFFFUL;
>          sa = (uintptr_t)roc_nix_inl_ot_ipsec_outb_sa(sa_base, sess_priv.sa_idx);
>          ucode_cmd[3] = (ROC_CPT_DFLT_ENG_GRP_SE_IE << 61 | 1UL << 60 | sa);
> -        ucode_cmd[0] = (ROC_IE_OT_MAJOR_OP_PROCESS_OUTBOUND_IPSEC << 48 |
> -                        ((uint64_t)sess_priv.chksum) << 32 |
> -                        ((uint64_t)sess_priv.dec_ttl) << 34 | pkt_len);
> -
> -        /* CPT Word 0 and Word 1. Assume no multi-seg support */
> -        cmd01 = vdupq_n_u64((nixtx + 16) | (cn10k_nix_tx_ext_subs(flags) + 1));
> -        /* CPT_RES_S is 16B above NIXTX */
> -        cmd01 = vsetq_lane_u8(nixtx & BIT_ULL(7), cmd01, 8);
> +        ucode_cmd[0] = (ROC_IE_OT_MAJOR_OP_PROCESS_OUTBOUND_IPSEC << 48 | 1UL << 54 |
> +                        ((uint64_t)sess_priv.chksum) << 32 | ((uint64_t)sess_priv.dec_ttl) << 34 |
> +                        pkt_len);
>
>          /* CPT word 2 and 3 */
>          cmd23 = vdupq_n_u64(0);
> @@ -498,8 +548,6 @@ cn10k_nix_prep_sec(struct rte_mbuf *m, uint64_t *cmd, uintptr_t *nixtx_addr,
>                  *((uint16_t *)(dptr - 2)) =
>                          rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6);
>          }
> -        ucode_cmd[1] = dptr;
> -        ucode_cmd[2] = dptr;
>
>          /* Move to our line */
>          laddr = LMT_OFF(lbase, *lnum, *loff ? 64 : 0);
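
The scalar cn10k_nix_prep_sec mirrors the vector variant, including the
gather word: the SG list pointer and the segment count share one 64-bit
word, with nb_segs in the top four bits. That packing only works if the
IOVA fits in the low 60 bits, which I am assuming here; a tiny sketch of
the pack/unpack (the address is hypothetical):

    #include <stdio.h>
    #include <stdint.h>
    #include <inttypes.h>

    int main(void)
    {
        uint64_t dptr = 0x1234567890ULL;  /* hypothetical gather-list IOVA */
        uint64_t nb_segs = 3;

        /* Pack as in the patch: ucode_cmd[1] = dptr | (nb_segs << 60).
         * Assumes the IOVA occupies only the low 60 bits. */
        uint64_t w6 = dptr | (nb_segs << 60);

        printf("addr=%#" PRIx64 " segs=%" PRIu64 "\n",
               w6 & ((1ULL << 60) - 1), w6 >> 60);
        return 0;
    }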
>
> @@ -858,6 +906,8 @@ cn10k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
>          union nix_send_sg_s *sg;
>          struct rte_mbuf *m_next;
>          uint64_t *slist, sg_u;
> +        uint64_t len, dlen;
> +        uint64_t ol_flags;
>          uint64_t nb_segs;
>          uint64_t segdw;
>          uint8_t off, i;
> @@ -870,10 +920,14 @@ cn10k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
>                  off = 0;
>
>          sg = (union nix_send_sg_s *)&cmd[2 + off];
> +        len = send_hdr->w0.total;
> +        if (flags & NIX_TX_OFFLOAD_SECURITY_F)
> +                ol_flags = m->ol_flags;
>
>          /* Start from second segment, first segment is already there */
>          i = 1;
>          sg_u = sg->u;
> +        len -= sg_u & 0xFFFF;
>          nb_segs = m->nb_segs - 1;
>          m_next = m->next;
>          slist = &cmd[3 + off + 1];
> @@ -888,6 +942,7 @@ cn10k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
>          RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
>          rte_io_wmb();
>  #endif
> +        m->next = NULL;
>          m = m_next;
>          if (!m)
>                  goto done;
> @@ -895,7 +950,9 @@ cn10k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
>          /* Fill mbuf segments */
>          do {
>                  m_next = m->next;
> -                sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
> +                dlen = m->data_len;
> +                len -= dlen;
> +                sg_u = sg_u | ((uint64_t)dlen << (i << 4));
>                  *slist = rte_mbuf_data_iova(m);
>                  /* Set invert df if buffer is not to be freed by H/W */
>                  if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
> @@ -919,10 +976,20 @@ cn10k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
>                          sg_u = sg->u;
>                          slist++;
>                  }
> +                m->next = NULL;
>                  m = m_next;
>          } while (nb_segs);
>
>  done:
> +        /* Add remaining bytes of security data to last seg */
> +        if (flags & NIX_TX_OFFLOAD_SECURITY_F && ol_flags & RTE_MBUF_F_TX_SEC_OFFLOAD && len) {
> +                uint8_t shft = ((i - 1) << 4);
> +
> +                dlen = ((sg_u >> shft) & 0xFFFFULL) + len;
> +                sg_u = sg_u & ~(0xFFFFULL << shft);
> +                sg_u |= dlen << shft;
> +        }
> +
>          sg->u = sg_u;
>          sg->segs = i;
>          segdw = (uint64_t *)slist - (uint64_t *)&cmd[2 + off];
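
The done: fixup above leans on the SG word layout, where the 0xFFFF masks
and 16-bit shifts suggest up to three 16-bit segment sizes packed into the
low 48 bits. Folding the leftover security bytes (the ESP overhead counted
in w0.total but not in the per-segment sizes) into the last filled size
field looks like this in isolation (sizes and overhead are made up):

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        /* Hypothetical SG word: three 16-bit sizes at bits 0/16/32. */
        uint64_t sg_u = (100ULL << 32) | (1448ULL << 16) | 1448ULL;
        int i = 3;           /* segments already filled into this word */
        uint64_t len = 24;   /* overhead not yet reflected in the sizes */

        uint8_t shft = (i - 1) << 4;  /* bit position of last size field */
        uint64_t dlen = ((sg_u >> shft) & 0xFFFFULL) + len;

        sg_u = (sg_u & ~(0xFFFFULL << shft)) | (dlen << shft);
        printf("last seg size = %llu\n",
               (unsigned long long)((sg_u >> shft) & 0xFFFFULL)); /* 124 */
        return 0;
    }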
> @@ -1266,17 +1333,26 @@ cn10k_nix_prepare_mseg_vec_list(struct rte_mbuf *m, uint64_t *cmd,
>                                  union nix_send_sg_s *sg, const uint32_t flags)
>  {
>          struct rte_mbuf *m_next;
> +        uint64_t ol_flags, len;
>          uint64_t *slist, sg_u;
>          uint16_t nb_segs;
> +        uint64_t dlen;
>          int i = 1;
>
> -        sh->total = m->pkt_len;
> +        len = m->pkt_len;
> +        ol_flags = m->ol_flags;
> +        /* For security we would have already populated the right length */
> +        if (flags & NIX_TX_OFFLOAD_SECURITY_F && ol_flags & RTE_MBUF_F_TX_SEC_OFFLOAD)
> +                len = sh->total;
> +        sh->total = len;
>          /* Clear sg->u header before use */
>          sg->u &= 0xFC00000000000000;
>          sg_u = sg->u;
>          slist = &cmd[0];
>
> -        sg_u = sg_u | ((uint64_t)m->data_len);
> +        dlen = m->data_len;
> +        len -= dlen;
> +        sg_u = sg_u | ((uint64_t)dlen);
>
>          nb_segs = m->nb_segs - 1;
>          m_next = m->next;
> @@ -1291,11 +1367,14 @@ cn10k_nix_prepare_mseg_vec_list(struct rte_mbuf *m, uint64_t *cmd,
>          rte_io_wmb();
>  #endif
>
> +        m->next = NULL;
>          m = m_next;
>          /* Fill mbuf segments */
>          do {
>                  m_next = m->next;
> -                sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
> +                dlen = m->data_len;
> +                len -= dlen;
> +                sg_u = sg_u | ((uint64_t)dlen << (i << 4));
>                  *slist = rte_mbuf_data_iova(m);
>                  /* Set invert df if buffer is not to be freed by H/W */
>                  if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
> @@ -1320,9 +1399,18 @@ cn10k_nix_prepare_mseg_vec_list(struct rte_mbuf *m, uint64_t *cmd,
>                          sg_u = sg->u;
>                          slist++;
>                  }
> +                m->next = NULL;
>                  m = m_next;
>          } while (nb_segs);
>
> +        /* Add remaining bytes of security data to last seg */
> +        if (flags & NIX_TX_OFFLOAD_SECURITY_F && ol_flags & RTE_MBUF_F_TX_SEC_OFFLOAD && len) {
> +                uint8_t shft = ((i - 1) << 4);
> +
> +                dlen = ((sg_u >> shft) & 0xFFFF) + len;
> +                sg_u = sg_u & ~(0xFFFFULL << shft);
> +                sg_u |= dlen << shft;
> +        }
>          sg->u = sg_u;
>          sg->segs = i;
>  }
> @@ -2689,13 +2777,6 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,
>                          lnum += 1;
>                  }
>
> -                if (flags & NIX_TX_MULTI_SEG_F) {
> -                        tx_pkts[0]->next = NULL;
> -                        tx_pkts[1]->next = NULL;
> -                        tx_pkts[2]->next = NULL;
> -                        tx_pkts[3]->next = NULL;
> -                }
> -
>                  tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP;
>          }
>
> --
> 2.25.1
>
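
One consequence worth noting: the per-burst tx_pkts[n]->next = NULL block
removed from cn10k_nix_xmit_pkts_vector appears to become redundant because
the reworked prepare_mseg routines now unlink each segment while walking the
chain (the added m->next = NULL lines). The walk pattern, reduced to its
essentials with a stand-in struct (not the real rte_mbuf):

    #include <stddef.h>
    #include <assert.h>

    /* Minimal stand-in for an mbuf chain; illustrative only. */
    struct mbuf { struct mbuf *next; };

    /* Unlink every segment while walking, as the reworked
     * prepare_mseg routines do: save next before clearing it. */
    static void unlink_chain(struct mbuf *m)
    {
        while (m) {
            struct mbuf *m_next = m->next;
            m->next = NULL;
            m = m_next;
        }
    }

    int main(void)
    {
        struct mbuf c = {NULL}, b = {&c}, a = {&b};
        unlink_chain(&a);
        assert(a.next == NULL && b.next == NULL && c.next == NULL);
        return 0;
    }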