From: Nithin Dabilpuram <ndabilpu...@marvell.com>

Add multi-seg support for Inline IPsec. Also, in reassembly, FI_PAD is
not required to compute the pointer to the fragment info, because the
fragment info always sits at CPT_PARSE_HDR_S + FI_OFFSET * 8 and is
8B aligned.
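For reference, the FI_OFFSET decode behaves as below (a minimal
standalone sketch of the computation this patch relies on; the helper
name and generic pointer types are illustrative, not driver code):

#include <stdint.h>

/* FI_OFFSET decode: an encoded 0 means 256B (32 * 8B), otherwise
 * offset * 8B. The result is always a multiple of 8B, which is why
 * FI_PAD is not needed to reach the fragment info.
 */
static inline const void *
finfo_ptr(const void *hdr, uint8_t fi_offset)
{
        uint32_t off = (((fi_offset - 1u) & 0x1f) + 1u) * 8u;

        return (const uint8_t *)hdr + off;
}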
Signed-off-by: Nithin Dabilpuram <ndabilpu...@marvell.com>
Signed-off-by: Rahul Bhansali <rbhans...@marvell.com>
---
 drivers/net/cnxk/cn10k_rx.h |  40 +++++---
 drivers/net/cnxk/cn10k_tx.h | 181 ++++++++++++++++++++++++++----------
 2 files changed, 159 insertions(+), 62 deletions(-)

diff --git a/drivers/net/cnxk/cn10k_rx.h b/drivers/net/cnxk/cn10k_rx.h
index 5ecb20f038..8501ae9439 100644
--- a/drivers/net/cnxk/cn10k_rx.h
+++ b/drivers/net/cnxk/cn10k_rx.h
@@ -171,7 +171,7 @@ nix_sec_attach_frags(const struct cpt_parse_hdr_s *hdr,
 
         /* offset of 0 implies 256B, otherwise it implies offset*8B */
         offset = (((offset - 1) & 0x1f) + 1) * 8;
-        finfo = RTE_PTR_ADD(hdr, offset + hdr->w2.fi_pad);
+        finfo = RTE_PTR_ADD(hdr, offset);
 
         /* Frag-0: */
         wqe = (uint64_t *)(rte_be_to_cpu_64(hdr->wqe_ptr));
@@ -300,7 +300,7 @@ nix_sec_reassemble_frags(const struct cpt_parse_hdr_s *hdr, uint64_t cq_w1,
 
         /* offset of 0 implies 256B, otherwise it implies offset*8B */
         offset = (((offset - 1) & 0x1f) + 1) * 8;
-        finfo = RTE_PTR_ADD(hdr, offset + hdr->w2.fi_pad);
+        finfo = RTE_PTR_ADD(hdr, offset);
 
         /* Frag-0: */
         wqe = (uint64_t *)rte_be_to_cpu_64(hdr->wqe_ptr);
@@ -685,20 +685,32 @@ nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf,
         struct rte_mbuf *head;
         const rte_iova_t *eol;
         uint8_t nb_segs;
+        uint64_t cq_w1;
+        int64_t len;
         uint64_t sg;
 
+        cq_w1 = *(const uint64_t *)rx;
+        /* Use inner rx parse for meta pkts sg list */
+        if (cq_w1 & BIT(11) && flags & NIX_RX_OFFLOAD_SECURITY_F) {
+                const uint64_t *wqe = (const uint64_t *)(mbuf + 1);
+                rx = (const union nix_rx_parse_u *)(wqe + 1);
+        }
+
         sg = *(const uint64_t *)(rx + 1);
         nb_segs = (sg >> 48) & 0x3;
 
-        if (nb_segs == 1 && !(flags & NIX_RX_SEC_REASSEMBLY_F)) {
-                mbuf->next = NULL;
+        if (nb_segs == 1)
                 return;
-        }
 
-        mbuf->pkt_len = (rx->pkt_lenm1 + 1) - (flags & NIX_RX_OFFLOAD_TSTAMP_F ?
-                                               CNXK_NIX_TIMESYNC_RX_OFFSET : 0);
-        mbuf->data_len = (sg & 0xFFFF) - (flags & NIX_RX_OFFLOAD_TSTAMP_F ?
-                                          CNXK_NIX_TIMESYNC_RX_OFFSET : 0);
+        /* For security we have already updated right pkt_len */
+        if (cq_w1 & BIT(11) && flags & NIX_RX_OFFLOAD_SECURITY_F)
+                len = mbuf->pkt_len;
+        else
+                len = rx->pkt_lenm1 + 1;
+        mbuf->pkt_len = len - (flags & NIX_RX_OFFLOAD_TSTAMP_F ? CNXK_NIX_TIMESYNC_RX_OFFSET : 0);
+        mbuf->data_len =
+                (sg & 0xFFFF) - (flags & NIX_RX_OFFLOAD_TSTAMP_F ? CNXK_NIX_TIMESYNC_RX_OFFSET : 0);
+        len -= mbuf->data_len;
         mbuf->nb_segs = nb_segs;
         sg = sg >> 16;
 
@@ -717,6 +729,7 @@ nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf,
                 RTE_MEMPOOL_CHECK_COOKIES(mbuf->pool, (void **)&mbuf, 1, 1);
 
                 mbuf->data_len = sg & 0xFFFF;
+                len -= sg & 0xFFFF;
                 sg = sg >> 16;
                 *(uint64_t *)(&mbuf->rearm_data) = rearm;
                 nb_segs--;
@@ -729,7 +742,10 @@ nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf,
                         iova_list = (const rte_iova_t *)(iova_list + 1);
                 }
         }
-        mbuf->next = NULL;
+
+        /* Adjust last mbuf data length with negative offset for security pkts if needed */
+        if (cq_w1 & BIT(11) && flags & NIX_RX_OFFLOAD_SECURITY_F && len < 0)
+                mbuf->data_len += len;
 }
 
 static __rte_always_inline void
@@ -787,9 +803,9 @@ cn10k_nix_cqe_to_mbuf(const struct nix_cqe_hdr_s *cq, const uint32_t tag,
          * For multi segment packets, mbuf length correction according
          * to Rx timestamp length will be handled later during
          * timestamp data process.
-         * Hence, flag argument is not required.
+         * Hence, timestamp flag argument is not required.
          */
-        nix_cqe_xtract_mseg(rx, mbuf, val, 0);
+        nix_cqe_xtract_mseg(rx, mbuf, val, flag & ~NIX_RX_OFFLOAD_TSTAMP_F);
 }
 
 static inline uint16_t
diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h
index ea13866b20..2be5ecdf5e 100644
--- a/drivers/net/cnxk/cn10k_tx.h
+++ b/drivers/net/cnxk/cn10k_tx.h
@@ -282,7 +282,7 @@ cn10k_nix_prep_sec_vec(struct rte_mbuf *m, uint64x2_t *cmd0, uint64x2_t *cmd1,
         uint8_t l2_len, l3_len;
         uintptr_t dptr, nixtx;
         uint64_t ucode_cmd[4];
-        uint64_t *laddr;
+        uint64_t *laddr, w0;
         uint16_t tag;
         uint64_t sa;
@@ -329,30 +329,57 @@ cn10k_nix_prep_sec_vec(struct rte_mbuf *m, uint64x2_t *cmd0, uint64x2_t *cmd1,
 
         /* Update send descriptors. Security is single segment only */
         *cmd0 = vsetq_lane_u16(pkt_len + dlen_adj, *cmd0, 0);
-        *cmd1 = vsetq_lane_u16(pkt_len + dlen_adj, *cmd1, 0);
 
-        /* Get area where NIX descriptor needs to be stored */
-        nixtx = dptr + pkt_len + dlen_adj;
-        nixtx += BIT_ULL(7);
-        nixtx = (nixtx - 1) & ~(BIT_ULL(7) - 1);
+        /* CPT word 5 and word 6 */
+        w0 = 0;
+        ucode_cmd[2] = 0;
+        if (flags & NIX_TX_MULTI_SEG_F && m->nb_segs > 1) {
+                struct rte_mbuf *last = rte_pktmbuf_lastseg(m);
+
+                /* Get area where NIX descriptor needs to be stored */
+                nixtx = rte_pktmbuf_mtod_offset(last, uintptr_t, last->data_len + dlen_adj);
+                nixtx += BIT_ULL(7);
+                nixtx = (nixtx - 1) & ~(BIT_ULL(7) - 1);
+                nixtx += 16;
+
+                dptr = nixtx + ((flags & NIX_TX_NEED_EXT_HDR) ? 32 : 16);
+
+                /* Set l2 length as data offset */
+                w0 = (uint64_t)l2_len << 16;
+                w0 |= cn10k_nix_tx_ext_subs(flags) + NIX_NB_SEGS_TO_SEGDW(m->nb_segs);
+                ucode_cmd[1] = dptr | ((uint64_t)m->nb_segs << 60);
+        } else {
+                /* Get area where NIX descriptor needs to be stored */
+                nixtx = dptr + pkt_len + dlen_adj;
+                nixtx += BIT_ULL(7);
+                nixtx = (nixtx - 1) & ~(BIT_ULL(7) - 1);
+                nixtx += 16;
+
+                w0 |= cn10k_nix_tx_ext_subs(flags) + 1;
+                dptr += l2_len;
+                ucode_cmd[1] = dptr;
+                *cmd1 = vsetq_lane_u16(pkt_len + dlen_adj, *cmd1, 0);
+                /* DLEN passed is excluding L2 HDR */
+                pkt_len -= l2_len;
+        }
+        w0 |= nixtx;
+        /* CPT word 0 and 1 */
+        cmd01 = vdupq_n_u64(0);
+        cmd01 = vsetq_lane_u64(w0, cmd01, 0);
+        /* CPT_RES_S is 16B above NIXTX */
+        cmd01 = vsetq_lane_u64(nixtx - 16, cmd01, 1);
 
         /* Return nixtx addr */
-        *nixtx_addr = (nixtx + 16);
+        *nixtx_addr = nixtx;
 
-        /* DLEN passed is excluding L2HDR */
-        pkt_len -= l2_len;
+        /* CPT Word 4 and Word 7 */
         tag = sa_base & 0xFFFFUL;
         sa_base &= ~0xFFFFUL;
         sa = (uintptr_t)roc_nix_inl_ot_ipsec_outb_sa(sa_base, sess_priv.sa_idx);
         ucode_cmd[3] = (ROC_CPT_DFLT_ENG_GRP_SE_IE << 61 | 1UL << 60 | sa);
-        ucode_cmd[0] = (ROC_IE_OT_MAJOR_OP_PROCESS_OUTBOUND_IPSEC << 48 |
-                        ((uint64_t)sess_priv.chksum) << 32 |
-                        ((uint64_t)sess_priv.dec_ttl) << 34 | pkt_len);
-
-        /* CPT Word 0 and Word 1 */
-        cmd01 = vdupq_n_u64((nixtx + 16) | (cn10k_nix_tx_ext_subs(flags) + 1));
-        /* CPT_RES_S is 16B above NIXTX */
-        cmd01 = vsetq_lane_u8(nixtx & BIT_ULL(7), cmd01, 8);
+        ucode_cmd[0] = (ROC_IE_OT_MAJOR_OP_PROCESS_OUTBOUND_IPSEC << 48 | 1UL << 54 |
+                        ((uint64_t)sess_priv.chksum) << 32 | ((uint64_t)sess_priv.dec_ttl) << 34 |
+                        pkt_len);
 
         /* CPT word 2 and 3 */
         cmd23 = vdupq_n_u64(0);
@@ -371,9 +398,6 @@ cn10k_nix_prep_sec_vec(struct rte_mbuf *m, uint64x2_t *cmd0, uint64x2_t *cmd1,
                         rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6);
         }
 
-        ucode_cmd[1] = dptr;
-        ucode_cmd[2] = dptr;
-
         /* Move to our line */
         laddr = LMT_OFF(lbase, *lnum, *loff ?
                         64 : 0);
@@ -404,7 +428,7 @@ cn10k_nix_prep_sec(struct rte_mbuf *m, uint64_t *cmd, uintptr_t *nixtx_addr,
         uint8_t l2_len, l3_len;
         uintptr_t dptr, nixtx;
         uint64_t ucode_cmd[4];
-        uint64_t *laddr;
+        uint64_t *laddr, w0;
         uint16_t tag;
         uint64_t sa;
@@ -457,30 +481,56 @@ cn10k_nix_prep_sec(struct rte_mbuf *m, uint64_t *cmd, uintptr_t *nixtx_addr,
 
         /* Update send descriptors. Security is single segment only */
         send_hdr->w0.total = pkt_len + dlen_adj;
-        sg->seg1_size = pkt_len + dlen_adj;
 
-        /* Get area where NIX descriptor needs to be stored */
-        nixtx = dptr + pkt_len + dlen_adj;
-        nixtx += BIT_ULL(7);
-        nixtx = (nixtx - 1) & ~(BIT_ULL(7) - 1);
+        /* CPT word 5 and word 6 */
+        w0 = 0;
+        ucode_cmd[2] = 0;
+        if (flags & NIX_TX_MULTI_SEG_F && m->nb_segs > 1) {
+                struct rte_mbuf *last = rte_pktmbuf_lastseg(m);
+
+                /* Get area where NIX descriptor needs to be stored */
+                nixtx = rte_pktmbuf_mtod_offset(last, uintptr_t, last->data_len + dlen_adj);
+                nixtx += BIT_ULL(7);
+                nixtx = (nixtx - 1) & ~(BIT_ULL(7) - 1);
+                nixtx += 16;
+
+                dptr = nixtx + ((flags & NIX_TX_NEED_EXT_HDR) ? 32 : 16);
+
+                /* Set l2 length as data offset */
+                w0 = (uint64_t)l2_len << 16;
+                w0 |= cn10k_nix_tx_ext_subs(flags) + NIX_NB_SEGS_TO_SEGDW(m->nb_segs);
+                ucode_cmd[1] = dptr | ((uint64_t)m->nb_segs << 60);
+        } else {
+                /* Get area where NIX descriptor needs to be stored */
+                nixtx = dptr + pkt_len + dlen_adj;
+                nixtx += BIT_ULL(7);
+                nixtx = (nixtx - 1) & ~(BIT_ULL(7) - 1);
+                nixtx += 16;
+
+                w0 |= cn10k_nix_tx_ext_subs(flags) + 1;
+                dptr += l2_len;
+                ucode_cmd[1] = dptr;
+                sg->seg1_size = pkt_len + dlen_adj;
+                pkt_len -= l2_len;
+        }
+        w0 |= nixtx;
+        /* CPT word 0 and 1 */
+        cmd01 = vdupq_n_u64(0);
+        cmd01 = vsetq_lane_u64(w0, cmd01, 0);
+        /* CPT_RES_S is 16B above NIXTX */
+        cmd01 = vsetq_lane_u64(nixtx - 16, cmd01, 1);
 
         /* Return nixtx addr */
-        *nixtx_addr = (nixtx + 16);
+        *nixtx_addr = nixtx;
 
-        /* DLEN passed is excluding L2HDR */
-        pkt_len -= l2_len;
+        /* CPT Word 4 and Word 7 */
         tag = sa_base & 0xFFFFUL;
         sa_base &= ~0xFFFFUL;
         sa = (uintptr_t)roc_nix_inl_ot_ipsec_outb_sa(sa_base, sess_priv.sa_idx);
         ucode_cmd[3] = (ROC_CPT_DFLT_ENG_GRP_SE_IE << 61 | 1UL << 60 | sa);
-        ucode_cmd[0] = (ROC_IE_OT_MAJOR_OP_PROCESS_OUTBOUND_IPSEC << 48 |
-                        ((uint64_t)sess_priv.chksum) << 32 |
-                        ((uint64_t)sess_priv.dec_ttl) << 34 | pkt_len);
-
-        /* CPT Word 0 and Word 1. Assume no multi-seg support */
-        cmd01 = vdupq_n_u64((nixtx + 16) | (cn10k_nix_tx_ext_subs(flags) + 1));
-        /* CPT_RES_S is 16B above NIXTX */
-        cmd01 = vsetq_lane_u8(nixtx & BIT_ULL(7), cmd01, 8);
+        ucode_cmd[0] = (ROC_IE_OT_MAJOR_OP_PROCESS_OUTBOUND_IPSEC << 48 | 1UL << 54 |
+                        ((uint64_t)sess_priv.chksum) << 32 | ((uint64_t)sess_priv.dec_ttl) << 34 |
+                        pkt_len);
 
         /* CPT word 2 and 3 */
         cmd23 = vdupq_n_u64(0);
@@ -498,8 +548,6 @@ cn10k_nix_prep_sec(struct rte_mbuf *m, uint64_t *cmd, uintptr_t *nixtx_addr,
                 *((uint16_t *)(dptr - 2)) = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6);
         }
 
-        ucode_cmd[1] = dptr;
-        ucode_cmd[2] = dptr;
         /* Move to our line */
         laddr = LMT_OFF(lbase, *lnum, *loff ?
                         64 : 0);
@@ -858,6 +906,8 @@ cn10k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
         union nix_send_sg_s *sg;
         struct rte_mbuf *m_next;
         uint64_t *slist, sg_u;
+        uint64_t len, dlen;
+        uint64_t ol_flags;
         uint64_t nb_segs;
         uint64_t segdw;
         uint8_t off, i;
@@ -870,10 +920,14 @@ cn10k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
                 off = 0;
 
         sg = (union nix_send_sg_s *)&cmd[2 + off];
+        len = send_hdr->w0.total;
+        if (flags & NIX_TX_OFFLOAD_SECURITY_F)
+                ol_flags = m->ol_flags;
 
         /* Start from second segment, first segment is already there */
         i = 1;
         sg_u = sg->u;
+        len -= sg_u & 0xFFFF;
         nb_segs = m->nb_segs - 1;
         m_next = m->next;
         slist = &cmd[3 + off + 1];
@@ -888,6 +942,7 @@ cn10k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
         RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
         rte_io_wmb();
 #endif
+        m->next = NULL;
         m = m_next;
         if (!m)
                 goto done;
@@ -895,7 +950,9 @@ cn10k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
         /* Fill mbuf segments */
         do {
                 m_next = m->next;
-                sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
+                dlen = m->data_len;
+                len -= dlen;
+                sg_u = sg_u | ((uint64_t)dlen << (i << 4));
                 *slist = rte_mbuf_data_iova(m);
                 /* Set invert df if buffer is not to be freed by H/W */
                 if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
@@ -919,10 +976,20 @@ cn10k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
                         sg_u = sg->u;
                         slist++;
                 }
+                m->next = NULL;
                 m = m_next;
         } while (nb_segs);
 
 done:
+        /* Add remaining bytes of security data to last seg */
+        if (flags & NIX_TX_OFFLOAD_SECURITY_F && ol_flags & RTE_MBUF_F_TX_SEC_OFFLOAD && len) {
+                uint8_t shft = ((i - 1) << 4);
+
+                dlen = ((sg_u >> shft) & 0xFFFFULL) + len;
+                sg_u = sg_u & ~(0xFFFFULL << shft);
+                sg_u |= dlen << shft;
+        }
+
         sg->u = sg_u;
         sg->segs = i;
         segdw = (uint64_t *)slist - (uint64_t *)&cmd[2 + off];
@@ -1266,17 +1333,26 @@ cn10k_nix_prepare_mseg_vec_list(struct rte_mbuf *m, uint64_t *cmd,
                                 union nix_send_sg_s *sg, const uint32_t flags)
 {
         struct rte_mbuf *m_next;
+        uint64_t ol_flags, len;
         uint64_t *slist, sg_u;
         uint16_t nb_segs;
+        uint64_t dlen;
         int i = 1;
 
-        sh->total = m->pkt_len;
+        len = m->pkt_len;
+        ol_flags = m->ol_flags;
+        /* For security we would have already populated the right length */
+        if (flags & NIX_TX_OFFLOAD_SECURITY_F && ol_flags & RTE_MBUF_F_TX_SEC_OFFLOAD)
+                len = sh->total;
+        sh->total = len;
         /* Clear sg->u header before use */
         sg->u &= 0xFC00000000000000;
         sg_u = sg->u;
         slist = &cmd[0];
-        sg_u = sg_u | ((uint64_t)m->data_len);
+        dlen = m->data_len;
+        len -= dlen;
+        sg_u = sg_u | ((uint64_t)dlen);
         nb_segs = m->nb_segs - 1;
         m_next = m->next;
 
@@ -1291,11 +1367,14 @@ cn10k_nix_prepare_mseg_vec_list(struct rte_mbuf *m, uint64_t *cmd,
         rte_io_wmb();
 #endif
 
+        m->next = NULL;
         m = m_next;
         /* Fill mbuf segments */
         do {
                 m_next = m->next;
-                sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
+                dlen = m->data_len;
+                len -= dlen;
+                sg_u = sg_u | ((uint64_t)dlen << (i << 4));
                 *slist = rte_mbuf_data_iova(m);
                 /* Set invert df if buffer is not to be freed by H/W */
                 if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
@@ -1320,9 +1399,18 @@ cn10k_nix_prepare_mseg_vec_list(struct rte_mbuf *m, uint64_t *cmd,
                         sg_u = sg->u;
                         slist++;
                 }
+                m->next = NULL;
                 m = m_next;
         } while (nb_segs);
 
+        /* Add remaining bytes of security data to last seg */
+        if (flags & NIX_TX_OFFLOAD_SECURITY_F && ol_flags & RTE_MBUF_F_TX_SEC_OFFLOAD && len) {
+                uint8_t shft = ((i - 1) << 4);
+
+                dlen = ((sg_u >> shft) & 0xFFFF) + len;
+                sg_u = sg_u & ~(0xFFFFULL << shft);
+                sg_u |= dlen << shft;
+        }
         sg->u = sg_u;
         sg->segs = i;
 }
@@ -2689,13 +2777,6 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,
                         lnum += 1;
                 }
 
-                if (flags & NIX_TX_MULTI_SEG_F) {
-                        tx_pkts[0]->next = NULL;
-                        tx_pkts[1]->next = NULL;
-                        tx_pkts[2]->next = NULL;
-                        tx_pkts[3]->next = NULL;
-                }
-
                 tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP;
         }
 
-- 
2.25.1
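As an aside, the "Add remaining bytes of security data to last seg"
fixup used in both Tx mseg fill paths above packs one 16-bit segment
size per SG-word slot at bit offset (slot << 4). A minimal standalone
sketch of that fixup (helper name and signature are illustrative, not
driver code):

#include <stdint.h>

/* Fold "rem" leftover bytes (e.g. an IPsec trailer grown by the CPT)
 * into the 16-bit size of the last filled segment slot of an SG word.
 */
static inline uint64_t
sg_add_to_last_seg(uint64_t sg_u, unsigned int last_slot, uint64_t rem)
{
        unsigned int shft = last_slot << 4;                 /* 16 bits per slot */
        uint64_t dlen = ((sg_u >> shft) & 0xFFFFULL) + rem; /* grow last size */

        sg_u &= ~(0xFFFFULL << shft);                       /* clear old size */
        return sg_u | (dlen << shft);                       /* write new size */
}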