On Mon, Jun 21, 2021 at 1:59 AM <pbhagavat...@marvell.com> wrote: > > From: Pavan Nikhilesh <pbhagavat...@marvell.com> > > Add multi-segment Rx vector routine, form the primary mbufs using > vector path switch to scalar path when extracting segments. > > Signed-off-by: Pavan Nikhilesh <pbhagavat...@marvell.com> > --- > Depends-on: http://patches.dpdk.org/project/dpdk/list/?series=17394
Now that the net/cnxk related changes have been merged to dpdk-next-net-mrvl/for-next-net, could you rebase and send a separate series based on dpdk-next-net-mrvl/for-next-net for the net/cnxk related changes? > > v3 Changes: > - Spell check. > > drivers/net/cnxk/cn10k_rx.c | 31 +++++++++++------ > drivers/net/cnxk/cn10k_rx.h | 51 +++++++++++++++++++++------- > drivers/net/cnxk/cn10k_rx_vec_mseg.c | 17 ++++++++++ > drivers/net/cnxk/cn9k_rx.c | 31 +++++++++++------ > drivers/net/cnxk/cn9k_rx.h | 51 +++++++++++++++++++++------- > drivers/net/cnxk/cn9k_rx_vec_mseg.c | 18 ++++++++++ > drivers/net/cnxk/meson.build | 2 ++ > 7 files changed, 157 insertions(+), 44 deletions(-) > create mode 100644 drivers/net/cnxk/cn10k_rx_vec_mseg.c > create mode 100644 drivers/net/cnxk/cn9k_rx_vec_mseg.c > > diff --git a/drivers/net/cnxk/cn10k_rx.c b/drivers/net/cnxk/cn10k_rx.c > index 5c956c06b4..3a9fd71309 100644 > --- a/drivers/net/cnxk/cn10k_rx.c > +++ b/drivers/net/cnxk/cn10k_rx.c > @@ -29,6 +29,8 @@ pick_rx_func(struct rte_eth_dev *eth_dev, > [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_CHECKSUM_F)] > [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_PTYPE_F)] > [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_RSS_F)]; > + > + rte_atomic_thread_fence(__ATOMIC_RELEASE); > } > > void > @@ -60,20 +62,29 @@ cn10k_eth_set_rx_function(struct rte_eth_dev *eth_dev) > #undef R > }; > > - /* For PTP enabled, scalar rx function should be chosen as most of the > - * PTP apps are implemented to rx burst 1 pkt. 
> - */ > - if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) > - pick_rx_func(eth_dev, nix_eth_rx_burst); > - else > - pick_rx_func(eth_dev, nix_eth_rx_vec_burst); > + const eth_rx_burst_t nix_eth_rx_vec_burst_mseg[2][2][2][2][2][2] = { > +#define R(name, f5, f4, f3, f2, f1, f0, flags) > \ > + [f5][f4][f3][f2][f1][f0] = cn10k_nix_recv_pkts_vec_mseg_##name, > > - if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER) > - pick_rx_func(eth_dev, nix_eth_rx_burst_mseg); > + NIX_RX_FASTPATH_MODES > +#undef R > + }; > > /* Copy multi seg version with no offload for tear down sequence */ > if (rte_eal_process_type() == RTE_PROC_PRIMARY) > dev->rx_pkt_burst_no_offload = > nix_eth_rx_burst_mseg[0][0][0][0][0][0]; > - rte_mb(); > + > + /* For PTP enabled, scalar rx function should be chosen as most of the > + * PTP apps are implemented to rx burst 1 pkt. > + */ > + if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) { > + if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER) > + return pick_rx_func(eth_dev, nix_eth_rx_burst_mseg); > + return pick_rx_func(eth_dev, nix_eth_rx_burst); > + } > + > + if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER) > + return pick_rx_func(eth_dev, nix_eth_rx_vec_burst_mseg); > + return pick_rx_func(eth_dev, nix_eth_rx_vec_burst); > } > diff --git a/drivers/net/cnxk/cn10k_rx.h b/drivers/net/cnxk/cn10k_rx.h > index 1cc37cbaa0..5926ff7f46 100644 > --- a/drivers/net/cnxk/cn10k_rx.h > +++ b/drivers/net/cnxk/cn10k_rx.h > @@ -119,8 +119,15 @@ nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, > struct rte_mbuf *mbuf, > > sg = *(const uint64_t *)(rx + 1); > nb_segs = (sg >> 48) & 0x3; > - mbuf->nb_segs = nb_segs; > + > + if (nb_segs == 1) { > + mbuf->next = NULL; > + return; > + } > + > + mbuf->pkt_len = rx->pkt_lenm1 + 1; > mbuf->data_len = sg & 0xFFFF; > + mbuf->nb_segs = nb_segs; > sg = sg >> 16; > > eol = ((const rte_iova_t *)(rx + 1) + ((rx->desc_sizem1 + 1) << 1)); > @@ -195,15 +202,14 @@ cn10k_nix_cqe_to_mbuf(const struct 
nix_cqe_hdr_s *cq, > const uint32_t tag, > ol_flags = nix_update_match_id(rx->match_id, ol_flags, mbuf); > > mbuf->ol_flags = ol_flags; > - *(uint64_t *)(&mbuf->rearm_data) = val; > mbuf->pkt_len = len; > + mbuf->data_len = len; > + *(uint64_t *)(&mbuf->rearm_data) = val; > > - if (flag & NIX_RX_MULTI_SEG_F) { > + if (flag & NIX_RX_MULTI_SEG_F) > nix_cqe_xtract_mseg(rx, mbuf, val); > - } else { > - mbuf->data_len = len; > + else > mbuf->next = NULL; > - } > } > > static inline uint16_t > @@ -481,16 +487,34 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct > rte_mbuf **rx_pkts, > vst1q_u64((uint64_t *)mbuf2->rearm_data, rearm2); > vst1q_u64((uint64_t *)mbuf3->rearm_data, rearm3); > > - /* Update that no more segments */ > - mbuf0->next = NULL; > - mbuf1->next = NULL; > - mbuf2->next = NULL; > - mbuf3->next = NULL; > - > /* Store the mbufs to rx_pkts */ > vst1q_u64((uint64_t *)&rx_pkts[packets], mbuf01); > vst1q_u64((uint64_t *)&rx_pkts[packets + 2], mbuf23); > > + if (flags & NIX_RX_MULTI_SEG_F) { > + /* Multi segment is enable build mseg list for > + * individual mbufs in scalar mode. 
> + */ > + nix_cqe_xtract_mseg((union nix_rx_parse_u *) > + (cq0 + CQE_SZ(0) + 8), mbuf0, > + mbuf_initializer); > + nix_cqe_xtract_mseg((union nix_rx_parse_u *) > + (cq0 + CQE_SZ(1) + 8), mbuf1, > + mbuf_initializer); > + nix_cqe_xtract_mseg((union nix_rx_parse_u *) > + (cq0 + CQE_SZ(2) + 8), mbuf2, > + mbuf_initializer); > + nix_cqe_xtract_mseg((union nix_rx_parse_u *) > + (cq0 + CQE_SZ(3) + 8), mbuf3, > + mbuf_initializer); > + } else { > + /* Update that no more segments */ > + mbuf0->next = NULL; > + mbuf1->next = NULL; > + mbuf2->next = NULL; > + mbuf3->next = NULL; > + } > + > /* Prefetch mbufs */ > roc_prefetch_store_keep(mbuf0); > roc_prefetch_store_keep(mbuf1); > @@ -645,6 +669,9 @@ R(vlan_ts_mark_cksum_ptype_rss, 1, 1, 1, 1, 1, 1, > \ > void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); > \ > > \ > uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_##name( > \ > + void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); > \ > + > \ > + uint16_t __rte_noinline __rte_hot > cn10k_nix_recv_pkts_vec_mseg_##name( \ > void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); > > NIX_RX_FASTPATH_MODES > diff --git a/drivers/net/cnxk/cn10k_rx_vec_mseg.c > b/drivers/net/cnxk/cn10k_rx_vec_mseg.c > new file mode 100644 > index 0000000000..04d1e46c82 > --- /dev/null > +++ b/drivers/net/cnxk/cn10k_rx_vec_mseg.c > @@ -0,0 +1,17 @@ > +/* SPDX-License-Identifier: BSD-3-Clause > + * Copyright(C) 2021 Marvell. 
> + */ > + > +#include "cn10k_ethdev.h" > +#include "cn10k_rx.h" > + > +#define R(name, f5, f4, f3, f2, f1, f0, flags) > \ > + uint16_t __rte_noinline __rte_hot > cn10k_nix_recv_pkts_vec_mseg_##name( \ > + void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts) > \ > + { > \ > + return cn10k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, > \ > + (flags) | NIX_RX_MULTI_SEG_F); > \ > + } > + > +NIX_RX_FASTPATH_MODES > +#undef R > diff --git a/drivers/net/cnxk/cn9k_rx.c b/drivers/net/cnxk/cn9k_rx.c > index 0acedd0a1f..d293d4eac3 100644 > --- a/drivers/net/cnxk/cn9k_rx.c > +++ b/drivers/net/cnxk/cn9k_rx.c > @@ -29,6 +29,8 @@ pick_rx_func(struct rte_eth_dev *eth_dev, > [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_CHECKSUM_F)] > [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_PTYPE_F)] > [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_RSS_F)]; > + > + rte_atomic_thread_fence(__ATOMIC_RELEASE); > } > > void > @@ -60,20 +62,29 @@ cn9k_eth_set_rx_function(struct rte_eth_dev *eth_dev) > #undef R > }; > > - /* For PTP enabled, scalar rx function should be chosen as most of the > - * PTP apps are implemented to rx burst 1 pkt. > - */ > - if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) > - pick_rx_func(eth_dev, nix_eth_rx_burst); > - else > - pick_rx_func(eth_dev, nix_eth_rx_vec_burst); > + const eth_rx_burst_t nix_eth_rx_vec_burst_mseg[2][2][2][2][2][2] = { > +#define R(name, f5, f4, f3, f2, f1, f0, flags) > \ > + [f5][f4][f3][f2][f1][f0] = cn9k_nix_recv_pkts_vec_mseg_##name, > > - if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER) > - pick_rx_func(eth_dev, nix_eth_rx_burst_mseg); > + NIX_RX_FASTPATH_MODES > +#undef R > + }; > > /* Copy multi seg version with no offload for tear down sequence */ > if (rte_eal_process_type() == RTE_PROC_PRIMARY) > dev->rx_pkt_burst_no_offload = > nix_eth_rx_burst_mseg[0][0][0][0][0][0]; > - rte_mb(); > + > + /* For PTP enabled, scalar rx function should be chosen as most of the > + * PTP apps are implemented to rx burst 1 pkt. 
> + */ > + if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) { > + if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER) > + return pick_rx_func(eth_dev, nix_eth_rx_burst_mseg); > + return pick_rx_func(eth_dev, nix_eth_rx_burst); > + } > + > + if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER) > + return pick_rx_func(eth_dev, nix_eth_rx_vec_burst_mseg); > + return pick_rx_func(eth_dev, nix_eth_rx_vec_burst); > } > diff --git a/drivers/net/cnxk/cn9k_rx.h b/drivers/net/cnxk/cn9k_rx.h > index 10ef5c6905..5ae9e8195c 100644 > --- a/drivers/net/cnxk/cn9k_rx.h > +++ b/drivers/net/cnxk/cn9k_rx.h > @@ -120,8 +120,15 @@ nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, > struct rte_mbuf *mbuf, > > sg = *(const uint64_t *)(rx + 1); > nb_segs = (sg >> 48) & 0x3; > - mbuf->nb_segs = nb_segs; > + > + if (nb_segs == 1) { > + mbuf->next = NULL; > + return; > + } > + > + mbuf->pkt_len = rx->pkt_lenm1 + 1; > mbuf->data_len = sg & 0xFFFF; > + mbuf->nb_segs = nb_segs; > sg = sg >> 16; > > eol = ((const rte_iova_t *)(rx + 1) + > @@ -198,15 +205,14 @@ cn9k_nix_cqe_to_mbuf(const struct nix_cqe_hdr_s *cq, > const uint32_t tag, > nix_update_match_id(rx->cn9k.match_id, ol_flags, > mbuf); > > mbuf->ol_flags = ol_flags; > - *(uint64_t *)(&mbuf->rearm_data) = val; > mbuf->pkt_len = len; > + mbuf->data_len = len; > + *(uint64_t *)(&mbuf->rearm_data) = val; > > - if (flag & NIX_RX_MULTI_SEG_F) { > + if (flag & NIX_RX_MULTI_SEG_F) > nix_cqe_xtract_mseg(rx, mbuf, val); > - } else { > - mbuf->data_len = len; > + else > mbuf->next = NULL; > - } > } > > static inline uint16_t > @@ -484,16 +490,34 @@ cn9k_nix_recv_pkts_vector(void *rx_queue, struct > rte_mbuf **rx_pkts, > vst1q_u64((uint64_t *)mbuf2->rearm_data, rearm2); > vst1q_u64((uint64_t *)mbuf3->rearm_data, rearm3); > > - /* Update that no more segments */ > - mbuf0->next = NULL; > - mbuf1->next = NULL; > - mbuf2->next = NULL; > - mbuf3->next = NULL; > - > /* Store the mbufs to rx_pkts */ > vst1q_u64((uint64_t *)&rx_pkts[packets], 
mbuf01); > vst1q_u64((uint64_t *)&rx_pkts[packets + 2], mbuf23); > > + if (flags & NIX_RX_MULTI_SEG_F) { > + /* Multi segment is enable build mseg list for > + * individual mbufs in scalar mode. > + */ > + nix_cqe_xtract_mseg((union nix_rx_parse_u *) > + (cq0 + CQE_SZ(0) + 8), mbuf0, > + mbuf_initializer); > + nix_cqe_xtract_mseg((union nix_rx_parse_u *) > + (cq0 + CQE_SZ(1) + 8), mbuf1, > + mbuf_initializer); > + nix_cqe_xtract_mseg((union nix_rx_parse_u *) > + (cq0 + CQE_SZ(2) + 8), mbuf2, > + mbuf_initializer); > + nix_cqe_xtract_mseg((union nix_rx_parse_u *) > + (cq0 + CQE_SZ(3) + 8), mbuf3, > + mbuf_initializer); > + } else { > + /* Update that no more segments */ > + mbuf0->next = NULL; > + mbuf1->next = NULL; > + mbuf2->next = NULL; > + mbuf3->next = NULL; > + } > + > /* Prefetch mbufs */ > roc_prefetch_store_keep(mbuf0); > roc_prefetch_store_keep(mbuf1); > @@ -647,6 +671,9 @@ R(vlan_ts_mark_cksum_ptype_rss, 1, 1, 1, 1, 1, 1, > \ > void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); > \ > > \ > uint16_t __rte_noinline __rte_hot cn9k_nix_recv_pkts_vec_##name( > \ > + void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); > \ > + > \ > + uint16_t __rte_noinline __rte_hot cn9k_nix_recv_pkts_vec_mseg_##name( > \ > void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); > > NIX_RX_FASTPATH_MODES > diff --git a/drivers/net/cnxk/cn9k_rx_vec_mseg.c > b/drivers/net/cnxk/cn9k_rx_vec_mseg.c > new file mode 100644 > index 0000000000..e46d8a4749 > --- /dev/null > +++ b/drivers/net/cnxk/cn9k_rx_vec_mseg.c > @@ -0,0 +1,18 @@ > +/* SPDX-License-Identifier: BSD-3-Clause > + * Copyright(C) 2021 Marvell. 
> + */ > + > +#include "cn9k_ethdev.h" > +#include "cn9k_rx.h" > + > +#define R(name, f5, f4, f3, f2, f1, f0, flags) > \ > + uint16_t __rte_noinline __rte_hot cn9k_nix_recv_pkts_vec_mseg_##name( > \ > + void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts) > \ > + { > \ > + return cn9k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, > \ > + (flags) | > \ > + NIX_RX_MULTI_SEG_F); > \ > + } > + > +NIX_RX_FASTPATH_MODES > +#undef R > diff --git a/drivers/net/cnxk/meson.build b/drivers/net/cnxk/meson.build > index 2071d0dcb2..aa8c7253fb 100644 > --- a/drivers/net/cnxk/meson.build > +++ b/drivers/net/cnxk/meson.build > @@ -23,6 +23,7 @@ sources += files('cn9k_ethdev.c', > 'cn9k_rx.c', > 'cn9k_rx_mseg.c', > 'cn9k_rx_vec.c', > + 'cn9k_rx_vec_mseg.c', > 'cn9k_tx.c', > 'cn9k_tx_mseg.c', > 'cn9k_tx_vec.c') > @@ -32,6 +33,7 @@ sources += files('cn10k_ethdev.c', > 'cn10k_rx.c', > 'cn10k_rx_mseg.c', > 'cn10k_rx_vec.c', > + 'cn10k_rx_vec_mseg.c', > 'cn10k_tx.c', > 'cn10k_tx_mseg.c', > 'cn10k_tx_vec.c') > -- > 2.17.1 >