On Fri, Dec 20, 2019 at 6:33 PM <vattun...@marvell.com> wrote: > > From: Vamsi Attunuru <vattun...@marvell.com> > > The current vector mode implementation floor-aligns the pkt count > with NIX_DESCS_PER_LOOP and processes that many packets. > > This patch addresses the case where pkt count modulo NIX_DESCS_PER_LOOP > is non-zero: after the vector mode processing, the scalar routine is > used to process any leftover packets. The scalar routine > is also used when the descriptor head is about to wrap and turns out to > be unaligned. > > Signed-off-by: Vamsi Attunuru <vattun...@marvell.com> > Signed-off-by: Nithin Dabilpuram <ndabilpu...@marvell.com>
Applied to dpdk-next-net-mrvl/master. Thanks > --- > drivers/net/octeontx2/otx2_rx.c | 18 ++++++++++++++---- > drivers/net/octeontx2/otx2_tx.c | 18 +++++++++++++----- > 2 files changed, 27 insertions(+), 9 deletions(-) > > diff --git a/drivers/net/octeontx2/otx2_rx.c b/drivers/net/octeontx2/otx2_rx.c > index 48565db..8e6452a 100644 > --- a/drivers/net/octeontx2/otx2_rx.c > +++ b/drivers/net/octeontx2/otx2_rx.c > @@ -130,16 +130,22 @@ nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf > **rx_pkts, > const uintptr_t desc = rxq->desc; > uint8x16_t f0, f1, f2, f3; > uint32_t head = rxq->head; > + uint16_t pkts_left; > > pkts = nix_rx_nb_pkts(rxq, wdata, pkts, qmask); > + pkts_left = pkts & (NIX_DESCS_PER_LOOP - 1); > + > /* Packets has to be floor-aligned to NIX_DESCS_PER_LOOP */ > pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP); > > while (packets < pkts) { > - /* Get the CQ pointers, since the ring size is multiple of > - * 4, We can avoid checking the wrap around of head > - * value after the each access unlike scalar version. 
> - */ > + /* Exit loop if head is about to wrap and become unaligned */ > + if (((head + NIX_DESCS_PER_LOOP - 1) & qmask) < > + NIX_DESCS_PER_LOOP) { > + pkts_left += (pkts - packets); > + break; > + } > + > const uintptr_t cq0 = desc + CQE_SZ(head); > > /* Prefetch N desc ahead */ > @@ -301,6 +307,10 @@ nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf > **rx_pkts, > /* Free all the CQs that we've processed */ > otx2_write64((rxq->wdata | packets), rxq->cq_door); > > + if (unlikely(pkts_left)) > + packets += nix_recv_pkts(rx_queue, &rx_pkts[packets], > + pkts_left, flags); > + > return packets; > } > > diff --git a/drivers/net/octeontx2/otx2_tx.c b/drivers/net/octeontx2/otx2_tx.c > index fa53300..96be92a 100644 > --- a/drivers/net/octeontx2/otx2_tx.c > +++ b/drivers/net/octeontx2/otx2_tx.c > @@ -97,7 +97,7 @@ nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf > **tx_pkts, > #define NIX_DESCS_PER_LOOP 4 > static __rte_always_inline uint16_t > nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, > - uint16_t pkts, const uint16_t flags) > + uint16_t pkts, uint64_t *cmd, const uint16_t flags) > { > uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3; > uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3; > @@ -118,11 +118,13 @@ nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf > **tx_pkts, > uint64x2_t cmd20, cmd21; > uint64x2_t cmd30, cmd31; > uint64_t lmt_status, i; > - > - pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP); > + uint16_t pkts_left; > > NIX_XMIT_FC_OR_RETURN(txq, pkts); > > + pkts_left = pkts & (NIX_DESCS_PER_LOOP - 1); > + pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP); > + > /* Reduce the cached count */ > txq->fc_cache_pkts -= pkts; > > @@ -929,17 +931,21 @@ nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf > **tx_pkts, > } while (lmt_status == 0); > } > > + if (unlikely(pkts_left)) > + pkts += nix_xmit_pkts(tx_queue, tx_pkts, pkts_left, cmd, > flags); > + > return pkts; > } > > #else > static 
__rte_always_inline uint16_t > nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, > - uint16_t pkts, const uint16_t flags) > + uint16_t pkts, uint64_t *cmd, const uint16_t flags) > { > RTE_SET_USED(tx_queue); > RTE_SET_USED(tx_pkts); > RTE_SET_USED(pkts); > + RTE_SET_USED(cmd); > RTE_SET_USED(flags); > return 0; > } > @@ -985,12 +991,14 @@ static uint16_t __rte_noinline __hot > \ > otx2_nix_xmit_pkts_vec_ ## name(void *tx_queue, > \ > struct rte_mbuf **tx_pkts, uint16_t pkts) \ > { \ > + uint64_t cmd[sz]; \ > + \ > /* VLAN, TSTMP, TSO is not supported by vec */ \ > if ((flags) & NIX_TX_OFFLOAD_VLAN_QINQ_F || \ > (flags) & NIX_TX_OFFLOAD_TSTAMP_F || \ > (flags) & NIX_TX_OFFLOAD_TSO_F) \ > return 0; \ > - return nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, (flags)); \ > + return nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd, (flags)); \ > } > > NIX_TX_FASTPATH_MODES > -- > 2.8.4 >