On 2/1/2024 3:00 AM, Jiawen Wu wrote: > To optimize Rx/Tx burst process, add SSE/NEON vector instructions on > x86/arm architecture. >
Do you have any performance improvement number with vector implementation, if so can you put it into commit log for record? > Signed-off-by: Jiawen Wu <jiawe...@trustnetic.com> > --- > drivers/net/txgbe/meson.build | 6 + > drivers/net/txgbe/txgbe_ethdev.c | 6 + > drivers/net/txgbe/txgbe_ethdev.h | 1 + > drivers/net/txgbe/txgbe_ethdev_vf.c | 1 + > drivers/net/txgbe/txgbe_rxtx.c | 150 ++++- > drivers/net/txgbe/txgbe_rxtx.h | 18 + > drivers/net/txgbe/txgbe_rxtx_vec_common.h | 301 +++++++++ > drivers/net/txgbe/txgbe_rxtx_vec_neon.c | 604 ++++++++++++++++++ > drivers/net/txgbe/txgbe_rxtx_vec_sse.c | 736 ++++++++++++++++++++++ > 9 files changed, 1817 insertions(+), 6 deletions(-) > create mode 100644 drivers/net/txgbe/txgbe_rxtx_vec_common.h > create mode 100644 drivers/net/txgbe/txgbe_rxtx_vec_neon.c > create mode 100644 drivers/net/txgbe/txgbe_rxtx_vec_sse.c > > diff --git a/drivers/net/txgbe/meson.build b/drivers/net/txgbe/meson.build > index 14729a6cf3..ba7167a511 100644 > --- a/drivers/net/txgbe/meson.build > +++ b/drivers/net/txgbe/meson.build > @@ -24,6 +24,12 @@ sources = files( > > deps += ['hash', 'security'] > > +if arch_subdir == 'x86' > + sources += files('txgbe_rxtx_vec_sse.c') > +elif arch_subdir == 'arm' > + sources += files('txgbe_rxtx_vec_neon.c') > +endif > + > includes += include_directories('base') > > install_headers('rte_pmd_txgbe.h') > diff --git a/drivers/net/txgbe/txgbe_ethdev.c > b/drivers/net/txgbe/txgbe_ethdev.c > index 6bc231a130..2d5b935002 100644 > --- a/drivers/net/txgbe/txgbe_ethdev.c > +++ b/drivers/net/txgbe/txgbe_ethdev.c > @@ -1544,6 +1544,7 @@ txgbe_dev_configure(struct rte_eth_dev *dev) > * allocation Rx preconditions we will reset it. > */ > adapter->rx_bulk_alloc_allowed = true; > + adapter->rx_vec_allowed = true; > > return 0; > } > @@ -2735,6 +2736,11 @@ txgbe_dev_supported_ptypes_get(struct rte_eth_dev *dev) > dev->rx_pkt_burst == txgbe_recv_pkts_bulk_alloc) > return txgbe_get_supported_ptypes(); > > +#if defined(RTE_ARCH_X86) || defined(__ARM_NEON) > + if (dev->rx_pkt_burst == txgbe_recv_pkts_vec || > + dev->rx_pkt_burst == txgbe_recv_scattered_pkts_vec) > + return txgbe_get_supported_ptypes(); > +#endif > Sometimes the packet parsing capability of the device changes based on vector Rx used, but above calls same function. If there is no ptype parsing capability difference, why not just add above checks to previous if block? Btw, 'txgbe_get_supported_ptypes()' now gets a parameter, based on changes in 'next-net', can you please rebase code on top of latest next-net? <...> > @@ -2198,8 +2220,15 @@ txgbe_set_tx_function(struct rte_eth_dev *dev, struct > txgbe_tx_queue *txq) > #endif > txq->tx_free_thresh >= RTE_PMD_TXGBE_TX_MAX_BURST) { > PMD_INIT_LOG(DEBUG, "Using simple tx code path"); > - dev->tx_pkt_burst = txgbe_xmit_pkts_simple; > dev->tx_pkt_prepare = NULL; > + if (txq->tx_free_thresh <= RTE_TXGBE_TX_MAX_FREE_BUF_SZ && > + (rte_eal_process_type() != RTE_PROC_PRIMARY || > Why vector Tx enable only for secondary process? <...> > @@ -297,6 +299,12 @@ struct txgbe_rx_queue { > #ifdef RTE_LIB_SECURITY > uint8_t using_ipsec; > /**< indicates that IPsec RX feature is in use */ > +#endif > + uint64_t mbuf_initializer; /**< value to init mbufs */ > + uint8_t rx_using_sse; > +#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM) > RTE_ARCH_ARM, RTE_ARCH_ARM64 & __ARM_NEON seems used interchangable, what do you think to stick one? Similarly with RTE_ARCH_X86_64 & RTE_ARCH_X86. <...> > +++ b/drivers/net/txgbe/txgbe_rxtx_vec_neon.c > @@ -0,0 +1,604 @@ > +/* SPDX-License-Identifier: BSD-3-Clause > + * Copyright(c) 2015-2024 Beijing WangXun Technology Co., Ltd. > + * Copyright(c) 2010-2015 Intel Corporation > + */ > + > +#include <ethdev_driver.h> > +#include <rte_malloc.h> > +#include <rte_vect.h> > + > +#include "txgbe_ethdev.h" > +#include "txgbe_rxtx.h" > +#include "txgbe_rxtx_vec_common.h" > + > +#pragma GCC diagnostic ignored "-Wcast-qual" > + Is this pragma really required?