From: JieLiu <liuj...@linkdatatechnology.com>

Add SIMD vector Rx/Tx functions.

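Add SSE (x86) and NEON (arm64) vector Rx/Tx paths for the sxe PMD,
selected per architecture in the Makefile and meson build and gated by
SXE_DPDK_L4_FEATURES and SXE_DPDK_SIMD. Vector Rx is chosen in
__sxe_rx_function_set() when the queue depth and offload preconditions
are met, vector Tx buffer cleanup is hooked into __sxe_tx_done_cleanup(),
and the mbuf recycle descriptor refill callback is wired up on DPDK
releases that provide it.
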
Signed-off-by: Jie Liu <liuj...@linkdatatechnology.com>
---
 drivers/net/sxe/Makefile                |   7 +
 drivers/net/sxe/base/sxe_queue_common.c |  55 ++
 drivers/net/sxe/base/sxe_rx_common.c    | 145 ++++-
 drivers/net/sxe/meson.build             |   9 +
 drivers/net/sxe/pf/sxe.h                |   3 +
 drivers/net/sxe/pf/sxe_ethdev.c         |   5 +
 drivers/net/sxe/pf/sxe_rx.c             |   3 +
 drivers/net/sxe/pf/sxe_vec_common.h     | 325 ++++++++++
 drivers/net/sxe/pf/sxe_vec_neon.c       | 760 ++++++++++++++++++++++++
 drivers/net/sxe/pf/sxe_vec_sse.c        | 638 ++++++++++++++++++++
 10 files changed, 1948 insertions(+), 2 deletions(-)
 create mode 100644 drivers/net/sxe/pf/sxe_vec_common.h
 create mode 100644 drivers/net/sxe/pf/sxe_vec_neon.c
 create mode 100644 drivers/net/sxe/pf/sxe_vec_sse.c

diff --git a/drivers/net/sxe/Makefile b/drivers/net/sxe/Makefile
index 8e1e2a53a2..17c24861db 100644
--- a/drivers/net/sxe/Makefile
+++ b/drivers/net/sxe/Makefile
@@ -11,6 +11,7 @@ LIB = librte_pmd_sxe.a
 CFLAGS += -DALLOW_EXPERIMENTAL_API
 CFLAGS += -DSXE_DPDK
 CFLAGS += -DSXE_HOST_DRIVER
+CFLAGS += -DSXE_DPDK_SIMD
 CFLAGS += -O3
 CFLAGS += $(WERROR_FLAGS)
 
@@ -80,6 +81,12 @@ SRCS-$(CONFIG_RTE_LIBRTE_SXE_PMD) += sxe_rx.c
 SRCS-$(CONFIG_RTE_LIBRTE_SXE_PMD) += sxe_stats.c
 SRCS-$(CONFIG_RTE_LIBRTE_SXE_PMD) += sxe_tx.c
 
+ifeq ($(CONFIG_RTE_ARCH_ARM64),y)
+SRCS-$(CONFIG_RTE_LIBRTE_SXE_PMD) += sxe_vec_neon.c
+else
+SRCS-$(CONFIG_RTE_LIBRTE_SXE_PMD) += sxe_vec_sse.c
+endif
+
 # install this header file
 SYMLINK-$(CONFIG_RTE_LIBRTE_SXE_PMD)-include := rte_pmd_sxe.h
 SYMLINK-$(CONFIG_RTE_LIBRTE_SXE_PMD)-include += sxe_dcb.h
diff --git a/drivers/net/sxe/base/sxe_queue_common.c b/drivers/net/sxe/base/sxe_queue_common.c
index 1470fb8e5c..f2af7923e8 100644
--- a/drivers/net/sxe/base/sxe_queue_common.c
+++ b/drivers/net/sxe/base/sxe_queue_common.c
@@ -22,6 +22,10 @@
 #include "sxe_logs.h"
 #include "sxe_regs.h"
 #include "sxe.h"
+#if defined SXE_DPDK_L4_FEATURES && defined SXE_DPDK_SIMD
+#include "sxe_vec_common.h"
+#include <rte_vect.h>
+#endif
 #include "sxe_queue_common.h"
 #include "sxe_queue.h"
 
@@ -66,6 +70,10 @@ s32 __rte_cold __sxe_rx_queue_setup(struct rx_setup *rx_setup, bool is_vf)
        u16 len;
        u64 offloads;
        s32 ret = 0;
+#if defined SXE_DPDK_L4_FEATURES && defined SXE_DPDK_SIMD
+       struct sxe_adapter *pf_adapter = dev->data->dev_private;
+       struct sxevf_adapter *vf_adapter = dev->data->dev_private;
+#endif
 
        PMD_INIT_FUNC_TRACE();
 
@@ -170,6 +178,23 @@ s32 __rte_cold __sxe_rx_queue_setup(struct rx_setup *rx_setup, bool is_vf)
                                "dma_addr=0x%" SXE_PRIX64,
                         rxq->buffer_ring, rxq->sc_buffer_ring, rxq->desc_ring,
                         rxq->base_addr);
+
+#if defined SXE_DPDK_L4_FEATURES && defined SXE_DPDK_SIMD
+       if (!rte_is_power_of_2(desc_num)) {
+               PMD_LOG_DEBUG(INIT, "queue[%d] doesn't meet Vector Rx "
+                                       "preconditions - canceling the feature for "
+                                       "the whole port[%d]",
+                                rxq->queue_id, rxq->port_id);
+               if (is_vf)
+                       vf_adapter->rx_vec_allowed = false;
+               else
+                       pf_adapter->rx_vec_allowed = false;
+
+       } else {
+               sxe_rxq_vec_setup(rxq);
+       }
+#endif
+
        dev->data->rx_queues[queue_idx] = rxq;
 
        sxe_rx_queue_init(*rx_setup->rx_batch_alloc_allowed, rxq);
@@ -265,6 +290,9 @@ void __sxe_recycle_rxq_info_get(struct rte_eth_dev *dev, u16 queue_id,
                struct rte_eth_recycle_rxq_info *q_info)
 {
        struct sxe_rx_queue *rxq;
+#if defined SXE_DPDK_L4_FEATURES && defined SXE_DPDK_SIMD
+       struct sxe_adapter *adapter = dev->data->dev_private;
+#endif
 
        rxq = dev->data->rx_queues[queue_id];
 
@@ -273,8 +301,22 @@ void __sxe_recycle_rxq_info_get(struct rte_eth_dev *dev, u16 queue_id,
        q_info->mbuf_ring_size = rxq->ring_depth;
        q_info->receive_tail = &rxq->processing_idx;
 
+#if defined SXE_DPDK_L4_FEATURES && defined SXE_DPDK_SIMD
+       if (adapter->rx_vec_allowed) {
+#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM)
+               q_info->refill_requirement = rxq->realloc_num;
+               q_info->refill_head = &rxq->realloc_start;
+#endif
+       } else {
+               q_info->refill_requirement = rxq->batch_alloc_size;
+               q_info->refill_head = &rxq->batch_alloc_trigger;
+       }
+#else
        q_info->refill_requirement = rxq->batch_alloc_size;
        q_info->refill_head = &rxq->batch_alloc_trigger;
+#endif
+
+       return;
 }
 #endif
 #endif
@@ -302,7 +344,20 @@ s32 __sxe_tx_done_cleanup(void *tx_queue, u32 free_cnt)
        struct sxe_tx_queue *txq = (struct sxe_tx_queue *)tx_queue;
        if (txq->offloads == 0 &&
                txq->rs_thresh >= RTE_PMD_SXE_MAX_TX_BURST) {
+#if defined SXE_DPDK_L4_FEATURES && defined SXE_DPDK_SIMD
+               if (txq->rs_thresh <= RTE_SXE_MAX_TX_FREE_BUF_SZ &&
+#ifndef DPDK_19_11_6
+                       rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128 &&
+#endif
+                       (rte_eal_process_type() != RTE_PROC_PRIMARY ||
+                       txq->buffer_ring_vec != NULL)) {
+                       ret = sxe_tx_done_cleanup_vec(txq, free_cnt);
+               } else {
+                       ret = sxe_tx_done_cleanup_simple(txq, free_cnt);
+               }
+#else
                ret = sxe_tx_done_cleanup_simple(txq, free_cnt);
+#endif
 
        } else {
                ret = sxe_tx_done_cleanup_full(txq, free_cnt);
diff --git a/drivers/net/sxe/base/sxe_rx_common.c b/drivers/net/sxe/base/sxe_rx_common.c
index aa830c89d7..8baed167a0 100644
--- a/drivers/net/sxe/base/sxe_rx_common.c
+++ b/drivers/net/sxe/base/sxe_rx_common.c
@@ -23,6 +23,10 @@
 #include "sxe_errno.h"
 #include "sxe_irq.h"
 #include "sxe_rx_common.h"
+#if defined SXE_DPDK_L4_FEATURES && defined SXE_DPDK_SIMD
+#include "sxe_vec_common.h"
+#include "rte_vect.h"
+#endif
 
 static inline void sxe_rx_resource_prefetch(u16 next_idx,
                                struct sxe_rx_buffer *buf_ring,
@@ -34,12 +38,70 @@ static inline void sxe_rx_resource_prefetch(u16 next_idx,
                rte_sxe_prefetch(&desc_ring[next_idx]);
                rte_sxe_prefetch(&buf_ring[next_idx]);
        }
+
 }
 
+#if defined SXE_DPDK_L4_FEATURES && defined SXE_DPDK_SIMD
+#if defined DPDK_23_11_3 || defined DPDK_24_11_1
+#ifndef DPDK_23_7
+#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM)
+static void sxe_recycle_rx_descriptors_refill_vec(void *rx_queue, u16 nb_mbufs)
+{
+       struct sxe_rx_queue *rxq = rx_queue;
+       struct sxe_rx_buffer *rxep;
+       volatile union sxe_rx_data_desc *rxdp;
+       u16 rx_id;
+       u64 paddr;
+       u64 dma_addr;
+       u16 i;
+
+       rxdp = rxq->desc_ring + rxq->realloc_start;
+       rxep = &rxq->buffer_ring[rxq->realloc_start];
+
+       for (i = 0; i < nb_mbufs; i++) {
+               paddr = (rxep[i].mbuf)->buf_iova + RTE_PKTMBUF_HEADROOM;
+               dma_addr = rte_cpu_to_le_64(paddr);
+               rxdp[i].read.hdr_addr = 0;
+               rxdp[i].read.pkt_addr = dma_addr;
+       }
+
+       rxq->realloc_start += nb_mbufs;
+       if (rxq->realloc_start >= rxq->ring_depth)
+               rxq->realloc_start = 0;
+
+       rxq->realloc_num -= nb_mbufs;
+
+       rx_id = (u16)((rxq->realloc_start == 0) ?
+                                       (rxq->ring_depth - 1) : (rxq->realloc_start - 1));
+
+       SXE_PCI_REG_WC_WRITE_RELAXED(rxq->rdt_reg_addr, rx_id);
+}
+#endif
+#endif
+#endif
+#endif
+
 void __rte_cold __sxe_rx_function_set(struct rte_eth_dev *dev,
        bool rx_batch_alloc_allowed, bool *rx_vec_allowed)
 {
+#if defined SXE_DPDK_L4_FEATURES && defined SXE_DPDK_SIMD
+       u16  i, is_using_sse;
+
+       if (sxe_rx_vec_condition_check(dev) ||
+#ifndef DPDK_19_11_6
+               !rx_batch_alloc_allowed ||
+               rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_128
+#else
+               !rx_batch_alloc_allowed
+#endif
+               ) {
+               PMD_LOG_DEBUG(INIT, "Port[%d] doesn't meet Vector Rx "
+                                       "preconditions", dev->data->port_id);
+               *rx_vec_allowed = false;
+       }
+#else
        UNUSED(rx_vec_allowed);
+#endif
 
        if (dev->data->lro) {
                if (rx_batch_alloc_allowed) {
@@ -52,7 +114,29 @@ void __rte_cold __sxe_rx_function_set(struct rte_eth_dev *dev,
                        dev->rx_pkt_burst = sxe_single_alloc_lro_pkts_recv;
                }
        } else if (dev->data->scattered_rx) {
+#if defined SXE_DPDK_L4_FEATURES && defined SXE_DPDK_SIMD
+               if (*rx_vec_allowed) {
+                       PMD_LOG_DEBUG(INIT, "Using Vector Scattered Rx "
+                                               "callback (port=%d).",
+                                        dev->data->port_id);
+#if defined DPDK_23_11_3 || defined DPDK_24_11_1
+#ifndef DPDK_23_7
+#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM)
+                       dev->recycle_rx_descriptors_refill = sxe_recycle_rx_descriptors_refill_vec;
+#endif
+#endif
+#endif
+                       dev->rx_pkt_burst = sxe_scattered_pkts_vec_recv;
+
+#endif
+
+#if defined SXE_DPDK_L4_FEATURES && defined SXE_DPDK_SIMD
+
+               } else if (rx_batch_alloc_allowed) {
+#else
                if (rx_batch_alloc_allowed) {
+#endif
+
                        PMD_LOG_DEBUG(INIT, "Using a Scattered with bulk "
                                           "allocation callback (port=%d).",
                                         dev->data->port_id);
@@ -67,7 +151,24 @@ void __rte_cold __sxe_rx_function_set(struct rte_eth_dev *dev,
 
                        dev->rx_pkt_burst = sxe_single_alloc_lro_pkts_recv;
                }
-       } else if (rx_batch_alloc_allowed) {
+       }
+       #if defined SXE_DPDK_L4_FEATURES && defined SXE_DPDK_SIMD
+       else if (*rx_vec_allowed) {
+               PMD_LOG_DEBUG(INIT, "Vector rx enabled, please make sure RX "
+                                       "burst size no less than %d (port=%d).",
+                                SXE_DESCS_PER_LOOP,
+                                dev->data->port_id);
+#if defined DPDK_23_11_3 || defined DPDK_24_11_1
+#ifndef DPDK_23_7
+#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM)
+               dev->recycle_rx_descriptors_refill = sxe_recycle_rx_descriptors_refill_vec;
+#endif
+#endif
+#endif
+               dev->rx_pkt_burst = sxe_pkts_vec_recv;
+       }
+#endif
+       else if (rx_batch_alloc_allowed) {
                PMD_LOG_DEBUG(INIT, "Rx Burst Bulk Alloc Preconditions are "
                                        "satisfied. Rx Burst Bulk Alloc 
function "
                                        "will be used on port=%d.",
@@ -82,6 +183,19 @@ void __rte_cold __sxe_rx_function_set(struct rte_eth_dev *dev,
 
                dev->rx_pkt_burst = sxe_pkts_recv;
        }
+
+#if defined SXE_DPDK_L4_FEATURES && defined SXE_DPDK_SIMD
+       is_using_sse =
+               (dev->rx_pkt_burst == sxe_scattered_pkts_vec_recv ||
+               dev->rx_pkt_burst == sxe_pkts_vec_recv);
+
+       for (i = 0; i < dev->data->nb_rx_queues; i++) {
+               struct sxe_rx_queue *rxq = dev->data->rx_queues[i];
+
+               rxq->is_using_sse = is_using_sse;
+       }
+#endif
+
 }
 
 #if defined DPDK_20_11_5 || defined DPDK_19_11_6
@@ -127,7 +241,15 @@ s32 __sxe_rx_descriptor_status(void *rx_queue, u16 offset)
                ret = -EINVAL;
                goto l_end;
        }
-       hold_num = rxq->hold_num;
+
+#if defined SXE_DPDK_L4_FEATURES && defined SXE_DPDK_SIMD
+#if defined(RTE_ARCH_X86)
+       if (rxq->is_using_sse)
+               hold_num = rxq->realloc_num;
+       else
+#endif
+#endif
+               hold_num = rxq->hold_num;
        if (offset >= rxq->ring_depth - hold_num) {
                ret = RTE_ETH_RX_DESC_UNAVAIL;
                goto l_end;
@@ -268,6 +390,16 @@ const u32 *__sxe_dev_supported_ptypes_get(struct rte_eth_dev *dev, size_t *no_of
                goto l_end;
        }
 
+#if defined SXE_DPDK_L4_FEATURES && defined SXE_DPDK_SIMD
+#if defined(RTE_ARCH_X86)
+       if (dev->rx_pkt_burst == sxe_pkts_vec_recv ||
+               dev->rx_pkt_burst == sxe_scattered_pkts_vec_recv) {
+               *no_of_elements = RTE_DIM(ptypes_arr);
+               ptypes = ptypes_arr;
+       }
+#endif
+#endif
+
 l_end:
        return ptypes;
 }
@@ -300,6 +432,15 @@ const u32 *__sxe_dev_supported_ptypes_get(struct rte_eth_dev *dev)
                goto l_end;
        }
 
+#if defined SXE_DPDK_L4_FEATURES && defined SXE_DPDK_SIMD
+#if defined(RTE_ARCH_X86)
+       if (dev->rx_pkt_burst == sxe_pkts_vec_recv ||
+               dev->rx_pkt_burst == sxe_scattered_pkts_vec_recv) {
+               ptypes = ptypes_arr;
+       }
+#endif
+#endif
+
 l_end:
        return ptypes;
 }
diff --git a/drivers/net/sxe/meson.build b/drivers/net/sxe/meson.build
index 0e89676375..ecf64ea524 100644
--- a/drivers/net/sxe/meson.build
+++ b/drivers/net/sxe/meson.build
@@ -2,6 +2,9 @@
 # Copyright (C), 2022, Linkdata Technology Co., Ltd.
 cflags += ['-DSXE_DPDK']
 cflags += ['-DSXE_HOST_DRIVER']
+cflags += ['-DSXE_DPDK_L4_FEATURES']
+cflags += ['-DSXE_DPDK_SRIOV']
+cflags += ['-DSXE_DPDK_SIMD']
 
 #subdir('base')
 #objs = [base_objs]
@@ -32,6 +35,12 @@ sources = files(
 
 testpmd_sources = files('sxe_testpmd.c')
 
+if arch_subdir == 'x86'
+       sources += files('pf/sxe_vec_sse.c')
+elif arch_subdir == 'arm'
+       sources += files('pf/sxe_vec_neon.c')
+endif
+
 includes += include_directories('base')
 includes += include_directories('pf')
 includes += include_directories('include/sxe/')
diff --git a/drivers/net/sxe/pf/sxe.h b/drivers/net/sxe/pf/sxe.h
index c7dafd0e75..c9c71a0c90 100644
--- a/drivers/net/sxe/pf/sxe.h
+++ b/drivers/net/sxe/pf/sxe.h
@@ -66,6 +66,9 @@ struct sxe_adapter {
        struct sxe_dcb_context dcb_ctxt;
 
        bool rx_batch_alloc_allowed;
+#if defined SXE_DPDK_L4_FEATURES && defined SXE_DPDK_SIMD
+       bool rx_vec_allowed;
+#endif
        s8 name[PCI_PRI_STR_SIZE + 1];
 
        u32 mtu;
diff --git a/drivers/net/sxe/pf/sxe_ethdev.c b/drivers/net/sxe/pf/sxe_ethdev.c
index f3ac4cbfc8..46d7f0dbf7 100644
--- a/drivers/net/sxe/pf/sxe_ethdev.c
+++ b/drivers/net/sxe/pf/sxe_ethdev.c
@@ -98,6 +98,11 @@ static s32 sxe_dev_configure(struct rte_eth_dev *dev)
 
        /* Default use batch alloc  */
        adapter->rx_batch_alloc_allowed = true;
+
+#if defined SXE_DPDK_L4_FEATURES && defined SXE_DPDK_SIMD
+       adapter->rx_vec_allowed = true;
+#endif
+
 l_end:
        return ret;
 }
diff --git a/drivers/net/sxe/pf/sxe_rx.c b/drivers/net/sxe/pf/sxe_rx.c
index 232fab0ab1..8504e1ac43 100644
--- a/drivers/net/sxe/pf/sxe_rx.c
+++ b/drivers/net/sxe/pf/sxe_rx.c
@@ -26,6 +26,9 @@
 #include "sxe_errno.h"
 #include "sxe_irq.h"
 #include "sxe_ethdev.h"
+#if defined SXE_DPDK_L4_FEATURES && defined SXE_DPDK_SIMD
+#include "sxe_vec_common.h"
+#endif
 #include "sxe_rx_common.h"
 
 #define SXE_LRO_HDR_SIZE                               128
diff --git a/drivers/net/sxe/pf/sxe_vec_common.h b/drivers/net/sxe/pf/sxe_vec_common.h
new file mode 100644
index 0000000000..d3571dbf5b
--- /dev/null
+++ b/drivers/net/sxe/pf/sxe_vec_common.h
@@ -0,0 +1,325 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (C), 2022, Linkdata Technology Co., Ltd.
+ */
+#ifndef __SXE_VEC_COMMON_H__
+#define __SXE_VEC_COMMON_H__
+
+#if defined SXE_DPDK_L4_FEATURES && defined SXE_DPDK_SIMD
+#include <stdint.h>
+#include <rte_mempool.h>
+
+#if defined DPDK_20_11_5 || defined DPDK_19_11_6
+#include <rte_ethdev_driver.h>
+#include <rte_ethdev_pci.h>
+#elif defined DPDK_21_11_5
+#include <ethdev_driver.h>
+#include <rte_dev.h>
+#include <rte_malloc.h>
+#else
+#include <ethdev_driver.h>
+#include <dev_driver.h>
+#include <rte_malloc.h>
+#endif
+#include "sxe.h"
+#include "sxe_rx.h"
+
+#define RTE_SXE_MAX_TX_FREE_BUF_SZ     64
+#define SXE_TXD_STAT_DD                                0x00000001
+
+static __rte_always_inline s32
+sxe_tx_bufs_vec_free(struct sxe_tx_queue *txq)
+{
+       struct sxe_tx_buffer_vec *txep;
+       u32 status;
+       u32 n;
+       u32 i;
+       s32 ret;
+       s32 nb_free = 0;
+       struct rte_mbuf *m, *free[RTE_SXE_MAX_TX_FREE_BUF_SZ];
+
+       status = txq->desc_ring[txq->next_dd].wb.status;
+       if (!(status & SXE_TXD_STAT_DD)) {
+               ret = 0;
+               goto out;
+       }
+
+       n = txq->rs_thresh;
+
+       txep = &txq->buffer_ring_vec[txq->next_dd - (n - 1)];
+       m = rte_pktmbuf_prefree_seg(txep[0].mbuf);
+
+       if (likely(m != NULL)) {
+               free[0] = m;
+               nb_free = 1;
+               for (i = 1; i < n; i++) {
+                       m = rte_pktmbuf_prefree_seg(txep[i].mbuf);
+                       if (likely(m != NULL)) {
+                               if (likely(m->pool == free[0]->pool)) {
+                                       free[nb_free++] = m;
+                               } else {
+                                       rte_mempool_put_bulk(free[0]->pool,
+                                                       (void *)free, nb_free);
+                                       free[0] = m;
+                                       nb_free = 1;
+                               }
+                       }
+               }
+               rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
+       } else {
+               for (i = 1; i < n; i++) {
+                       m = rte_pktmbuf_prefree_seg(txep[i].mbuf);
+                       if (m != NULL)
+                               rte_mempool_put(m->pool, m);
+               }
+       }
+
+       txq->desc_free_num = (u16)(txq->desc_free_num + txq->rs_thresh);
+       txq->next_dd = (u16)(txq->next_dd + txq->rs_thresh);
+       if (txq->next_dd >= txq->ring_depth)
+               txq->next_dd = (u16)(txq->rs_thresh - 1);
+
+       ret = txq->rs_thresh;
+out:
+       return ret;
+}
+
+static inline u16
+sxe_packets_reassemble(sxe_rx_queue_s *rxq, struct rte_mbuf **rx_bufs,
+                       u16 bufs_num, u8 *split_flags)
+{
+       struct rte_mbuf *pkts[bufs_num];
+       struct rte_mbuf *start = rxq->pkt_first_seg;
+       struct rte_mbuf *end = rxq->pkt_last_seg;
+       u32 pkt_idx, buf_idx;
+
+       for (buf_idx = 0, pkt_idx = 0; buf_idx < bufs_num; buf_idx++) {
+               if (end != NULL) {
+                       end->next = rx_bufs[buf_idx];
+                       rx_bufs[buf_idx]->data_len += rxq->crc_len;
+
+                       start->nb_segs++;
+                       start->pkt_len += rx_bufs[buf_idx]->data_len;
+                       end = end->next;
+
+                       if (!split_flags[buf_idx]) {
+                               start->hash = end->hash;
+                               start->ol_flags = end->ol_flags;
+                               start->pkt_len -= rxq->crc_len;
+                               if (end->data_len > rxq->crc_len) {
+                                       end->data_len -= rxq->crc_len;
+                               } else {
+                                       struct rte_mbuf *secondlast = start;
+
+                                       start->nb_segs--;
+                                       while (secondlast->next != end)
+                                               secondlast = secondlast->next;
+
+                                       secondlast->data_len -= (rxq->crc_len -
+                                                       end->data_len);
+                                       secondlast->next = NULL;
+                                       rte_pktmbuf_free_seg(end);
+                               }
+                               pkts[pkt_idx++] = start;
+                               start = NULL;
+                               end = NULL;
+                       }
+               } else {
+                       if (!split_flags[buf_idx]) {
+                               pkts[pkt_idx++] = rx_bufs[buf_idx];
+                               continue;
+                       }
+                       start = rx_bufs[buf_idx];
+                       end = rx_bufs[buf_idx];
+                       rx_bufs[buf_idx]->data_len += rxq->crc_len;
+                       rx_bufs[buf_idx]->pkt_len += rxq->crc_len;
+               }
+       }
+
+       rxq->pkt_first_seg = start;
+       rxq->pkt_last_seg = end;
+       memcpy(rx_bufs, pkts, pkt_idx * (sizeof(*pkts)));
+
+       return pkt_idx;
+}
+
+static inline void
+sxe_rx_vec_mbufs_release(sxe_rx_queue_s *rxq)
+{
+       u16 i;
+
+       if (rxq->buffer_ring == NULL || rxq->realloc_num >= rxq->ring_depth)
+               return;
+
+       if (rxq->realloc_num == 0) {
+               for (i = 0; i < rxq->ring_depth; i++) {
+                       if (rxq->buffer_ring[i].mbuf != NULL)
+                               rte_pktmbuf_free_seg(rxq->buffer_ring[i].mbuf);
+               }
+       } else {
+               for (i = rxq->processing_idx;
+                        i != rxq->realloc_start;
+                        i = (i + 1) % rxq->ring_depth) {
+                       if (rxq->buffer_ring[i].mbuf != NULL)
+                               rte_pktmbuf_free_seg(rxq->buffer_ring[i].mbuf);
+               }
+       }
+
+       rxq->realloc_num = rxq->ring_depth;
+
+       memset(rxq->buffer_ring, 0, sizeof(rxq->buffer_ring[0]) * rxq->ring_depth);
+}
+
+static inline s32
+sxe_default_rxq_vec_setup(sxe_rx_queue_s *rxq)
+{
+       uintptr_t p;
+       struct rte_mbuf mbuf = { .buf_addr = 0 };
+
+       mbuf.nb_segs = 1;
+       mbuf.data_off = RTE_PKTMBUF_HEADROOM;
+       mbuf.port = rxq->port_id;
+       rte_mbuf_refcnt_set(&mbuf, 1);
+
+       rte_compiler_barrier();
+       p = (uintptr_t)&mbuf.rearm_data;
+       rxq->mbuf_init_value = *(u64 *)p;
+
+       return 0;
+}
+
+static inline s32
+sxe_default_rx_vec_condition_check(struct rte_eth_dev *dev)
+{
+       s32 ret = 0;
+
+#ifndef RTE_LIBRTE_IEEE1588
+       struct rte_eth_fdir_conf *fnav_conf = SXE_DEV_FNAV_CONF(dev);
+       if (fnav_conf->mode != RTE_FDIR_MODE_NONE)
+               ret = -1;
+#else
+       RTE_SET_USED(dev);
+       ret = -1;
+#endif
+
+       return ret;
+}
+
+static __rte_always_inline void
+sxe_vec_mbuf_fill(struct sxe_tx_buffer_vec *buffer_ring,
+                struct rte_mbuf **tx_pkts, u16 pkts_num)
+{
+       s32 i;
+
+       for (i = 0; i < pkts_num; ++i)
+               buffer_ring[i].mbuf = tx_pkts[i];
+}
+
+static inline void
+sxe_tx_queue_vec_init(sxe_tx_queue_s *txq)
+{
+       u16 i;
+       volatile sxe_tx_data_desc_u *txd;
+       static const sxe_tx_data_desc_u zeroed_desc = { {0} };
+       struct sxe_tx_buffer_vec *tx_buffer = txq->buffer_ring_vec;
+
+       for (i = 0; i < txq->ring_depth; i++)
+               txq->desc_ring[i] = zeroed_desc;
+
+       for (i = 0; i < txq->ring_depth; i++) {
+               txd = &txq->desc_ring[i];
+               txd->wb.status = SXE_TX_DESC_STAT_DD;
+               tx_buffer[i].mbuf = NULL;
+       }
+
+       txq->ctx_curr     = 0;
+       txq->desc_used_num = 0;
+       txq->desc_free_num = txq->ring_depth - 1;
+       txq->next_to_use   = 0;
+       txq->next_to_clean = txq->ring_depth - 1;
+       txq->next_dd       = txq->rs_thresh  - 1;
+       txq->next_rs       = txq->rs_thresh  - 1;
+       memset((void *)&txq->ctx_cache, 0,
+                       SXE_CTXT_DESC_NUM * sizeof(struct sxe_ctxt_info));
+}
+
+static inline void
+sxe_tx_mbufs_vec_release(sxe_tx_queue_s *txq)
+{
+       u16 i;
+       struct sxe_tx_buffer_vec *tx_buffer;
+       const u16 max_desc = (u16)(txq->ring_depth - 1);
+
+       if (txq->buffer_ring_vec == NULL || txq->desc_free_num == max_desc)
+               return;
+
+       for (i = txq->next_dd - (txq->rs_thresh - 1);
+                i != txq->next_to_use;
+                i = (i + 1) % txq->ring_depth) {
+               tx_buffer = &txq->buffer_ring_vec[i];
+               rte_pktmbuf_free_seg(tx_buffer->mbuf);
+       }
+       txq->desc_free_num = max_desc;
+
+       for (i = 0; i < txq->ring_depth; i++) {
+               tx_buffer = &txq->buffer_ring_vec[i];
+               tx_buffer->mbuf = NULL;
+       }
+}
+
+static inline void
+sxe_tx_buffer_ring_vec_free(sxe_tx_queue_s *txq)
+{
+       if (txq == NULL)
+               return;
+
+       if (txq->buffer_ring_vec != NULL) {
+               rte_free(txq->buffer_ring_vec - 1);
+               txq->buffer_ring_vec = NULL;
+       }
+}
+
+static inline s32
+sxe_default_txq_vec_setup(sxe_tx_queue_s *txq,
+                               const struct sxe_txq_ops *txq_ops)
+{
+       s32 ret = 0;
+
+       if (txq->buffer_ring_vec == NULL) {
+               ret = -1;
+               goto l_out;
+       }
+
+       txq->buffer_ring_vec = txq->buffer_ring_vec + 1;
+       txq->ops = txq_ops;
+
+l_out:
+       return ret;
+}
+
+static inline int
+sxe_tx_done_cleanup_vec(sxe_tx_queue_s *txq, u32 free_cnt)
+{
+       UNUSED(txq);
+       UNUSED(free_cnt);
+
+       return -ENOTSUP;
+}
+
+s32 sxe_txq_vec_setup(sxe_tx_queue_s *txq);
+
+s32 sxe_rx_vec_condition_check(struct rte_eth_dev *dev);
+
+s32 sxe_rxq_vec_setup(sxe_rx_queue_s *rxq);
+
+void sxe_rx_queue_vec_mbufs_release(sxe_rx_queue_s *rxq);
+
+u16 sxe_scattered_pkts_vec_recv(void *rx_queue, struct rte_mbuf **rx_pkts, u16 pkts_num);
+
+u16 sxe_pkts_vec_recv(void *rx_queue, struct rte_mbuf **rx_pkts, u16 pkts_num);
+
+u16
+__sxe_pkts_vector_xmit(void *tx_queue, struct rte_mbuf **tx_pkts,
+                          u16 pkts_num);
+
+#endif
+#endif
diff --git a/drivers/net/sxe/pf/sxe_vec_neon.c b/drivers/net/sxe/pf/sxe_vec_neon.c
new file mode 100644
index 0000000000..8e425e8487
--- /dev/null
+++ b/drivers/net/sxe/pf/sxe_vec_neon.c
@@ -0,0 +1,760 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (C), 2022, Linkdata Technology Co., Ltd.
+ */
+
+#if defined SXE_DPDK_L4_FEATURES && defined SXE_DPDK_SIMD
+#include <stdint.h>
+#include "sxe_dpdk_version.h"
+#if defined DPDK_20_11_5 || defined DPDK_19_11_6
+#include <rte_ethdev_driver.h>
+#else
+#include <ethdev_driver.h>
+#endif
+#include <rte_malloc.h>
+
+#include <rte_vect.h>
+#include "sxe_vec_common.h"
+
+#define RTE_SXE_DESCS_PER_LOOP                 4
+#define SXE_PACKET_TYPE_MASK_TUNNEL            0xFF
+#define SXE_PACKET_TYPE_SHIFT                  0x04
+#define SXE_RXDADV_ERR_TCPE                            0x40000000
+#define SXE_VPMD_DESC_EOP_MASK                 0x02020202
+#define SXE_UINT8_BIT                                  (CHAR_BIT * sizeof(u8))
+
+#pragma GCC diagnostic ignored "-Wcast-qual"
+
+static inline void
+sxe_rxq_rearm(struct sxe_rx_queue *rxq)
+{
+       s32 i;
+       u16 rx_id;
+       volatile union sxe_rx_data_desc *rxdp;
+       struct sxe_rx_buffer *rxep = &rxq->buffer_ring[rxq->realloc_start];
+       struct rte_mbuf *mb0, *mb1;
+       uint64x2_t dma_addr0, dma_addr1;
+       uint64x2_t zero = vdupq_n_u64(0);
+       u64 paddr;
+       uint8x8_t p;
+
+       rxdp = rxq->desc_ring + rxq->realloc_start;
+
+       if (unlikely(rte_mempool_get_bulk(rxq->mb_pool,
+                                         (void *)rxep,
+                                         RTE_PMD_SXE_MAX_RX_BURST) < 0)) {
+               if (rxq->realloc_num + RTE_PMD_SXE_MAX_RX_BURST >=
+                       rxq->ring_depth) {
+                       for (i = 0; i < RTE_SXE_DESCS_PER_LOOP; i++) {
+                               rxep[i].mbuf = &rxq->fake_mbuf;
+                               vst1q_u64((u64 *)&rxdp[i].read,
+                                         zero);
+                       }
+               }
+               rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
+                       RTE_PMD_SXE_MAX_RX_BURST;
+               return;
+       }
+
+       p = vld1_u8((u8 *)&rxq->mbuf_init_value);
+
+       for (i = 0; i < RTE_PMD_SXE_MAX_RX_BURST; i += 2, rxep += 2) {
+               mb0 = rxep[0].mbuf;
+               mb1 = rxep[1].mbuf;
+
+               vst1_u8((u8 *)&mb0->rearm_data, p);
+               paddr = mb0->buf_iova + RTE_PKTMBUF_HEADROOM;
+               dma_addr0 = vsetq_lane_u64(paddr, zero, 0);
+
+               vst1q_u64((u64 *)&rxdp++->read, dma_addr0);
+
+               vst1_u8((u8 *)&mb1->rearm_data, p);
+               paddr = mb1->buf_iova + RTE_PKTMBUF_HEADROOM;
+               dma_addr1 = vsetq_lane_u64(paddr, zero, 0);
+               vst1q_u64((u64 *)&rxdp++->read, dma_addr1);
+       }
+
+       rxq->realloc_start += RTE_PMD_SXE_MAX_RX_BURST;
+       if (rxq->realloc_start >= rxq->ring_depth)
+               rxq->realloc_start = 0;
+
+       rxq->realloc_num -= RTE_PMD_SXE_MAX_RX_BURST;
+
+       rx_id = (u16)((rxq->realloc_start == 0) ?
+                               (rxq->ring_depth - 1) : (rxq->realloc_start - 1));
+
+       sxe_write_addr(rx_id, rxq->rdt_reg_addr);
+}
+
+#if defined DPDK_22_11_3 || defined DPDK_21_11_5 || defined DPDK_23_11_3 || defined DPDK_24_11_1
+
+static inline void
+sxe_desc_to_olflags_v(uint8x16x2_t sterr_tmp1, uint8x16x2_t sterr_tmp2,
+                 uint8x16_t staterr, u8 vlan_flags, u16 udp_p_flag,
+                 struct rte_mbuf **rx_pkts)
+{
+       u16 udp_p_flag_hi;
+       uint8x16_t ptype, udp_csum_skip;
+       uint32x4_t temp_udp_csum_skip = {0, 0, 0, 0};
+       uint8x16_t vtag_lo, vtag_hi, vtag;
+       uint8x16_t temp_csum;
+       uint32x4_t csum = {0, 0, 0, 0};
+
+       union {
+               u16 e[4];
+               u64 word;
+       } vol;
+
+       const uint8x16_t rsstype_msk = {
+                       0x0F, 0x0F, 0x0F, 0x0F,
+                       0x00, 0x00, 0x00, 0x00,
+                       0x00, 0x00, 0x00, 0x00,
+                       0x00, 0x00, 0x00, 0x00};
+
+       const uint8x16_t rss_flags = {
+                       0, RTE_MBUF_F_RX_RSS_HASH, RTE_MBUF_F_RX_RSS_HASH, RTE_MBUF_F_RX_RSS_HASH,
+                       0, RTE_MBUF_F_RX_RSS_HASH, 0, RTE_MBUF_F_RX_RSS_HASH,
+                       RTE_MBUF_F_RX_RSS_HASH, 0, 0, 0,
+                       0, 0, 0, RTE_MBUF_F_RX_FDIR};
+
+       const uint8x16_t vlan_csum_msk = {
+                       SXE_RXD_STAT_VP, SXE_RXD_STAT_VP,
+                       SXE_RXD_STAT_VP, SXE_RXD_STAT_VP,
+                       0, 0, 0, 0,
+                       0, 0, 0, 0,
+                       (SXE_RXDADV_ERR_TCPE | SXE_RXDADV_ERR_IPE) >> 24,
+                       (SXE_RXDADV_ERR_TCPE | SXE_RXDADV_ERR_IPE) >> 24,
+                       (SXE_RXDADV_ERR_TCPE | SXE_RXDADV_ERR_IPE) >> 24,
+                       (SXE_RXDADV_ERR_TCPE | SXE_RXDADV_ERR_IPE) >> 24};
+
+       const uint8x16_t vlan_csum_map_lo = {
+                       RTE_MBUF_F_RX_IP_CKSUM_GOOD,
+                       RTE_MBUF_F_RX_IP_CKSUM_GOOD | RTE_MBUF_F_RX_L4_CKSUM_BAD,
+                       RTE_MBUF_F_RX_IP_CKSUM_BAD,
+                       RTE_MBUF_F_RX_IP_CKSUM_BAD | RTE_MBUF_F_RX_L4_CKSUM_BAD,
+                       0, 0, 0, 0,
+                       vlan_flags | RTE_MBUF_F_RX_IP_CKSUM_GOOD,
+                       vlan_flags | RTE_MBUF_F_RX_IP_CKSUM_GOOD | RTE_MBUF_F_RX_L4_CKSUM_BAD,
+                       vlan_flags | RTE_MBUF_F_RX_IP_CKSUM_BAD,
+                       vlan_flags | RTE_MBUF_F_RX_IP_CKSUM_BAD | RTE_MBUF_F_RX_L4_CKSUM_BAD,
+                       0, 0, 0, 0};
+
+       const uint8x16_t vlan_csum_map_hi = {
+                       RTE_MBUF_F_RX_L4_CKSUM_GOOD >> sizeof(u8), 0,
+                       RTE_MBUF_F_RX_L4_CKSUM_GOOD >> sizeof(u8), 0,
+                       0, 0, 0, 0,
+                       RTE_MBUF_F_RX_L4_CKSUM_GOOD >> sizeof(u8), 0,
+                       RTE_MBUF_F_RX_L4_CKSUM_GOOD >> sizeof(u8), 0,
+                       0, 0, 0, 0};
+
+       udp_p_flag_hi = udp_p_flag >> 8;
+
+       const uint8x16_t udp_hdr_p_msk = {
+                       0, 0, 0, 0,
+                       udp_p_flag_hi, udp_p_flag_hi, udp_p_flag_hi, udp_p_flag_hi,
+                       0, 0, 0, 0,
+                       0, 0, 0, 0};
+
+       const uint8x16_t udp_csum_bad_shuf = {
+                       0xFF, ~(u8)RTE_MBUF_F_RX_L4_CKSUM_BAD, 0, 0,
+                       0, 0, 0, 0,
+                       0, 0, 0, 0,
+                       0, 0, 0, 0};
+
+       ptype = vzipq_u8(sterr_tmp1.val[0], sterr_tmp2.val[0]).val[0];
+
+       udp_csum_skip = vandq_u8(ptype, udp_hdr_p_msk);
+
+       temp_udp_csum_skip = vcopyq_laneq_u32(temp_udp_csum_skip, 0,
+                               vreinterpretq_u32_u8(udp_csum_skip), 1);
+
+       ptype = vandq_u8(ptype, rsstype_msk);
+       ptype = vqtbl1q_u8(rss_flags, ptype);
+
+       vtag = vandq_u8(staterr, vlan_csum_msk);
+
+       temp_csum = vshrq_n_u8(vtag, 6);
+
+       csum = vsetq_lane_u32(vgetq_lane_u32(vreinterpretq_u32_u8(temp_csum), 3), csum, 0);
+       vtag = vorrq_u8(vreinterpretq_u8_u32(csum), vtag);
+
+       vtag_hi = vqtbl1q_u8(vlan_csum_map_hi, vtag);
+       vtag_hi = vshrq_n_u8(vtag_hi, 7);
+
+       vtag_lo = vqtbl1q_u8(vlan_csum_map_lo, vtag);
+       vtag_lo = vorrq_u8(ptype, vtag_lo);
+
+       udp_csum_skip = vshrq_n_u8(vreinterpretq_u8_u32(temp_udp_csum_skip), 1);
+       udp_csum_skip = vqtbl1q_u8(udp_csum_bad_shuf, udp_csum_skip);
+       vtag_lo = vandq_u8(vtag_lo, udp_csum_skip);
+
+       vtag = vzipq_u8(vtag_lo, vtag_hi).val[0];
+       vol.word = vgetq_lane_u64(vreinterpretq_u64_u8(vtag), 0);
+
+       rx_pkts[0]->ol_flags = vol.e[0];
+       rx_pkts[1]->ol_flags = vol.e[1];
+       rx_pkts[2]->ol_flags = vol.e[2];
+       rx_pkts[3]->ol_flags = vol.e[3];
+}
+
+#elif defined DPDK_20_11_5
+
+#define SXE_VTAG_SHIFT (3)
+
+static inline void
+sxe_desc_to_olflags_v(uint8x16x2_t sterr_tmp1, uint8x16x2_t sterr_tmp2,
+                 uint8x16_t staterr, struct rte_mbuf **rx_pkts)
+{
+       uint8x16_t ptype;
+       uint8x16_t vtag;
+
+       union {
+               u8 e[4];
+               u32 word;
+       } vol;
+
+       const uint8x16_t pkttype_msk = {
+                       PKT_RX_VLAN, PKT_RX_VLAN,
+                       PKT_RX_VLAN, PKT_RX_VLAN,
+                       0x00, 0x00, 0x00, 0x00,
+                       0x00, 0x00, 0x00, 0x00,
+                       0x00, 0x00, 0x00, 0x00};
+
+       const uint8x16_t rsstype_msk = {
+                       0x0F, 0x0F, 0x0F, 0x0F,
+                       0x00, 0x00, 0x00, 0x00,
+                       0x00, 0x00, 0x00, 0x00,
+                       0x00, 0x00, 0x00, 0x00};
+
+       const uint8x16_t rss_flags = {
+                       0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
+                       0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH,
+                       PKT_RX_RSS_HASH, 0, 0, 0,
+                       0, 0, 0, PKT_RX_FDIR};
+
+       ptype = vzipq_u8(sterr_tmp1.val[0], sterr_tmp2.val[0]).val[0];
+       ptype = vandq_u8(ptype, rsstype_msk);
+       ptype = vqtbl1q_u8(rss_flags, ptype);
+
+       vtag = vshrq_n_u8(staterr, SXE_VTAG_SHIFT);
+       vtag = vandq_u8(vtag, pkttype_msk);
+       vtag = vorrq_u8(ptype, vtag);
+
+       vol.word = vgetq_lane_u32(vreinterpretq_u32_u8(vtag), 0);
+
+       rx_pkts[0]->ol_flags = vol.e[0];
+       rx_pkts[1]->ol_flags = vol.e[1];
+       rx_pkts[2]->ol_flags = vol.e[2];
+       rx_pkts[3]->ol_flags = vol.e[3];
+}
+
+#elif defined DPDK_19_11_6
+
+static inline void
+sxe_desc_to_olflags_v(uint8x16x2_t sterr_tmp1, uint8x16x2_t sterr_tmp2,
+                 uint8x16_t staterr, u8 vlan_flags, struct rte_mbuf **rx_pkts)
+{
+       uint8x16_t ptype;
+       uint8x16_t vtag_lo, vtag_hi, vtag;
+       uint8x16_t temp_csum;
+       uint32x4_t csum = {0, 0, 0, 0};
+
+       union {
+               u16 e[4];
+               u64 word;
+       } vol;
+
+       const uint8x16_t rsstype_msk = {
+                       0x0F, 0x0F, 0x0F, 0x0F,
+                       0x00, 0x00, 0x00, 0x00,
+                       0x00, 0x00, 0x00, 0x00,
+                       0x00, 0x00, 0x00, 0x00};
+
+       const uint8x16_t rss_flags = {
+                       0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
+                       0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH,
+                       PKT_RX_RSS_HASH, 0, 0, 0,
+                       0, 0, 0, PKT_RX_FDIR};
+
+       const uint8x16_t vlan_csum_msk = {
+                       SXE_RXD_STAT_VP, SXE_RXD_STAT_VP,
+                       SXE_RXD_STAT_VP, SXE_RXD_STAT_VP,
+                       0, 0, 0, 0,
+                       0, 0, 0, 0,
+                       (SXE_RXDADV_ERR_TCPE | SXE_RXDADV_ERR_IPE) >> 24,
+                       (SXE_RXDADV_ERR_TCPE | SXE_RXDADV_ERR_IPE) >> 24,
+                       (SXE_RXDADV_ERR_TCPE | SXE_RXDADV_ERR_IPE) >> 24,
+                       (SXE_RXDADV_ERR_TCPE | SXE_RXDADV_ERR_IPE) >> 24};
+
+       const uint8x16_t vlan_csum_map_lo = {
+                       PKT_RX_IP_CKSUM_GOOD,
+                       PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD,
+                       PKT_RX_IP_CKSUM_BAD,
+                       PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD,
+                       0, 0, 0, 0,
+                       vlan_flags | PKT_RX_IP_CKSUM_GOOD,
+                       vlan_flags | PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD,
+                       vlan_flags | PKT_RX_IP_CKSUM_BAD,
+                       vlan_flags | PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD,
+                       0, 0, 0, 0};
+
+       const uint8x16_t vlan_csum_map_hi = {
+                       PKT_RX_L4_CKSUM_GOOD >> sizeof(uint8_t), 0,
+                       PKT_RX_L4_CKSUM_GOOD >> sizeof(uint8_t), 0,
+                       0, 0, 0, 0,
+                       PKT_RX_L4_CKSUM_GOOD >> sizeof(uint8_t), 0,
+                       PKT_RX_L4_CKSUM_GOOD >> sizeof(uint8_t), 0,
+                       0, 0, 0, 0};
+
+       ptype = vzipq_u8(sterr_tmp1.val[0], sterr_tmp2.val[0]).val[0];
+       ptype = vandq_u8(ptype, rsstype_msk);
+       ptype = vqtbl1q_u8(rss_flags, ptype);
+
+       vtag = vandq_u8(staterr, vlan_csum_msk);
+
+       temp_csum = vshrq_n_u8(vtag, 6);
+
+       csum = vsetq_lane_u32(vgetq_lane_u32(vreinterpretq_u32_u8(temp_csum), 3), csum, 0);
+       vtag = vorrq_u8(vreinterpretq_u8_u32(csum), vtag);
+
+       vtag_hi = vqtbl1q_u8(vlan_csum_map_hi, vtag);
+       vtag_hi = vshrq_n_u8(vtag_hi, 7);
+
+       vtag_lo = vqtbl1q_u8(vlan_csum_map_lo, vtag);
+       vtag_lo = vorrq_u8(ptype, vtag_lo);
+
+       vtag = vzipq_u8(vtag_lo, vtag_hi).val[0];
+       vol.word = vgetq_lane_u64(vreinterpretq_u64_u8(vtag), 0);
+
+       rx_pkts[0]->ol_flags = vol.e[0];
+       rx_pkts[1]->ol_flags = vol.e[1];
+       rx_pkts[2]->ol_flags = vol.e[2];
+       rx_pkts[3]->ol_flags = vol.e[3];
+}
+#endif
+
+static inline u32
+sxe_get_packet_type(u32 pkt_info,
+               u32 etqf_check,
+               u32 tunnel_check)
+{
+       u32 rte;
+
+       if (etqf_check) {
+               rte = RTE_PTYPE_UNKNOWN;
+               goto out;
+       }
+
+       if (tunnel_check) {
+               pkt_info &= SXE_PACKET_TYPE_MASK_TUNNEL;
+               rte = sxe_ptype_table_tn[pkt_info];
+               goto out;
+       }
+
+       pkt_info &= SXE_PACKET_TYPE_MASK;
+       rte = sxe_ptype_table[pkt_info];
+
+out:
+       return rte;
+}
+
+static inline void
+sxe_desc_to_ptype_v(uint64x2_t descs[4], u16 pkt_type_mask,
+               struct rte_mbuf **rx_pkts)
+{
+       uint32x4_t etqf_check, tunnel_check;
+       uint32x4_t etqf_mask = vdupq_n_u32(0x8000);
+       uint32x4_t tunnel_mask = vdupq_n_u32(0x10000);
+       uint32x4_t ptype_mask = vdupq_n_u32((u32)pkt_type_mask);
+       uint32x4_t ptype0 = vzipq_u32(vreinterpretq_u32_u64(descs[0]),
+                               vreinterpretq_u32_u64(descs[2])).val[0];
+       uint32x4_t ptype1 = vzipq_u32(vreinterpretq_u32_u64(descs[1]),
+                               vreinterpretq_u32_u64(descs[3])).val[0];
+
+       ptype0 = vzipq_u32(ptype0, ptype1).val[0];
+
+       etqf_check = vandq_u32(ptype0, etqf_mask);
+       tunnel_check = vandq_u32(ptype0, tunnel_mask);
+
+       ptype0 = vandq_u32(vshrq_n_u32(ptype0, SXE_PACKET_TYPE_SHIFT),
+                       ptype_mask);
+
+       rx_pkts[0]->packet_type =
+               sxe_get_packet_type(vgetq_lane_u32(ptype0, 0),
+                               vgetq_lane_u32(etqf_check, 0),
+                               vgetq_lane_u32(tunnel_check, 0));
+       rx_pkts[1]->packet_type =
+               sxe_get_packet_type(vgetq_lane_u32(ptype0, 1),
+                               vgetq_lane_u32(etqf_check, 1),
+                               vgetq_lane_u32(tunnel_check, 1));
+       rx_pkts[2]->packet_type =
+               sxe_get_packet_type(vgetq_lane_u32(ptype0, 2),
+                               vgetq_lane_u32(etqf_check, 2),
+                               vgetq_lane_u32(tunnel_check, 2));
+       rx_pkts[3]->packet_type =
+               sxe_get_packet_type(vgetq_lane_u32(ptype0, 3),
+                               vgetq_lane_u32(etqf_check, 3),
+                               vgetq_lane_u32(tunnel_check, 3));
+}
+
+static inline u16
+sxe_recv_raw_pkts_vec(struct sxe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
+                  u16 nb_pkts, u8 *split_packet)
+{
+       volatile union sxe_rx_data_desc *rxdp;
+       struct sxe_rx_buffer *sw_ring;
+       u16 nb_pkts_recd;
+       s32 pos;
+       u16 rte;
+       uint8x16_t shuf_msk = {
+               0xFF, 0xFF,
+               0xFF, 0xFF,
+               12, 13,
+               0xFF, 0xFF,
+               12, 13,
+               14, 15,
+               4, 5, 6, 7
+               };
+       uint16x8_t crc_adjust = {0, 0, rxq->crc_len, 0,
+                                rxq->crc_len, 0, 0, 0};
+
+       nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_SXE_DESCS_PER_LOOP);
+
+       rxdp = rxq->desc_ring + rxq->processing_idx;
+
+       rte_prefetch_non_temporal(rxdp);
+
+       if (rxq->realloc_num > RTE_PMD_SXE_MAX_RX_BURST)
+               sxe_rxq_rearm(rxq);
+
+       if (!(rxdp->wb.upper.status_error &
+                               rte_cpu_to_le_32(SXE_RXDADV_STAT_DD))) {
+               rte = 0;
+               goto out;
+       }
+
+       sw_ring = &rxq->buffer_ring[rxq->processing_idx];
+
+       RTE_BUILD_BUG_ON((RTE_MBUF_F_RX_VLAN | RTE_MBUF_F_RX_VLAN_STRIPPED) > UINT8_MAX);
+
+#if defined DPDK_22_11_3 || defined DPDK_21_11_5 || defined DPDK_23_11_3 || defined DPDK_24_11_1
+       u16 udp_p_flag = SXE_RXDADV_PKTTYPE_UDP;
+       u8 vlan_flags = rxq->vlan_flags & UINT8_MAX;
+#elif defined DPDK_19_11_6
+       u8 vlan_flags = rxq->vlan_flags & UINT8_MAX;
+#endif
+
+       for (pos = 0, nb_pkts_recd = 0; pos < nb_pkts;
+                       pos += RTE_SXE_DESCS_PER_LOOP,
+                       rxdp += RTE_SXE_DESCS_PER_LOOP) {
+               uint64x2_t descs[RTE_SXE_DESCS_PER_LOOP];
+               uint8x16_t pkt_mb1, pkt_mb2, pkt_mb3, pkt_mb4;
+               uint8x16x2_t sterr_tmp1, sterr_tmp2;
+               uint64x2_t mbp1, mbp2;
+               uint8x16_t staterr;
+               uint16x8_t tmp;
+               u32 stat;
+
+               mbp1 = vld1q_u64((u64 *)&sw_ring[pos]);
+
+               vst1q_u64((u64 *)&rx_pkts[pos], mbp1);
+
+               mbp2 = vld1q_u64((u64 *)&sw_ring[pos + 2]);
+
+               descs[0] =  vld1q_u64((u64 *)(rxdp));
+               descs[1] =  vld1q_u64((u64 *)(rxdp + 1));
+               descs[2] =  vld1q_u64((u64 *)(rxdp + 2));
+               descs[3] =  vld1q_u64((u64 *)(rxdp + 3));
+
+               vst1q_u64((u64 *)&rx_pkts[pos + 2], mbp2);
+
+               if (split_packet) {
+                       rte_mbuf_prefetch_part2(rx_pkts[pos]);
+                       rte_mbuf_prefetch_part2(rx_pkts[pos + 1]);
+                       rte_mbuf_prefetch_part2(rx_pkts[pos + 2]);
+                       rte_mbuf_prefetch_part2(rx_pkts[pos + 3]);
+               }
+
+               pkt_mb4 = vqtbl1q_u8(vreinterpretq_u8_u64(descs[3]), shuf_msk);
+               pkt_mb3 = vqtbl1q_u8(vreinterpretq_u8_u64(descs[2]), shuf_msk);
+
+               pkt_mb2 = vqtbl1q_u8(vreinterpretq_u8_u64(descs[1]), shuf_msk);
+               pkt_mb1 = vqtbl1q_u8(vreinterpretq_u8_u64(descs[0]), shuf_msk);
+
+               sterr_tmp2 = vzipq_u8(vreinterpretq_u8_u64(descs[1]),
+                                         vreinterpretq_u8_u64(descs[3]));
+               sterr_tmp1 = vzipq_u8(vreinterpretq_u8_u64(descs[0]),
+                                         vreinterpretq_u8_u64(descs[2]));
+
+               staterr = vzipq_u8(sterr_tmp1.val[1], sterr_tmp2.val[1]).val[0];
+
+#if defined DPDK_22_11_3 || defined DPDK_21_11_5 || defined DPDK_23_11_3 || defined DPDK_24_11_1
+               sxe_desc_to_olflags_v(sterr_tmp1, sterr_tmp2, staterr, vlan_flags,
+                                 udp_p_flag, &rx_pkts[pos]);
+#elif defined DPDK_19_11_6
+               sxe_desc_to_olflags_v(sterr_tmp1, sterr_tmp2, staterr, vlan_flags,
+                                 &rx_pkts[pos]);
+#else
+               sxe_desc_to_olflags_v(sterr_tmp1, sterr_tmp2, staterr, &rx_pkts[pos]);
+#endif
+
+               tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb4), crc_adjust);
+               pkt_mb4 = vreinterpretq_u8_u16(tmp);
+               tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb3), crc_adjust);
+               pkt_mb3 = vreinterpretq_u8_u16(tmp);
+
+               vst1q_u8((void *)&rx_pkts[pos + 3]->rx_descriptor_fields1,
+                        pkt_mb4);
+               vst1q_u8((void *)&rx_pkts[pos + 2]->rx_descriptor_fields1,
+                        pkt_mb3);
+
+               tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb2), crc_adjust);
+               pkt_mb2 = vreinterpretq_u8_u16(tmp);
+               tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb1), crc_adjust);
+               pkt_mb1 = vreinterpretq_u8_u16(tmp);
+
+               if (split_packet) {
+                       stat = vgetq_lane_u32(vreinterpretq_u32_u8(staterr), 0);
+                       *(s32 *)split_packet = ~stat & SXE_VPMD_DESC_EOP_MASK;
+
+                       split_packet += RTE_SXE_DESCS_PER_LOOP;
+               }
+
+               staterr = vshlq_n_u8(staterr, SXE_UINT8_BIT - 1);
+               staterr = vreinterpretq_u8_s8
+                               (vshrq_n_s8(vreinterpretq_s8_u8(staterr),
+                                       SXE_UINT8_BIT - 1));
+               stat = ~vgetq_lane_u32(vreinterpretq_u32_u8(staterr), 0);
+
+               rte_prefetch_non_temporal(rxdp + RTE_SXE_DESCS_PER_LOOP);
+
+               vst1q_u8((u8 *)&rx_pkts[pos + 1]->rx_descriptor_fields1,
+                        pkt_mb2);
+               vst1q_u8((u8 *)&rx_pkts[pos]->rx_descriptor_fields1,
+                        pkt_mb1);
+
+               sxe_desc_to_ptype_v(descs, rxq->pkt_type_mask, &rx_pkts[pos]);
+
+               if (unlikely(stat == 0)) {
+                       nb_pkts_recd += RTE_SXE_DESCS_PER_LOOP;
+               } else {
+#if (defined DPDK_23_11_3 && !defined DPDK_23_7) || defined DPDK_24_11_1
+                       nb_pkts_recd += rte_ctz32(stat) / SXE_UINT8_BIT;
+#else
+                       nb_pkts_recd += __builtin_ctz(stat) / SXE_UINT8_BIT;
+#endif
+                       break;
+               }
+       }
+
+       rxq->processing_idx = (u16)(rxq->processing_idx + nb_pkts_recd);
+       rxq->processing_idx = (u16)(rxq->processing_idx & (rxq->ring_depth - 1));
+       rxq->realloc_num = (u16)(rxq->realloc_num + nb_pkts_recd);
+
+       rte = nb_pkts_recd;
+
+out:
+       return rte;
+}
+
+u16 sxe_pkts_vec_recv(void *rx_queue, struct rte_mbuf **rx_pkts, u16 nb_pkts)
+{
+       return sxe_recv_raw_pkts_vec(rx_queue, rx_pkts, nb_pkts, NULL);
+}
+
+static u16 sxe_recv_scattered_burst_vec(void *rx_queue,
+                       struct rte_mbuf **rx_pkts, u16 nb_pkts)
+{
+       u32 i = 0;
+       struct sxe_rx_queue *rxq = rx_queue;
+       u8 split_flags[RTE_PMD_SXE_MAX_RX_BURST] = {0};
+
+       u16 nb_bufs = sxe_recv_raw_pkts_vec(rxq, rx_pkts, nb_pkts,
+                       split_flags);
+       if (nb_bufs == 0)
+               goto l_out;
+
+       const u64 *split_fl64 = (u64 *)split_flags;
+       if (rxq->pkt_first_seg == NULL &&
+                       split_fl64[0] == 0 && split_fl64[1] == 0 &&
+                       split_fl64[2] == 0 && split_fl64[3] == 0)
+               goto l_out;
+
+       if (rxq->pkt_first_seg == NULL) {
+               while (i < nb_bufs && !split_flags[i])
+                       i++;
+               if (i == nb_bufs)
+                       goto l_out;
+               rxq->pkt_first_seg = rx_pkts[i];
+       }
+
+       nb_bufs = i + sxe_packets_reassemble(rxq, &rx_pkts[i], nb_bufs - i,
+               &split_flags[i]);
+
+l_out:
+       return nb_bufs;
+}
+
+u16
+sxe_scattered_pkts_vec_recv(void *rx_queue, struct rte_mbuf **rx_pkts,
+                                 u16 nb_pkts)
+{
+       u16 retval = 0;
+
+       while (nb_pkts > RTE_PMD_SXE_MAX_RX_BURST) {
+               u16 burst;
+
+               burst = sxe_recv_scattered_burst_vec(rx_queue,
+                                                          rx_pkts + retval,
+                                                          RTE_PMD_SXE_MAX_RX_BURST);
+               retval += burst;
+               nb_pkts -= burst;
+               if (burst < RTE_PMD_SXE_MAX_RX_BURST)
+                       goto l_out;
+       }
+
+       retval += sxe_recv_scattered_burst_vec(rx_queue,
+                                               rx_pkts + retval,
+                                               nb_pkts);
+l_out:
+       return retval;
+}
+
+static inline void
+sxe_single_vec_desc_fill(volatile union sxe_tx_data_desc *txdp,
+               struct rte_mbuf *pkt, u64 flags)
+{
+       uint64x2_t descriptor = {
+                       pkt->buf_iova + pkt->data_off,
+                       (u64)pkt->pkt_len << 46 | flags | pkt->data_len};
+
+       vst1q_u64((u64 *)&txdp->read, descriptor);
+}
+
+static inline void
+sxe_vec_desc_fill(volatile union sxe_tx_data_desc *txdp,
+               struct rte_mbuf **pkt, u16 nb_pkts,  u64 flags)
+{
+       s32 i;
+
+       for (i = 0; i < nb_pkts; ++i, ++txdp, ++pkt)
+               sxe_single_vec_desc_fill(txdp, *pkt, flags);
+}
+
+u16 __sxe_pkts_vector_xmit(void *tx_queue, struct rte_mbuf **tx_pkts,
+                          u16 nb_pkts)
+{
+       struct sxe_tx_queue *txq = (struct sxe_tx_queue *)tx_queue;
+       volatile union sxe_tx_data_desc *txdp;
+       struct sxe_tx_buffer_vec *txep;
+       u16 n, nb_commit, tx_id;
+       u64 flags = SXE_TX_DESC_FLAGS;
+       u64 rs = SXE_TX_DESC_RS_MASK | SXE_TX_DESC_FLAGS;
+       s32 i;
+
+       nb_pkts = RTE_MIN(nb_pkts, txq->rs_thresh);
+
+       if (txq->desc_free_num < txq->free_thresh)
+               sxe_tx_bufs_vec_free(txq);
+
+       nb_pkts = (u16)RTE_MIN(txq->desc_free_num, nb_pkts);
+       nb_commit = nb_pkts;
+       if (unlikely(nb_pkts == 0))
+               goto l_out;
+
+       tx_id = txq->next_to_use;
+       txdp = &txq->desc_ring[tx_id];
+       txep = &txq->buffer_ring_vec[tx_id];
+
+       txq->desc_free_num = (u16)(txq->desc_free_num - nb_pkts);
+
+       n = (u16)(txq->ring_depth - tx_id);
+       if (nb_commit >= n) {
+               sxe_vec_mbuf_fill(txep, tx_pkts, n);
+
+               for (i = 0; i < n - 1; ++i, ++tx_pkts, ++txdp)
+                       sxe_single_vec_desc_fill(txdp, *tx_pkts, flags);
+
+               sxe_single_vec_desc_fill(txdp, *tx_pkts++, rs);
+
+               nb_commit = (u16)(nb_commit - n);
+
+               tx_id = 0;
+               txq->next_rs = (u16)(txq->rs_thresh - 1);
+
+               txdp = &txq->desc_ring[tx_id];
+               txep = &txq->buffer_ring_vec[tx_id];
+       }
+
+       sxe_vec_mbuf_fill(txep, tx_pkts, nb_commit);
+       sxe_vec_desc_fill(txdp, tx_pkts, nb_commit, flags);
+
+       tx_id = (u16)(tx_id + nb_commit);
+       if (tx_id > txq->next_rs) {
+               txq->desc_ring[txq->next_rs].read.cmd_type_len |=
+                       rte_cpu_to_le_32(SXE_TX_DESC_RS_MASK);
+               txq->next_rs = (u16)(txq->next_rs +
+                       txq->rs_thresh);
+       }
+
+       txq->next_to_use = tx_id;
+
+       sxe_write_addr(txq->next_to_use, txq->tdt_reg_addr);
+
+l_out:
+       return nb_pkts;
+}
+
+static void __rte_cold
+sxe_tx_queue_release_mbufs_vec(struct sxe_tx_queue *txq)
+{
+       sxe_tx_mbufs_vec_release(txq);
+}
+
+void __rte_cold
+sxe_rx_queue_vec_mbufs_release(struct sxe_rx_queue *rxq)
+{
+       sxe_rx_vec_mbufs_release(rxq);
+}
+
+static void __rte_cold
+sxe_tx_free_swring(struct sxe_tx_queue *txq)
+{
+       sxe_tx_buffer_ring_vec_free(txq);
+}
+
+static void __rte_cold
+sxe_reset_tx_queue(struct sxe_tx_queue *txq)
+{
+       sxe_tx_queue_vec_init(txq);
+}
+
+static const struct sxe_txq_ops vec_txq_ops = {
+       .init = sxe_reset_tx_queue,
+       .mbufs_release = sxe_tx_queue_release_mbufs_vec,
+       .buffer_ring_free = sxe_tx_free_swring,
+};
+
+s32 __rte_cold
+sxe_rxq_vec_setup(struct sxe_rx_queue *rxq)
+{
+       return sxe_default_rxq_vec_setup(rxq);
+}
+
+s32 __rte_cold
+sxe_txq_vec_setup(struct sxe_tx_queue *txq)
+{
+       return sxe_default_txq_vec_setup(txq, &vec_txq_ops);
+}
+
+s32 __rte_cold
+sxe_rx_vec_condition_check(struct rte_eth_dev *dev)
+{
+       struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
+
+       if (rxmode->offloads & DEV_RX_OFFLOAD_CHECKSUM)
+               return -1;
+
+       return sxe_default_rx_vec_condition_check(dev);
+}
+
+#endif
diff --git a/drivers/net/sxe/pf/sxe_vec_sse.c b/drivers/net/sxe/pf/sxe_vec_sse.c
new file mode 100644
index 0000000000..70b74ba945
--- /dev/null
+++ b/drivers/net/sxe/pf/sxe_vec_sse.c
@@ -0,0 +1,638 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (C), 2022, Linkdata Technology Co., Ltd.
+ */
+
+#if defined SXE_DPDK_L4_FEATURES && defined SXE_DPDK_SIMD
+#include <stdint.h>
+#include "sxe_dpdk_version.h"
+#if defined DPDK_20_11_5 || defined DPDK_19_11_6
+#include <rte_ethdev_driver.h>
+#else
+#include <ethdev_driver.h>
+#endif
+#include <rte_malloc.h>
+#ifdef DPDK_24_11_1
+#include <rte_vect.h>
+#else
+#include <tmmintrin.h>
+#endif
+
+#include "sxe_vec_common.h"
+#include "sxe_compat_version.h"
+
+#pragma GCC diagnostic ignored "-Wcast-qual"
+
+#define SXE_MAX_TX_FREE_BUF_SZ 64
+
+static inline void
+sxe_rxq_realloc(sxe_rx_queue_s *rx_queue)
+{
+       s32 i;
+       u16 rx_index;
+       volatile union sxe_rx_data_desc *desc_ring;
+       sxe_rx_buffer_s *buf_ring =
+                       &rx_queue->buffer_ring[rx_queue->realloc_start];
+       struct rte_mbuf *mbuf_0, *mbuf_1;
+       __m128i head_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
+                       RTE_PKTMBUF_HEADROOM);
+       __m128i dma_addr0, dma_addr1;
+
+       const __m128i addr_mask = _mm_set_epi64x(0, UINT64_MAX);
+
+       desc_ring = rx_queue->desc_ring + rx_queue->realloc_start;
+
+       if (rte_mempool_get_bulk(rx_queue->mb_pool,
+                                (void *)buf_ring,
+                                RTE_PMD_SXE_MAX_RX_BURST) < 0) {
+               if (rx_queue->realloc_num + RTE_PMD_SXE_MAX_RX_BURST >=
+                       rx_queue->ring_depth) {
+                       dma_addr0 = _mm_setzero_si128();
+                       for (i = 0; i < SXE_DESCS_PER_LOOP; i++) {
+                               buf_ring[i].mbuf = &rx_queue->fake_mbuf;
+                               _mm_store_si128((__m128i *)&desc_ring[i].read,
+                                               dma_addr0);
+                       }
+               }
+               rte_eth_devices[rx_queue->port_id].data->rx_mbuf_alloc_failed +=
+                       RTE_PMD_SXE_MAX_RX_BURST;
+               return;
+       }
+
+       for (i = 0; i < RTE_PMD_SXE_MAX_RX_BURST; i += 2, buf_ring += 2) {
+               __m128i vaddr0, vaddr1;
+
+               mbuf_0 = buf_ring[0].mbuf;
+               mbuf_1 = buf_ring[1].mbuf;
+
+               RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+                               offsetof(struct rte_mbuf, buf_addr) + 8);
+
+               vaddr0 = _mm_loadu_si128((__m128i *)&mbuf_0->buf_addr);
+               vaddr1 = _mm_loadu_si128((__m128i *)&mbuf_1->buf_addr);
+
+               dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
+               dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
+
+               dma_addr0 = _mm_add_epi64(dma_addr0, head_room);
+               dma_addr1 = _mm_add_epi64(dma_addr1, head_room);
+
+               dma_addr0 = _mm_and_si128(dma_addr0, addr_mask);
+               dma_addr1 = _mm_and_si128(dma_addr1, addr_mask);
+
+               _mm_store_si128((__m128i *)&desc_ring++->read, dma_addr0);
+               _mm_store_si128((__m128i *)&desc_ring++->read, dma_addr1);
+       }
+
+       rx_queue->realloc_start += RTE_PMD_SXE_MAX_RX_BURST;
+       if (rx_queue->realloc_start >= rx_queue->ring_depth)
+               rx_queue->realloc_start = 0;
+
+       rx_queue->realloc_num -= RTE_PMD_SXE_MAX_RX_BURST;
+
+       rx_index = (u16)((rx_queue->realloc_start == 0) ?
+       rx_index = (u16)((rx_queue->realloc_start == 0) ?
+                       (rx_queue->ring_depth - 1) : (rx_queue->realloc_start - 1));
+
+       SXE_PCI_REG_WC_WRITE_RELAXED(rx_queue->rdt_reg_addr, rx_index);
+}
+
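+/*
+ * Translate the status/error and packet-type words of four descriptors into
+ * mbuf ol_flags (RSS, VLAN and IP/L4 checksum status).
+ */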
+static inline void
+sxe_desc_to_olflags(__m128i descs[4], __m128i mbuf_init, u8 vlan_flags,
+                       u16 udp_p_flag, struct rte_mbuf **rx_pkts)
+{
+       __m128i ptype0, ptype1, vtype0, vtype1, csum, udp_csum_skip;
+       __m128i rearm0, rearm1, rearm2, rearm3;
+
+       const __m128i rsstype_mask = _mm_set_epi16
+                       (0x0000, 0x0000, 0x0000, 0x0000,
+                       0x000F, 0x000F, 0x000F, 0x000F);
+
+       const __m128i ol_flags_mask = _mm_set_epi16
+                       (0x0000, 0x0000, 0x0000, 0x0000,
+                       0x00FF, 0x00FF, 0x00FF, 0x00FF);
+
+       const __m128i rss_flags = _mm_set_epi8(RTE_MBUF_F_RX_FDIR, 0, 0, 0,
+                       0, 0, 0, RTE_MBUF_F_RX_RSS_HASH,
+                       RTE_MBUF_F_RX_RSS_HASH, 0, RTE_MBUF_F_RX_RSS_HASH, 0,
+                       RTE_MBUF_F_RX_RSS_HASH, RTE_MBUF_F_RX_RSS_HASH, RTE_MBUF_F_RX_RSS_HASH, 0);
+
+       const __m128i vlan_csum_mask = _mm_set_epi16
+               ((SXE_RXDADV_ERR_L4E | SXE_RXDADV_ERR_IPE) >> 16,
+               (SXE_RXDADV_ERR_L4E | SXE_RXDADV_ERR_IPE) >> 16,
+               (SXE_RXDADV_ERR_L4E | SXE_RXDADV_ERR_IPE) >> 16,
+               (SXE_RXDADV_ERR_L4E | SXE_RXDADV_ERR_IPE) >> 16,
+               SXE_RXD_STAT_VP, SXE_RXD_STAT_VP,
+               SXE_RXD_STAT_VP, SXE_RXD_STAT_VP);
+
+       const __m128i vlan_csum_map_low = _mm_set_epi8
+               (0, 0, 0, 0,
+               vlan_flags | RTE_MBUF_F_RX_IP_CKSUM_BAD | RTE_MBUF_F_RX_L4_CKSUM_BAD,
+               vlan_flags | RTE_MBUF_F_RX_IP_CKSUM_BAD,
+               vlan_flags | RTE_MBUF_F_RX_IP_CKSUM_GOOD | RTE_MBUF_F_RX_L4_CKSUM_BAD,
+               vlan_flags | RTE_MBUF_F_RX_IP_CKSUM_GOOD,
+               vlan_flags | RTE_MBUF_F_RX_IP_CKSUM_GOOD,
+               0, 0, 0, 0,
+               RTE_MBUF_F_RX_IP_CKSUM_BAD | RTE_MBUF_F_RX_L4_CKSUM_BAD,
+               RTE_MBUF_F_RX_IP_CKSUM_BAD,
+               RTE_MBUF_F_RX_IP_CKSUM_GOOD | RTE_MBUF_F_RX_L4_CKSUM_BAD,
+               RTE_MBUF_F_RX_IP_CKSUM_GOOD);
+
+       const __m128i vlan_csum_map_high = _mm_set_epi8
+               (0, 0, 0, 0,
+               0, RTE_MBUF_F_RX_L4_CKSUM_GOOD >> sizeof(u8), 0,
+               RTE_MBUF_F_RX_L4_CKSUM_GOOD >> sizeof(u8),
+               0, 0, 0, 0,
+               0, RTE_MBUF_F_RX_L4_CKSUM_GOOD >> sizeof(u8), 0,
+               RTE_MBUF_F_RX_L4_CKSUM_GOOD >> sizeof(u8));
+
+       const __m128i udp_hdr_p_msk = _mm_set_epi16
+               (0, 0, 0, 0,
+                udp_p_flag, udp_p_flag, udp_p_flag, udp_p_flag);
+
+       const __m128i udp_csum_bad_shuf = _mm_set_epi8
+               (0, 0, 0, 0, 0, 0, 0, 0,
+                0, 0, 0, 0, 0, 0, ~(u8)RTE_MBUF_F_RX_L4_CKSUM_BAD, 0xFF);
+
+       ptype0 = _mm_unpacklo_epi16(descs[0], descs[1]);
+       ptype1 = _mm_unpacklo_epi16(descs[2], descs[3]);
+
+       vtype0 = _mm_unpackhi_epi16(descs[0], descs[1]);
+       vtype1 = _mm_unpackhi_epi16(descs[2], descs[3]);
+
+       ptype0 = _mm_unpacklo_epi32(ptype0, ptype1);
+
+       udp_csum_skip = _mm_and_si128(ptype0, udp_hdr_p_msk);
+
+       ptype0 = _mm_and_si128(ptype0, rsstype_mask);
+
+       ptype0 = _mm_shuffle_epi8(rss_flags, ptype0);
+
+       vtype1 = _mm_unpacklo_epi32(vtype0, vtype1);
+       vtype1 = _mm_and_si128(vtype1, vlan_csum_mask);
+
+       csum = _mm_srli_epi16(vtype1, 14);
+
+       csum = _mm_srli_si128(csum, 8);
+       vtype1 = _mm_or_si128(csum, vtype1);
+
+       vtype0 = _mm_shuffle_epi8(vlan_csum_map_high, vtype1);
+       vtype0 = _mm_slli_epi16(vtype0, sizeof(u8));
+
+       vtype1 = _mm_shuffle_epi8(vlan_csum_map_low, vtype1);
+       vtype1 = _mm_and_si128(vtype1, ol_flags_mask);
+       vtype1 = _mm_or_si128(vtype0, vtype1);
+
+       vtype1 = _mm_or_si128(ptype0, vtype1);
+
+       udp_csum_skip = _mm_srli_epi16(udp_csum_skip, 9);
+       udp_csum_skip = _mm_shuffle_epi8(udp_csum_bad_shuf, udp_csum_skip);
+       vtype1 = _mm_and_si128(vtype1, udp_csum_skip);
+
+       rearm0 = _mm_blend_epi16(mbuf_init, _mm_slli_si128(vtype1, 8), 0x10);
+       rearm1 = _mm_blend_epi16(mbuf_init, _mm_slli_si128(vtype1, 6), 0x10);
+       rearm2 = _mm_blend_epi16(mbuf_init, _mm_slli_si128(vtype1, 4), 0x10);
+       rearm3 = _mm_blend_epi16(mbuf_init, _mm_slli_si128(vtype1, 2), 0x10);
+
+       RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, ol_flags) !=
+                       offsetof(struct rte_mbuf, rearm_data) + 8);
+       RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, rearm_data) !=
+                       RTE_ALIGN(offsetof(struct rte_mbuf, rearm_data), 16));
+
+       _mm_store_si128((__m128i *)&rx_pkts[0]->rearm_data, rearm0);
+       _mm_store_si128((__m128i *)&rx_pkts[1]->rearm_data, rearm1);
+       _mm_store_si128((__m128i *)&rx_pkts[2]->rearm_data, rearm2);
+       _mm_store_si128((__m128i *)&rx_pkts[3]->rearm_data, rearm3);
+}
+
+static inline u32 sxe_packet_type_get(int index,
+                                       u32 pkt_info,
+                                       u32 etqf_check)
+{
+       if (etqf_check & (0x02 << (index * SXE_DESCS_PER_LOOP)))
+               return RTE_PTYPE_UNKNOWN;
+
+       pkt_info &= SXE_PACKET_TYPE_MASK;
+       return sxe_ptype_table[pkt_info];
+}
+
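+/*
+ * Extract the packet-type field of four descriptors and map it to an mbuf
+ * packet_type via sxe_ptype_table.
+ */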
+static inline void
+sxe_desc_to_ptype_vec(__m128i descs[4], u16 pkt_type_mask,
+               struct rte_mbuf **rx_pkts)
+{
+       __m128i etqf_mask = _mm_set_epi64x(0x800000008000LL, 0x800000008000LL);
+       __m128i ptype_mask = _mm_set_epi32(pkt_type_mask,
+                       pkt_type_mask, pkt_type_mask, pkt_type_mask);
+
+       u32 etqf_check, pkt_info;
+
+       __m128i ptype0 = _mm_unpacklo_epi32(descs[0], descs[2]);
+       __m128i ptype1 = _mm_unpacklo_epi32(descs[1], descs[3]);
+
+       ptype0 = _mm_unpacklo_epi32(ptype0, ptype1);
+
+       etqf_check = _mm_movemask_epi8(_mm_and_si128(ptype0, etqf_mask));
+
+       ptype0 = _mm_and_si128(_mm_srli_epi32(ptype0, SXE_RXDADV_PKTTYPE_ETQF_SHIFT),
+                                  ptype_mask);
+
+
+       pkt_info = _mm_extract_epi32(ptype0, 0);
+       rx_pkts[0]->packet_type =
+               sxe_packet_type_get(0, pkt_info, etqf_check);
+       pkt_info = _mm_extract_epi32(ptype0, 1);
+       rx_pkts[1]->packet_type =
+               sxe_packet_type_get(1, pkt_info, etqf_check);
+       pkt_info = _mm_extract_epi32(ptype0, 2);
+       rx_pkts[2]->packet_type =
+               sxe_packet_type_get(2, pkt_info, etqf_check);
+       pkt_info = _mm_extract_epi32(ptype0, 3);
+       rx_pkts[3]->packet_type =
+               sxe_packet_type_get(3, pkt_info, etqf_check);
+}
+
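+/*
+ * Receive up to RTE_PMD_SXE_MAX_RX_BURST packets, handling SXE_DESCS_PER_LOOP
+ * descriptors per iteration; split_packet marks buffers without EOP set so
+ * the caller can reassemble multi-segment packets.
+ */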
+static inline u16
+sxe_raw_pkts_vec_recv(sxe_rx_queue_s *rx_queue, struct rte_mbuf **rx_pkts,
+               u16 pkts_num, u8 *split_packet)
+{
+       volatile union sxe_rx_data_desc *desc_ring;
+       sxe_rx_buffer_s *buffer_ring;
+       u16 pkts_recd_num;
+       s32 pos;
+       u64 var;
+       __m128i shuf_msk;
+       __m128i crc_adjust = _mm_set_epi16
+                               (0, 0, 0,
+                               -rx_queue->crc_len,
+                               0,
+                               -rx_queue->crc_len,
+                               0, 0
+                       );
+
+       RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, pkt_len) !=
+                       offsetof(struct rte_mbuf, rx_descriptor_fields1) + 4);
+       RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) !=
+                       offsetof(struct rte_mbuf, rx_descriptor_fields1) + 8);
+       __m128i dd_check, eop_check;
+       __m128i mbuf_init;
+       u8 vlan_flags;
+       u16 udp_p_flag = 0;
+
+       pkts_num = RTE_MIN(pkts_num, RTE_PMD_SXE_MAX_RX_BURST);
+
+       pkts_num = RTE_ALIGN_FLOOR(pkts_num, SXE_DESCS_PER_LOOP);
+
+       desc_ring = rx_queue->desc_ring + rx_queue->processing_idx;
+
+       rte_prefetch0(desc_ring);
+
+       if (rx_queue->realloc_num > RTE_PMD_SXE_MAX_RX_BURST)
+               sxe_rxq_realloc(rx_queue);
+
+       if (!(desc_ring->wb.upper.status_error &
+                               rte_cpu_to_le_32(SXE_RXDADV_STAT_DD))) {
+               pkts_recd_num = 0;
+               goto l_out;
+       }
+
+       udp_p_flag = SXE_RXDADV_PKTTYPE_UDP;
+
+       dd_check = _mm_set_epi64x(0x0000000100000001LL, 0x0000000100000001LL);
+
+       eop_check = _mm_set_epi64x(0x0000000200000002LL, 0x0000000200000002LL);
+
+       shuf_msk = _mm_set_epi8
+               (7, 6, 5, 4,
+               15, 14,
+               13, 12,
+               0xFF, 0xFF,
+               13, 12,
+               0xFF, 0xFF,
+               0xFF, 0xFF
+               );
+
+       RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, pkt_len) !=
+                       offsetof(struct rte_mbuf, rx_descriptor_fields1) + 4);
+       RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) !=
+                       offsetof(struct rte_mbuf, rx_descriptor_fields1) + 8);
+       RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, vlan_tci) !=
+                       offsetof(struct rte_mbuf, rx_descriptor_fields1) + 10);
+       RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, hash) !=
+                       offsetof(struct rte_mbuf, rx_descriptor_fields1) + 12);
+
+       mbuf_init = _mm_set_epi64x(0, rx_queue->mbuf_init_value);
+
+       buffer_ring = &rx_queue->buffer_ring[rx_queue->processing_idx];
+
+       RTE_BUILD_BUG_ON((RTE_MBUF_F_RX_VLAN | RTE_MBUF_F_RX_VLAN_STRIPPED) > UINT8_MAX);
+       vlan_flags = rx_queue->vlan_flags & UINT8_MAX;
+
+       for (pos = 0, pkts_recd_num = 0; pos < pkts_num;
+                       pos += SXE_DESCS_PER_LOOP,
+                       desc_ring += SXE_DESCS_PER_LOOP) {
+               __m128i descs[SXE_DESCS_PER_LOOP];
+               __m128i pkt_mb1, pkt_mb2, pkt_mb3, pkt_mb4;
+               __m128i zero, staterr, state_err1, state_err2;
+               __m128i mbp1;
+#if defined(RTE_ARCH_X86_64)
+               __m128i mbp2;
+#endif
+
+               mbp1 = _mm_loadu_si128((__m128i *)&buffer_ring[pos]);
+
+               descs[3] = _mm_loadu_si128((__m128i *)(desc_ring + 3));
+               rte_compiler_barrier();
+
+               _mm_storeu_si128((__m128i *)&rx_pkts[pos], mbp1);
+
+#if defined(RTE_ARCH_X86_64)
+               mbp2 = _mm_loadu_si128((__m128i *)&buffer_ring[pos + 2]);
+#endif
+
+               descs[2] = _mm_loadu_si128((__m128i *)(desc_ring + 2));
+               rte_compiler_barrier();
+               descs[1] = _mm_loadu_si128((__m128i *)(desc_ring + 1));
+               rte_compiler_barrier();
+               descs[0] = _mm_loadu_si128((__m128i *)(desc_ring));
+
+#if defined(RTE_ARCH_X86_64)
+               _mm_storeu_si128((__m128i *)&rx_pkts[pos + 2], mbp2);
+#endif
+
+               if (split_packet) {
+                       rte_mbuf_prefetch_part2(rx_pkts[pos]);
+                       rte_mbuf_prefetch_part2(rx_pkts[pos + 1]);
+                       rte_mbuf_prefetch_part2(rx_pkts[pos + 2]);
+                       rte_mbuf_prefetch_part2(rx_pkts[pos + 3]);
+               }
+
+               rte_compiler_barrier();
+
+               pkt_mb4 = _mm_shuffle_epi8(descs[3], shuf_msk);
+               pkt_mb3 = _mm_shuffle_epi8(descs[2], shuf_msk);
+               pkt_mb2 = _mm_shuffle_epi8(descs[1], shuf_msk);
+               pkt_mb1 = _mm_shuffle_epi8(descs[0], shuf_msk);
+
+               state_err2 = _mm_unpackhi_epi32(descs[3], descs[2]);
+               state_err1 = _mm_unpackhi_epi32(descs[1], descs[0]);
+
+               sxe_desc_to_olflags(descs, mbuf_init, vlan_flags, udp_p_flag,
+                                       &rx_pkts[pos]);
+
+               pkt_mb4 = _mm_add_epi16(pkt_mb4, crc_adjust);
+               pkt_mb3 = _mm_add_epi16(pkt_mb3, crc_adjust);
+
+               zero = _mm_xor_si128(dd_check, dd_check);
+
+               staterr = _mm_unpacklo_epi32(state_err1, state_err2);
+
+               _mm_storeu_si128((void *)&rx_pkts[pos + 3]->rx_descriptor_fields1,
+                               pkt_mb4);
+               _mm_storeu_si128((void *)&rx_pkts[pos + 2]->rx_descriptor_fields1,
+                               pkt_mb3);
+
+               pkt_mb2 = _mm_add_epi16(pkt_mb2, crc_adjust);
+               pkt_mb1 = _mm_add_epi16(pkt_mb1, crc_adjust);
+
+               if (split_packet) {
+                       __m128i eop_shuf_mask = _mm_set_epi8
+                               (0xFF, 0xFF, 0xFF, 0xFF,
+                               0xFF, 0xFF, 0xFF, 0xFF,
+                               0xFF, 0xFF, 0xFF, 0xFF,
+                               0x04, 0x0C, 0x00, 0x08
+                               );
+
+                       __m128i eop_bits = _mm_andnot_si128(staterr, eop_check);
+                       eop_bits = _mm_shuffle_epi8(eop_bits, eop_shuf_mask);
+                       *(int *)split_packet = _mm_cvtsi128_si32(eop_bits);
+                       split_packet += SXE_DESCS_PER_LOOP;
+               }
+
+               staterr = _mm_and_si128(staterr, dd_check);
+
+               staterr = _mm_packs_epi32(staterr, zero);
+
+               _mm_storeu_si128((void *)&rx_pkts[pos + 1]->rx_descriptor_fields1,
+                               pkt_mb2);
+               _mm_storeu_si128((void *)&rx_pkts[pos]->rx_descriptor_fields1,
+                               pkt_mb1);
+
+               sxe_desc_to_ptype_vec(descs, rx_queue->pkt_type_mask, &rx_pkts[pos]);
+
+#if (defined DPDK_23_11_3 && !defined DPDK_23_7) || defined DPDK_24_11_1
+               var = rte_popcount64(_mm_cvtsi128_si64(staterr));
+#else
+               var = __builtin_popcountll(_mm_cvtsi128_si64(staterr));
+#endif
+               pkts_recd_num += var;
+               if (likely(var != SXE_DESCS_PER_LOOP))
+                       break;
+       }
+
+       rx_queue->processing_idx = (u16)(rx_queue->processing_idx + pkts_recd_num);
+       rx_queue->processing_idx = (u16)(rx_queue->processing_idx & (rx_queue->ring_depth - 1));
+       rx_queue->realloc_num = (u16)(rx_queue->realloc_num + pkts_recd_num);
+
+l_out:
+       return pkts_recd_num;
+}
+
+u16
+sxe_pkts_vec_recv(void *rx_queue, struct rte_mbuf **rx_pkts, u16 pkts_num)
+{
+       return sxe_raw_pkts_vec_recv(rx_queue, rx_pkts, pkts_num, NULL);
+}
+
+static u16
+sxe_scattered_burst_vec_recv(void *rx_queue, struct rte_mbuf **rx_pkts,
+                               u16 pkts_num)
+{
+       u16 i = 0;
+       u16 bufs_num;
+       sxe_rx_queue_s *rxq = rx_queue;
+       u8 split_flags[RTE_PMD_SXE_MAX_RX_BURST] = {0};
+
+       bufs_num = sxe_raw_pkts_vec_recv(rxq, rx_pkts, pkts_num,
+                       split_flags);
+       if (bufs_num == 0)
+               goto l_out;
+
+       const u64 *split_flag_64 = (u64 *)split_flags;
+       if (rxq->pkt_first_seg == NULL &&
+               split_flag_64[0] == 0 && split_flag_64[1] == 0 &&
+               split_flag_64[2] == 0 && split_flag_64[3] == 0)
+               goto l_out;
+
+       if (rxq->pkt_first_seg == NULL) {
+               while (i < bufs_num && !split_flags[i])
+                       i++;
+               if (i == bufs_num)
+                       goto l_out;
+               rxq->pkt_first_seg = rx_pkts[i];
+       }
+
+       bufs_num = i + sxe_packets_reassemble(rxq, &rx_pkts[i], bufs_num - i,
+               &split_flags[i]);
+
+l_out:
+       return bufs_num;
+}
+
+u16
+sxe_scattered_pkts_vec_recv(void *rx_queue, struct rte_mbuf **rx_pkts,
+                                 u16 pkts_num)
+{
+       u16 ret = 0;
+
+       while (pkts_num > RTE_PMD_SXE_MAX_RX_BURST) {
+               u16 burst;
+
+               burst = sxe_scattered_burst_vec_recv(rx_queue,
+                                               rx_pkts + ret,
+                                               RTE_PMD_SXE_MAX_RX_BURST);
+               ret += burst;
+               pkts_num -= burst;
+               if (burst < RTE_PMD_SXE_MAX_RX_BURST)
+                       goto l_out;
+       }
+
+       ret += sxe_scattered_burst_vec_recv(rx_queue,
+                                       rx_pkts + ret,
+                                       pkts_num);
+l_out:
+       return ret;
+}
+
+void __rte_cold
+sxe_rx_queue_vec_mbufs_release(sxe_rx_queue_s *rx_queue)
+{
+       sxe_rx_vec_mbufs_release(rx_queue);
+}
+
+s32 __rte_cold
+sxe_rxq_vec_setup(sxe_rx_queue_s *rx_queue)
+{
+       return sxe_default_rxq_vec_setup(rx_queue);
+}
+
+s32 __rte_cold
+sxe_rx_vec_condition_check(struct rte_eth_dev *dev)
+{
+       return sxe_default_rx_vec_condition_check(dev);
+}
+
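+/*
+ * Build one TX data descriptor (buffer DMA address plus length/command flags)
+ * from a single mbuf.
+ */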
+static inline void
+sxe_single_vec_desc_fill(volatile sxe_tx_data_desc_u *desc_ring,
+               struct rte_mbuf *pkts, u64 flags)
+{
+       __m128i descriptor = _mm_set_epi64x((u64)pkts->pkt_len << 46 |
+                       flags | pkts->data_len,
+                       pkts->buf_iova + pkts->data_off);
+       _mm_store_si128((__m128i *)&desc_ring->read, descriptor);
+}
+
+static inline void
+sxe_vec_desc_fill(volatile sxe_tx_data_desc_u *desc_ring,
+               struct rte_mbuf **pkts, u16 pkts_num, u64 flags)
+{
+       s32 i;
+
+       for (i = 0; i < pkts_num; ++i, ++desc_ring, ++pkts)
+               sxe_single_vec_desc_fill(desc_ring, *pkts, flags);
+}
+
+u16
+__sxe_pkts_vector_xmit(void *tx_queue, struct rte_mbuf **tx_pkts,
+                          u16 pkts_num)
+{
+       sxe_tx_queue_s *txq = (sxe_tx_queue_s *)tx_queue;
+       volatile sxe_tx_data_desc_u *desc_ring;
+       struct sxe_tx_buffer_vec *buffer_ring;
+       u16 n, commit_num, ntu, xmit_pkts_num;
+       u64 flags = SXE_TX_DESC_FLAGS;
+       u64 rs_flags = SXE_TX_DESC_RS_MASK | SXE_TX_DESC_FLAGS;
+       s32 i;
+
+       if (txq->desc_free_num < txq->free_thresh)
+               sxe_tx_bufs_vec_free(txq);
+
+       xmit_pkts_num = RTE_MIN(pkts_num, txq->rs_thresh);
+       xmit_pkts_num = (u16)RTE_MIN(txq->desc_free_num, xmit_pkts_num);
+
+       commit_num = xmit_pkts_num;
+       if (unlikely(commit_num == 0))
+               goto l_out;
+
+       ntu = txq->next_to_use;
+       desc_ring = &txq->desc_ring[ntu];
+       buffer_ring = &txq->buffer_ring_vec[ntu];
+
+       txq->desc_free_num = (u16)(txq->desc_free_num - xmit_pkts_num);
+
+       n = (u16)(txq->ring_depth - ntu);
+       if (commit_num >= n) {
+               sxe_vec_mbuf_fill(buffer_ring, tx_pkts, n);
+
+               for (i = 0; i < n - 1; ++i, ++tx_pkts, ++desc_ring)
+                       sxe_single_vec_desc_fill(desc_ring, *tx_pkts, flags);
+
+               sxe_single_vec_desc_fill(desc_ring, *tx_pkts++, rs_flags);
+
+               commit_num = (u16)(commit_num - n);
+
+               ntu = 0;
+               txq->next_rs = (u16)(txq->rs_thresh - 1);
+
+               desc_ring = &txq->desc_ring[ntu];
+               buffer_ring = &txq->buffer_ring_vec[ntu];
+       }
+
+       sxe_vec_mbuf_fill(buffer_ring, tx_pkts, commit_num);
+
+       sxe_vec_desc_fill(desc_ring, tx_pkts, commit_num, flags);
+
+       ntu = (u16)(ntu + commit_num);
+       if (ntu > txq->next_rs) {
+               txq->desc_ring[txq->next_rs].read.cmd_type_len |=
+                       rte_cpu_to_le_32(SXE_TX_DESC_RS_MASK);
+               txq->next_rs = (u16)(txq->next_rs +
+                       txq->rs_thresh);
+       }
+
+       txq->next_to_use = ntu;
+       rte_wmb();
+       rte_write32_wc_relaxed((rte_cpu_to_le_32(txq->next_to_use)),
+                                                       txq->tdt_reg_addr);
+
+l_out:
+       return xmit_pkts_num;
+}
+
+static void __rte_cold
+sxe_tx_queue_init(sxe_tx_queue_s *tx_queue)
+{
+       sxe_tx_queue_vec_init(tx_queue);
+}
+
+static void __rte_cold
+sxe_tx_queue_mbufs_release(sxe_tx_queue_s *tx_queue)
+{
+       sxe_tx_mbufs_vec_release(tx_queue);
+}
+
+static void __rte_cold
+sxe_tx_buffer_ring_free(sxe_tx_queue_s *tx_queue)
+{
+       sxe_tx_buffer_ring_vec_free(tx_queue);
+}
+
+static const struct sxe_txq_ops txq_vec_ops = {
+       .init                    = sxe_tx_queue_init,
+       .mbufs_release  = sxe_tx_queue_mbufs_release,
+       .buffer_ring_free = sxe_tx_buffer_ring_free,
+};
+
+s32 __rte_cold
+sxe_txq_vec_setup(sxe_tx_queue_s *tx_queue)
+{
+       return sxe_default_txq_vec_setup(tx_queue, &txq_vec_ops);
+}
+
+#endif
-- 
2.18.4
