Remove the burst size limitation in the vector Rx path, since it should
retrieve as many received packets as possible. The scattered receive
path now uses a wrapper function so that it can also maximize the burst
size. Also do some code cleanup in the vector Rx path.

Signed-off-by: Jeff Guo <jia....@intel.com>
---
 drivers/net/ice/ice_rxtx.h          |  1 +
 drivers/net/ice/ice_rxtx_vec_avx2.c | 23 ++++++------
 drivers/net/ice/ice_rxtx_vec_sse.c  | 56 +++++++++++++++++++----------
 3 files changed, 49 insertions(+), 31 deletions(-)

diff --git a/drivers/net/ice/ice_rxtx.h b/drivers/net/ice/ice_rxtx.h
index 2fdcfb7d0..3ef5f300d 100644
--- a/drivers/net/ice/ice_rxtx.h
+++ b/drivers/net/ice/ice_rxtx.h
@@ -35,6 +35,7 @@
 #define ICE_MAX_RX_BURST            ICE_RXQ_REARM_THRESH
 #define ICE_TX_MAX_FREE_BUF_SZ      64
 #define ICE_DESCS_PER_LOOP          4
+#define ICE_DESCS_PER_LOOP_AVX     8
 
 #define ICE_FDIR_PKT_LEN       512
 
diff --git a/drivers/net/ice/ice_rxtx_vec_avx2.c b/drivers/net/ice/ice_rxtx_vec_avx2.c
index be50677c2..843e4f32a 100644
--- a/drivers/net/ice/ice_rxtx_vec_avx2.c
+++ b/drivers/net/ice/ice_rxtx_vec_avx2.c
@@ -29,7 +29,7 @@ ice_rxq_rearm(struct ice_rx_queue *rxq)
                        __m128i dma_addr0;
 
                        dma_addr0 = _mm_setzero_si128();
-                       for (i = 0; i < ICE_DESCS_PER_LOOP; i++) {
+                       for (i = 0; i < ICE_DESCS_PER_LOOP_AVX; i++) {
                                rxep[i].mbuf = &rxq->fake_mbuf;
                                _mm_store_si128((__m128i *)&rxdp[i].read,
                                                dma_addr0);
@@ -132,12 +132,17 @@ ice_rxq_rearm(struct ice_rx_queue *rxq)
        ICE_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
 }
 
+/**
+ * vPMD raw receive routine; only accepts nb_pkts >= ICE_DESCS_PER_LOOP_AVX
+ *
+ * Notice:
+ * - if nb_pkts < ICE_DESCS_PER_LOOP_AVX, no packets are returned
+ * - nb_pkts is floor-aligned to a multiple of ICE_DESCS_PER_LOOP_AVX
+ */
 static inline uint16_t
 _ice_recv_raw_pkts_vec_avx2(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
                            uint16_t nb_pkts, uint8_t *split_packet)
 {
-#define ICE_DESCS_PER_LOOP_AVX 8
-
        const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
        const __m256i mbuf_init = _mm256_set_epi64x(0, 0,
                        0, rxq->mbuf_initializer);
@@ -603,10 +608,6 @@ _ice_recv_raw_pkts_vec_avx2(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
        return received;
 }
 
-/**
- * Notice:
- * - nb_pkts < ICE_DESCS_PER_LOOP, just return no packet
- */
 uint16_t
 ice_recv_pkts_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
                       uint16_t nb_pkts)
@@ -616,8 +617,6 @@ ice_recv_pkts_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
 
 /**
  * vPMD receive routine that reassembles single burst of 32 scattered packets
- * Notice:
- * - nb_pkts < ICE_DESCS_PER_LOOP, just return no packet
  */
 static uint16_t
 ice_recv_scattered_burst_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
@@ -626,6 +625,9 @@ ice_recv_scattered_burst_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
        struct ice_rx_queue *rxq = rx_queue;
        uint8_t split_flags[ICE_VPMD_RX_BURST] = {0};
 
+       /* split_flags can hold at most ICE_VPMD_RX_BURST entries */
+       nb_pkts = RTE_MIN(nb_pkts, ICE_VPMD_RX_BURST);
+
        /* get some new buffers */
        uint16_t nb_bufs = _ice_recv_raw_pkts_vec_avx2(rxq, rx_pkts, nb_pkts,
                                                       split_flags);
@@ -657,9 +659,6 @@ ice_recv_scattered_burst_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
 
 /**
  * vPMD receive routine that reassembles scattered packets.
- * Main receive routine that can handle arbitrary burst sizes
- * Notice:
- * - nb_pkts < ICE_DESCS_PER_LOOP, just return no packet
  */
 uint16_t
 ice_recv_scattered_pkts_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
diff --git a/drivers/net/ice/ice_rxtx_vec_sse.c b/drivers/net/ice/ice_rxtx_vec_sse.c
index 382ef31f3..c03e24092 100644
--- a/drivers/net/ice/ice_rxtx_vec_sse.c
+++ b/drivers/net/ice/ice_rxtx_vec_sse.c
@@ -205,10 +205,11 @@ ice_rx_desc_to_ptype_v(__m128i descs[4], struct rte_mbuf **rx_pkts,
 }
 
 /**
+ * vPMD raw receive routine; only accepts nb_pkts >= ICE_DESCS_PER_LOOP
+ *
  * Notice:
  * - nb_pkts < ICE_DESCS_PER_LOOP, just return no packet
- * - nb_pkts > ICE_VPMD_RX_BURST, only scan ICE_VPMD_RX_BURST
- *   numbers of DD bits
+ * - nb_pkts is floor-aligned to a multiple of ICE_DESCS_PER_LOOP
  */
 static inline uint16_t
 _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
@@ -264,9 +265,6 @@ _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
        const __m128i eop_check = _mm_set_epi64x(0x0000000200000002LL,
                                                 0x0000000200000002LL);
 
-       /* nb_pkts shall be less equal than ICE_MAX_RX_BURST */
-       nb_pkts = RTE_MIN(nb_pkts, ICE_MAX_RX_BURST);
-
        /* nb_pkts has to be floor-aligned to ICE_DESCS_PER_LOOP */
        nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, ICE_DESCS_PER_LOOP);
 
@@ -441,12 +439,6 @@ _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
        return nb_pkts_recd;
 }
 
-/**
- * Notice:
- * - nb_pkts < ICE_DESCS_PER_LOOP, just return no packet
- * - nb_pkts > ICE_VPMD_RX_BURST, only scan ICE_VPMD_RX_BURST
- *   numbers of DD bits
- */
 uint16_t
 ice_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
                  uint16_t nb_pkts)
@@ -454,19 +446,19 @@ ice_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
        return _ice_recv_raw_pkts_vec(rx_queue, rx_pkts, nb_pkts, NULL);
 }
 
-/* vPMD receive routine that reassembles scattered packets
- * Notice:
- * - nb_pkts < ICE_DESCS_PER_LOOP, just return no packet
- * - nb_pkts > ICE_VPMD_RX_BURST, only scan ICE_VPMD_RX_BURST
- *   numbers of DD bits
+/**
+ * vPMD receive routine that reassembles single burst of 32 scattered packets
  */
-uint16_t
-ice_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
-                           uint16_t nb_pkts)
+static uint16_t
+ice_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
+                            uint16_t nb_pkts)
 {
        struct ice_rx_queue *rxq = rx_queue;
        uint8_t split_flags[ICE_VPMD_RX_BURST] = {0};
 
+       /* split_flags can hold at most ICE_VPMD_RX_BURST entries */
+       nb_pkts = RTE_MIN(nb_pkts, ICE_VPMD_RX_BURST);
+
        /* get some new buffers */
        uint16_t nb_bufs = _ice_recv_raw_pkts_vec(rxq, rx_pkts, nb_pkts,
                                                  split_flags);
@@ -496,6 +488,32 @@ ice_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
                                             &split_flags[i]);
 }
 
+/**
+ * vPMD receive routine that reassembles scattered packets.
+ */
+uint16_t
+ice_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
+                           uint16_t nb_pkts)
+{
+       uint16_t retval = 0;
+
+       while (nb_pkts > ICE_VPMD_RX_BURST) {
+               uint16_t burst;
+
+               burst = ice_recv_scattered_burst_vec(rx_queue,
+                                                    rx_pkts + retval,
+                                                    ICE_VPMD_RX_BURST);
+               retval += burst;
+               nb_pkts -= burst;
+               if (burst < ICE_VPMD_RX_BURST)
+                       return retval;
+       }
+
+       return retval + ice_recv_scattered_burst_vec(rx_queue,
+                                                    rx_pkts + retval,
+                                                    nb_pkts);
+}
+
 static inline void
 ice_vtx1(volatile struct ice_tx_desc *txdp, struct rte_mbuf *pkt,
         uint64_t flags)
-- 
2.20.1

Reply via email to