Tested-by: Yingya Han <yingyax....@intel.com> -----Original Message----- From: dev <dev-boun...@dpdk.org> On Behalf Of Jeff Guo Sent: Wednesday, September 9, 2020 2:37 PM To: Yang, Qiming <qiming.y...@intel.com>; Xing, Beilei <beilei.x...@intel.com>; Zhao1, Wei <wei.zh...@intel.com>; Zhang, Qi Z <qi.z.zh...@intel.com>; Wu, Jingjing <jingjing...@intel.com> Cc: Richardson, Bruce <bruce.richard...@intel.com>; dev@dpdk.org; Guo, Jia <jia....@intel.com>; Zhang, Helin <helin.zh...@intel.com>; m...@smartsharesystems.com; Yigit, Ferruh <ferruh.yi...@intel.com>; Wang, Haiyue <haiyue.w...@intel.com>; step...@networkplumber.org; barbe...@kth.se Subject: [dpdk-dev] [PATCH v3 4/5] net/ice: fix vector rx burst for ice
The limitation of burst size in vector rx was removed, since it should retrieve as many received packets as possible. Also, the scattered receive path should use a wrapper function to maximize the burst size. In addition, do some code cleanup for the vector rx path. Signed-off-by: Jeff Guo <jia....@intel.com> --- drivers/net/ice/ice_rxtx.h | 1 + drivers/net/ice/ice_rxtx_vec_avx2.c | 23 ++++++------ drivers/net/ice/ice_rxtx_vec_sse.c | 56 +++++++++++++++++++---------- 3 files changed, 49 insertions(+), 31 deletions(-) diff --git a/drivers/net/ice/ice_rxtx.h b/drivers/net/ice/ice_rxtx.h index 2fdcfb7d0..3ef5f300d 100644 --- a/drivers/net/ice/ice_rxtx.h +++ b/drivers/net/ice/ice_rxtx.h @@ -35,6 +35,7 @@ #define ICE_MAX_RX_BURST ICE_RXQ_REARM_THRESH #define ICE_TX_MAX_FREE_BUF_SZ 64 #define ICE_DESCS_PER_LOOP 4 +#define ICE_DESCS_PER_LOOP_AVX 8 #define ICE_FDIR_PKT_LEN 512 diff --git a/drivers/net/ice/ice_rxtx_vec_avx2.c b/drivers/net/ice/ice_rxtx_vec_avx2.c index be50677c2..843e4f32a 100644 --- a/drivers/net/ice/ice_rxtx_vec_avx2.c +++ b/drivers/net/ice/ice_rxtx_vec_avx2.c @@ -29,7 +29,7 @@ ice_rxq_rearm(struct ice_rx_queue *rxq) __m128i dma_addr0; dma_addr0 = _mm_setzero_si128(); - for (i = 0; i < ICE_DESCS_PER_LOOP; i++) { + for (i = 0; i < ICE_DESCS_PER_LOOP_AVX; i++) { rxep[i].mbuf = &rxq->fake_mbuf; _mm_store_si128((__m128i *)&rxdp[i].read, dma_addr0); @@ -132,12 +132,17 @@ ice_rxq_rearm(struct ice_rx_queue *rxq) ICE_PCI_REG_WRITE(rxq->qrx_tail, rx_id); } +/** + * vPMD raw receive routine, only accept(nb_pkts >= +ICE_DESCS_PER_LOOP_AVX) + * + * Notice: + * - nb_pkts < ICE_DESCS_PER_LOOP_AVX, just return no packet + * - floor align nb_pkts to a ICE_DESCS_PER_LOOP_AVX power-of-two */ static inline uint16_t _ice_recv_raw_pkts_vec_avx2(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts, uint8_t *split_packet) { -#define ICE_DESCS_PER_LOOP_AVX 8 - const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl; const __m256i mbuf_init = 
_mm256_set_epi64x(0, 0, 0, rxq->mbuf_initializer); @@ -603,10 +608,6 @@ _ice_recv_raw_pkts_vec_avx2(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts, return received; } -/** - * Notice: - * - nb_pkts < ICE_DESCS_PER_LOOP, just return no packet - */ uint16_t ice_recv_pkts_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) @@ -616,8 +617,6 @@ ice_recv_pkts_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts, /** * vPMD receive routine that reassembles single burst of 32 scattered packets - * Notice: - * - nb_pkts < ICE_DESCS_PER_LOOP, just return no packet */ static uint16_t ice_recv_scattered_burst_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts, @@ -626,6 +625,9 @@ ice_recv_scattered_burst_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts, struct ice_rx_queue *rxq = rx_queue; uint8_t split_flags[ICE_VPMD_RX_BURST] = {0}; + /* split_flags only can support max of ICE_VPMD_RX_BURST */ + nb_pkts = RTE_MIN(nb_pkts, ICE_VPMD_RX_BURST); + /* get some new buffers */ uint16_t nb_bufs = _ice_recv_raw_pkts_vec_avx2(rxq, rx_pkts, nb_pkts, split_flags); @@ -657,9 +659,6 @@ ice_recv_scattered_burst_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts, /** * vPMD receive routine that reassembles scattered packets. 
- * Main receive routine that can handle arbitrary burst sizes - * Notice: - * - nb_pkts < ICE_DESCS_PER_LOOP, just return no packet */ uint16_t ice_recv_scattered_pkts_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts, diff --git a/drivers/net/ice/ice_rxtx_vec_sse.c b/drivers/net/ice/ice_rxtx_vec_sse.c index 382ef31f3..c03e24092 100644 --- a/drivers/net/ice/ice_rxtx_vec_sse.c +++ b/drivers/net/ice/ice_rxtx_vec_sse.c @@ -205,10 +205,11 @@ ice_rx_desc_to_ptype_v(__m128i descs[4], struct rte_mbuf **rx_pkts, } /** + * vPMD raw receive routine, only accept(nb_pkts >= ICE_DESCS_PER_LOOP) + * * Notice: * - nb_pkts < ICE_DESCS_PER_LOOP, just return no packet - * - nb_pkts > ICE_VPMD_RX_BURST, only scan ICE_VPMD_RX_BURST - * numbers of DD bits + * - floor align nb_pkts to a ICE_DESCS_PER_LOOP power-of-two */ static inline uint16_t _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts, @@ -264,9 +265,6 @@ _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts, const __m128i eop_check = _mm_set_epi64x(0x0000000200000002LL, 0x0000000200000002LL); - /* nb_pkts shall be less equal than ICE_MAX_RX_BURST */ - nb_pkts = RTE_MIN(nb_pkts, ICE_MAX_RX_BURST); - /* nb_pkts has to be floor-aligned to ICE_DESCS_PER_LOOP */ nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, ICE_DESCS_PER_LOOP); @@ -441,12 +439,6 @@ _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts, return nb_pkts_recd; } -/** - * Notice: - * - nb_pkts < ICE_DESCS_PER_LOOP, just return no packet - * - nb_pkts > ICE_VPMD_RX_BURST, only scan ICE_VPMD_RX_BURST - * numbers of DD bits - */ uint16_t ice_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) @@ -454,19 +446,19 @@ ice_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, return _ice_recv_raw_pkts_vec(rx_queue, rx_pkts, nb_pkts, NULL); } -/* vPMD receive routine that reassembles scattered packets - * Notice: - * - nb_pkts < ICE_DESCS_PER_LOOP, just return no packet - * - nb_pkts > 
ICE_VPMD_RX_BURST, only scan ICE_VPMD_RX_BURST - * numbers of DD bits +/** + * vPMD receive routine that reassembles single burst of 32 scattered +packets */ -uint16_t -ice_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, - uint16_t nb_pkts) +static uint16_t +ice_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts, + uint16_t nb_pkts) { struct ice_rx_queue *rxq = rx_queue; uint8_t split_flags[ICE_VPMD_RX_BURST] = {0}; + /* split_flags only can support max of ICE_VPMD_RX_BURST */ + nb_pkts = RTE_MIN(nb_pkts, ICE_VPMD_RX_BURST); + /* get some new buffers */ uint16_t nb_bufs = _ice_recv_raw_pkts_vec(rxq, rx_pkts, nb_pkts, split_flags); @@ -496,6 +488,32 @@ ice_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, &split_flags[i]); } +/** + * vPMD receive routine that reassembles scattered packets. + */ +uint16_t +ice_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, + uint16_t nb_pkts) +{ + uint16_t retval = 0; + + while (nb_pkts > ICE_VPMD_RX_BURST) { + uint16_t burst; + + burst = ice_recv_scattered_burst_vec(rx_queue, + rx_pkts + retval, + ICE_VPMD_RX_BURST); + retval += burst; + nb_pkts -= burst; + if (burst < ICE_VPMD_RX_BURST) + return retval; + } + + return retval + ice_recv_scattered_burst_vec(rx_queue, + rx_pkts + retval, + nb_pkts); +} + static inline void ice_vtx1(volatile struct ice_tx_desc *txdp, struct rte_mbuf *pkt, uint64_t flags) -- 2.20.1