Integrated the mempool cache zero-copy get and put APIs into the i40e PMD Signed-off-by: Kamalakshitha Aligeri <kamalakshitha.alig...@arm.com> --- 1. I have replaced the rte_mempool_get_bulk and rte_mempool_put_bulk calls in net/i40e with the zero-copy get and put APIs
drivers/net/i40e/i40e_rxtx_vec_avx512.c | 10 +--------- drivers/net/i40e/i40e_rxtx_vec_common.h | 21 +++++++++++++-------- drivers/net/i40e/i40e_rxtx_vec_neon.c | 16 ++++++++++++---- 3 files changed, 26 insertions(+), 21 deletions(-) diff --git a/drivers/net/i40e/i40e_rxtx_vec_avx512.c b/drivers/net/i40e/i40e_rxtx_vec_avx512.c index 60c97d5331..736bd4650f 100644 --- a/drivers/net/i40e/i40e_rxtx_vec_avx512.c +++ b/drivers/net/i40e/i40e_rxtx_vec_avx512.c @@ -909,7 +909,7 @@ i40e_tx_free_bufs_avx512(struct i40e_tx_queue *txq) if (!cache || cache->len == 0) goto normal; - cache_objs = &cache->objs[cache->len]; + cache_objs = rte_mempool_cache_zc_put_bulk(cache, mp, n); if (n > RTE_MEMPOOL_CACHE_MAX_SIZE) { rte_mempool_ops_enqueue_bulk(mp, (void *)txep, n); @@ -936,14 +936,6 @@ i40e_tx_free_bufs_avx512(struct i40e_tx_queue *txq) _mm512_storeu_si512(&cache_objs[copied + 24], d); copied += 32; } - cache->len += n; - - if (cache->len >= cache->flushthresh) { - rte_mempool_ops_enqueue_bulk - (mp, &cache->objs[cache->size], - cache->len - cache->size); - cache->len = cache->size; - } goto done; } diff --git a/drivers/net/i40e/i40e_rxtx_vec_common.h b/drivers/net/i40e/i40e_rxtx_vec_common.h index fe1a6ec75e..4fc4aa0aec 100644 --- a/drivers/net/i40e/i40e_rxtx_vec_common.h +++ b/drivers/net/i40e/i40e_rxtx_vec_common.h @@ -89,23 +89,28 @@ i40e_tx_free_bufs(struct i40e_tx_queue *txq) /* check DD bits on threshold descriptor */ if ((txq->tx_ring[txq->tx_next_dd].cmd_type_offset_bsz & - rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) != + rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) != rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE)) return 0; n = txq->tx_rs_thresh; - /* first buffer to free from S/W ring is at index - * tx_next_dd - (tx_rs_thresh-1) - */ + /* first buffer to free from S/W ring is at index + * tx_next_dd - (tx_rs_thresh-1) + */ txep = &txq->sw_ring[txq->tx_next_dd - (n - 1)]; + struct rte_mempool *mp = txep[0].mbuf->pool; + struct rte_mempool_cache *cache = 
rte_mempool_default_cache(mp, rte_lcore_id()); + void **cache_objs; + + cache_objs = rte_mempool_cache_zc_put_bulk(cache, mp, n); if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) { for (i = 0; i < n; i++) { - free[i] = txep[i].mbuf; + rte_memcpy(&cache_objs[i], &txep->mbuf, sizeof(struct rte_mbuf)); /* no need to reset txep[i].mbuf in vector path */ + txep++; } - rte_mempool_put_bulk(free[0]->pool, (void **)free, n); goto done; } @@ -120,8 +125,8 @@ i40e_tx_free_bufs(struct i40e_tx_queue *txq) free[nb_free++] = m; } else { rte_mempool_put_bulk(free[0]->pool, - (void *)free, - nb_free); + (void *)free, + nb_free); free[0] = m; nb_free = 1; } diff --git a/drivers/net/i40e/i40e_rxtx_vec_neon.c b/drivers/net/i40e/i40e_rxtx_vec_neon.c index 12e6f1cbcb..ebc2161b84 100644 --- a/drivers/net/i40e/i40e_rxtx_vec_neon.c +++ b/drivers/net/i40e/i40e_rxtx_vec_neon.c @@ -28,15 +28,19 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq) uint64x2_t dma_addr0, dma_addr1; uint64x2_t zero = vdupq_n_u64(0); uint64_t paddr; + uint32_t index, n; + n = RTE_I40E_RXQ_REARM_THRESH; rxdp = rxq->rx_ring + rxq->rxrearm_start; + struct rte_mempool_cache *cache = rte_mempool_default_cache(rxq->mp, rte_lcore_id()); + void **cache_objs; + + cache_objs = rte_mempool_cache_zc_get_bulk(cache, rxq->mp, n); /* Pull 'n' more MBUFs into the software ring */ - if (unlikely(rte_mempool_get_bulk(rxq->mp, - (void *)rxep, - RTE_I40E_RXQ_REARM_THRESH) < 0)) { + if (unlikely(!cache_objs)) { if (rxq->rxrearm_nb + RTE_I40E_RXQ_REARM_THRESH >= - rxq->nb_rx_desc) { + rxq->nb_rx_desc) { for (i = 0; i < RTE_I40E_DESCS_PER_LOOP; i++) { rxep[i].mbuf = &rxq->fake_mbuf; vst1q_u64((uint64_t *)&rxdp[i].read, zero); @@ -46,6 +50,10 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq) RTE_I40E_RXQ_REARM_THRESH; return; } + for (index = 0; index < n; index++) { + rxep->mbuf = cache_objs[index]; + rxep++; + } /* Initialize the mbufs in vector, process 2 mbufs in one loop */ for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH; i += 2, rxep += 2) { 
-- 2.25.1