Integrated zero-copy get and put APIs in mempool cache in i40e PMD

Signed-off-by: Kamalakshitha Aligeri <kamalakshitha.alig...@arm.com>
---
1. I have replaced the rte_mempool_get_bulk and rte_mempool_put_bulk in
net/i40e with the zero-copy get and put APIs

 drivers/net/i40e/i40e_rxtx_vec_avx512.c | 10 +---------
 drivers/net/i40e/i40e_rxtx_vec_common.h | 21 +++++++++++++--------
 drivers/net/i40e/i40e_rxtx_vec_neon.c   | 16 ++++++++++++----
 3 files changed, 26 insertions(+), 21 deletions(-)

diff --git a/drivers/net/i40e/i40e_rxtx_vec_avx512.c 
b/drivers/net/i40e/i40e_rxtx_vec_avx512.c
index 60c97d5331..736bd4650f 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_avx512.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_avx512.c
@@ -909,7 +909,7 @@ i40e_tx_free_bufs_avx512(struct i40e_tx_queue *txq)
                if (!cache || cache->len == 0)
                        goto normal;
 
-               cache_objs = &cache->objs[cache->len];
+               cache_objs = rte_mempool_cache_zc_put_bulk(cache, mp, n);
 
                if (n > RTE_MEMPOOL_CACHE_MAX_SIZE) {
                        rte_mempool_ops_enqueue_bulk(mp, (void *)txep, n);
@@ -936,14 +936,6 @@ i40e_tx_free_bufs_avx512(struct i40e_tx_queue *txq)
                        _mm512_storeu_si512(&cache_objs[copied + 24], d);
                        copied += 32;
                }
-               cache->len += n;
-
-               if (cache->len >= cache->flushthresh) {
-                       rte_mempool_ops_enqueue_bulk
-                               (mp, &cache->objs[cache->size],
-                               cache->len - cache->size);
-                       cache->len = cache->size;
-               }
                goto done;
        }
 
diff --git a/drivers/net/i40e/i40e_rxtx_vec_common.h 
b/drivers/net/i40e/i40e_rxtx_vec_common.h
index fe1a6ec75e..4fc4aa0aec 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_common.h
+++ b/drivers/net/i40e/i40e_rxtx_vec_common.h
@@ -89,23 +89,28 @@ i40e_tx_free_bufs(struct i40e_tx_queue *txq)
 
        /* check DD bits on threshold descriptor */
        if ((txq->tx_ring[txq->tx_next_dd].cmd_type_offset_bsz &
-                       rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) !=
+                               rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) !=
                        rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE))
                return 0;
 
        n = txq->tx_rs_thresh;
 
-        /* first buffer to free from S/W ring is at index
-         * tx_next_dd - (tx_rs_thresh-1)
-         */
+       /* first buffer to free from S/W ring is at index
+        * tx_next_dd - (tx_rs_thresh-1)
+        */
        txep = &txq->sw_ring[txq->tx_next_dd - (n - 1)];
+       struct rte_mempool *mp = txep[0].mbuf->pool;
+       struct rte_mempool_cache *cache = rte_mempool_default_cache(mp, 
rte_lcore_id());
+       void **cache_objs;
+
+       cache_objs = rte_mempool_cache_zc_put_bulk(cache, mp, n);
 
        if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
                for (i = 0; i < n; i++) {
-                       free[i] = txep[i].mbuf;
+                       rte_memcpy(&cache_objs[i], &txep->mbuf, sizeof(struct 
rte_mbuf));
                        /* no need to reset txep[i].mbuf in vector path */
+                       txep++;
                }
-               rte_mempool_put_bulk(free[0]->pool, (void **)free, n);
                goto done;
        }
 
@@ -120,8 +125,8 @@ i40e_tx_free_bufs(struct i40e_tx_queue *txq)
                                        free[nb_free++] = m;
                                } else {
                                        rte_mempool_put_bulk(free[0]->pool,
-                                                            (void *)free,
-                                                            nb_free);
+                                                       (void *)free,
+                                                       nb_free);
                                        free[0] = m;
                                        nb_free = 1;
                                }
diff --git a/drivers/net/i40e/i40e_rxtx_vec_neon.c 
b/drivers/net/i40e/i40e_rxtx_vec_neon.c
index 12e6f1cbcb..ebc2161b84 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_neon.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_neon.c
@@ -28,15 +28,19 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
        uint64x2_t dma_addr0, dma_addr1;
        uint64x2_t zero = vdupq_n_u64(0);
        uint64_t paddr;
+       uint32_t index, n;
 
+       n = RTE_I40E_RXQ_REARM_THRESH;
        rxdp = rxq->rx_ring + rxq->rxrearm_start;
+       struct rte_mempool_cache *cache = rte_mempool_default_cache(rxq->mp, 
rte_lcore_id());
+       void **cache_objs;
+
+       cache_objs = rte_mempool_cache_zc_get_bulk(cache, rxq->mp, n);
 
        /* Pull 'n' more MBUFs into the software ring */
-       if (unlikely(rte_mempool_get_bulk(rxq->mp,
-                                         (void *)rxep,
-                                         RTE_I40E_RXQ_REARM_THRESH) < 0)) {
+       if (unlikely(!cache_objs)) {
                if (rxq->rxrearm_nb + RTE_I40E_RXQ_REARM_THRESH >=
-                   rxq->nb_rx_desc) {
+                               rxq->nb_rx_desc) {
                        for (i = 0; i < RTE_I40E_DESCS_PER_LOOP; i++) {
                                rxep[i].mbuf = &rxq->fake_mbuf;
                                vst1q_u64((uint64_t *)&rxdp[i].read, zero);
@@ -46,6 +50,10 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
                        RTE_I40E_RXQ_REARM_THRESH;
                return;
        }
+       for (index = 0; index < n; index++) {
+               rxep->mbuf = cache_objs[index];
+               rxep++;
+       }
 
        /* Initialize the mbufs in vector, process 2 mbufs in one loop */
        for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH; i += 2, rxep += 2) {
-- 
2.25.1

Reply via email to