Integrated zero-copy get and put APIs of the mempool cache in the i40e PMD

Signed-off-by: Kamalakshitha Aligeri <kamalakshitha.alig...@arm.com>
---
Link: https://patchwork.dpdk.org/project/dpdk/patch/20221227151700.80887-1...@smartsharesystems.com/
1. Added support for mempools without cache (Morten Brorup)
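
Note for reviewers: the zero-copy mempool cache API used in this patch
(rte_mempool_cache_zc_get_bulk / rte_mempool_cache_zc_put_bulk) is the
experimental API from the mempool patch linked above. A minimal sketch of
the intended usage pattern is below; the helper names zc_put_sketch and
zc_get_sketch are illustrative only, and the fallback paths are the
caller's responsibility, since the zero-copy functions return NULL when
they cannot serve the request:

    #include <errno.h>
    #include <rte_lcore.h>
    #include <rte_memcpy.h>
    #include <rte_mempool.h>

    /* Put: reserve n slots in the lcore cache and write the object
     * pointers straight into it; no intermediate array, and any flush
     * to the backend happens inside the zero-copy API.
     */
    static inline void
    zc_put_sketch(struct rte_mempool *mp, void * const *objs, unsigned int n)
    {
            struct rte_mempool_cache *cache =
                            rte_mempool_default_cache(mp, rte_lcore_id());
            void **slots = cache == NULL ? NULL :
                            rte_mempool_cache_zc_put_bulk(cache, mp, n);
            unsigned int i;

            if (slots != NULL) {
                    for (i = 0; i < n; i++)
                            slots[i] = objs[i];
            } else {
                    /* no cache, or n exceeds the cache flush threshold */
                    rte_mempool_ops_enqueue_bulk(mp, objs, n);
            }
    }

    /* Get: read the object pointers directly out of the cache; the
     * cache is refilled from the backend as needed.
     */
    static inline int
    zc_get_sketch(struct rte_mempool *mp, void **objs, unsigned int n)
    {
            struct rte_mempool_cache *cache =
                            rte_mempool_default_cache(mp, rte_lcore_id());
            void **slots = cache == NULL ? NULL :
                            rte_mempool_cache_zc_get_bulk(cache, mp, n);

            if (slots == NULL)
                    return -ENOENT;
            rte_memcpy(objs, slots, n * sizeof(void *));
            return 0;
    }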

 .mailmap                                |  1 +
 drivers/net/i40e/i40e_rxtx_vec_avx512.c | 35 ++++++++-----------------
 drivers/net/i40e/i40e_rxtx_vec_common.h | 23 ++++++++++------
 drivers/net/i40e/i40e_rxtx_vec_neon.c   | 35 ++++++++++++++++---------
 4 files changed, 49 insertions(+), 45 deletions(-)

diff --git a/.mailmap b/.mailmap
index 75884b6fe2..05a42edbcf 100644
--- a/.mailmap
+++ b/.mailmap
@@ -670,6 +670,7 @@ Kai Ji <kai...@intel.com>
 Kaiwen Deng <kaiwenx.d...@intel.com>
 Kalesh AP <kalesh-anakkur.pura...@broadcom.com>
 Kamalakannan R <kamalakanna...@intel.com>
+Kamalakshitha Aligeri <kamalakshitha.alig...@arm.com>
 Kamil Bednarczyk <kamil.bednarc...@intel.com>
 Kamil Chalupnik <kamilx.chalup...@intel.com>
 Kamil Rytarowski <kamil.rytarow...@caviumnetworks.com>
diff --git a/drivers/net/i40e/i40e_rxtx_vec_avx512.c b/drivers/net/i40e/i40e_rxtx_vec_avx512.c
index 60c97d5331..a4fba4ddc9 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_avx512.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_avx512.c
@@ -35,6 +35,9 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
 
        if (unlikely(!cache))
                return i40e_rxq_rearm_common(rxq, true);
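+       /* Zero-copy get: get a pointer to RTE_I40E_RXQ_REARM_THRESH mbuf
+        * pointers held in the lcore cache (the cache is refilled from the
+        * backend as needed) and consume them directly, with no intermediate
+        * copy into a local array.
+        */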
+       void **cache_objs;
+
+       cache_objs = rte_mempool_cache_zc_get_bulk(cache, rxq->mp, RTE_I40E_RXQ_REARM_THRESH);
 
        /* We need to pull 'n' more MBUFs into the software ring from mempool
         * We inline the mempool function here, so we can vectorize the copy
@@ -45,15 +48,12 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
                /* No. Backfill the cache first, and then fill from it */
                uint32_t req = RTE_I40E_RXQ_REARM_THRESH + (cache->size -
                                cache->len);
-
                /* How many do we require
                 * i.e. number to fill the cache + the request
                 */
                int ret = rte_mempool_ops_dequeue_bulk(rxq->mp,
-                               &cache->objs[cache->len], req);
-               if (ret == 0) {
-                       cache->len += req;
-               } else {
+                               &cache_objs[cache->len], req);
+               if (ret != 0) {
                        if (rxq->rxrearm_nb + RTE_I40E_RXQ_REARM_THRESH >=
                                        rxq->nb_rx_desc) {
                                __m128i dma_addr0;
@@ -63,11 +63,11 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
                                        rxep[i].mbuf = &rxq->fake_mbuf;
                                        _mm_store_si128
                                                ((__m128i *)&rxdp[i].read,
-                                                       dma_addr0);
+                                                dma_addr0);
                                }
                        }
                        rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-                                       RTE_I40E_RXQ_REARM_THRESH;
+                               RTE_I40E_RXQ_REARM_THRESH;
                        return;
                }
        }
@@ -90,7 +90,7 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
         */
        for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH / 8; i++) {
                const __m512i mbuf_ptrs = _mm512_loadu_si512
-                       (&cache->objs[cache->len - 8]);
+                       (&cache_objs[cache->len - 8]);
                _mm512_store_si512(rxep, mbuf_ptrs);
 
                /* gather iova of mbuf0-7 into one zmm reg */
@@ -906,21 +906,16 @@ i40e_tx_free_bufs_avx512(struct i40e_tx_queue *txq)
                struct rte_mempool_cache *cache = rte_mempool_default_cache(mp,
                                rte_lcore_id());
 
-               if (!cache || cache->len == 0)
+               if (!cache)
                        goto normal;
 
-               cache_objs = &cache->objs[cache->len];
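+               /* Zero-copy put: reserve n slots in the lcore cache and get a
+                * pointer to them; storing the mbuf pointers below hands the
+                * mbufs to the cache with no intermediate copy. Any flush of
+                * the cache to the backend is handled inside the zero-copy API.
+                */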
+               cache_objs = rte_mempool_cache_zc_put_bulk(cache, mp, n);
 
-               if (n > RTE_MEMPOOL_CACHE_MAX_SIZE) {
+               if (!cache_objs) {
                        rte_mempool_ops_enqueue_bulk(mp, (void *)txep, n);
                        goto done;
                }
 
-               /* The cache follows the following algorithm
-                *   1. Add the objects to the cache
-                *   2. Anything greater than the cache min value (if it
-                *   crosses the cache flush threshold) is flushed to the ring.
-                */
                /* Add elements back into the cache */
                uint32_t copied = 0;
                /* n is multiple of 32 */
@@ -936,14 +931,6 @@ i40e_tx_free_bufs_avx512(struct i40e_tx_queue *txq)
                        _mm512_storeu_si512(&cache_objs[copied + 24], d);
                        copied += 32;
                }
-               cache->len += n;
-
-               if (cache->len >= cache->flushthresh) {
-                       rte_mempool_ops_enqueue_bulk
-                               (mp, &cache->objs[cache->size],
-                               cache->len - cache->size);
-                       cache->len = cache->size;
-               }
                goto done;
        }
 
diff --git a/drivers/net/i40e/i40e_rxtx_vec_common.h b/drivers/net/i40e/i40e_rxtx_vec_common.h
index fe1a6ec75e..70e11a2ef2 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_common.h
+++ b/drivers/net/i40e/i40e_rxtx_vec_common.h
@@ -95,17 +95,24 @@ i40e_tx_free_bufs(struct i40e_tx_queue *txq)
 
        n = txq->tx_rs_thresh;
 
-        /* first buffer to free from S/W ring is at index
-         * tx_next_dd - (tx_rs_thresh-1)
-         */
+       /* first buffer to free from S/W ring is at index
+        * tx_next_dd - (tx_rs_thresh-1)
+        */
        txep = &txq->sw_ring[txq->tx_next_dd - (n - 1)];
 
        if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
-               for (i = 0; i < n; i++) {
-                       free[i] = txep[i].mbuf;
-                       /* no need to reset txep[i].mbuf in vector path */
-               }
-               rte_mempool_put_bulk(free[0]->pool, (void **)free, n);
+               struct rte_mempool *mp = txep[0].mbuf->pool;
+               struct rte_mempool_cache *cache = rte_mempool_default_cache(mp,
+                               rte_lcore_id());
+               void **cache_objs;
+
+               cache_objs = cache == NULL ? NULL :
+                               rte_mempool_cache_zc_put_bulk(cache, mp, n);
+               if (cache_objs) {
+                       for (i = 0; i < n; i++) {
+                               cache_objs[i] = txep[i].mbuf;
+                               /* no need to reset txep[i].mbuf in vector path */
+                       }
+               } else {
+                       /* no cache, or zero-copy put failed:
+                        * fall back to a regular bulk put
+                        */
+                       for (i = 0; i < n; i++)
+                               free[i] = txep[i].mbuf;
+                       rte_mempool_put_bulk(mp, (void **)free, n);
+               }
                goto done;
        }
 
@@ -121,7 +128,7 @@ i40e_tx_free_bufs(struct i40e_tx_queue *txq)
                                } else {
                                        rte_mempool_put_bulk(free[0]->pool,
                                                             (void *)free,
-                                                            nb_free);
+                                                             nb_free);
                                        free[0] = m;
                                        nb_free = 1;
                                }
diff --git a/drivers/net/i40e/i40e_rxtx_vec_neon.c b/drivers/net/i40e/i40e_rxtx_vec_neon.c
index 12e6f1cbcb..5ffc462a47 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_neon.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_neon.c
@@ -30,23 +30,32 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
        uint64_t paddr;
 
        rxdp = rxq->rx_ring + rxq->rxrearm_start;
-
-       /* Pull 'n' more MBUFs into the software ring */
-       if (unlikely(rte_mempool_get_bulk(rxq->mp,
-                                         (void *)rxep,
-                                         RTE_I40E_RXQ_REARM_THRESH) < 0)) {
-               if (rxq->rxrearm_nb + RTE_I40E_RXQ_REARM_THRESH >=
-                   rxq->nb_rx_desc) {
-                       for (i = 0; i < RTE_I40E_DESCS_PER_LOOP; i++) {
-                               rxep[i].mbuf = &rxq->fake_mbuf;
-                               vst1q_u64((uint64_t *)&rxdp[i].read, zero);
-                       }
-               }
-               rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-                       RTE_I40E_RXQ_REARM_THRESH;
-               return;
-       }
+       struct rte_mempool_cache *cache = rte_mempool_default_cache(rxq->mp,
+                       rte_lcore_id());
+
+       /* When no cache is provided, get the objects directly from the backend */
+       if (cache == NULL) {
+               /* Pull 'n' more MBUFs into the software ring */
+               int ret = rte_mempool_ops_dequeue_bulk(rxq->mp, (void *)rxep,
+                               RTE_I40E_RXQ_REARM_THRESH);
+               if (ret != 0) {
+                       if (rxq->rxrearm_nb + RTE_I40E_RXQ_REARM_THRESH >=
+                                       rxq->nb_rx_desc) {
+                               for (i = 0; i < RTE_I40E_DESCS_PER_LOOP; i++) {
+                                       rxep[i].mbuf = &rxq->fake_mbuf;
+                                       vst1q_u64((uint64_t *)&rxdp[i].read, zero);
+                               }
+                       }
+                       rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
+                               RTE_I40E_RXQ_REARM_THRESH;
+                       return;
+               }
+       } else {
+               void **cache_objs;
+
+               /* Zero-copy get: consume the mbuf pointers straight from the
+                * lcore cache, which is refilled from the backend as needed
+                */
+               cache_objs = rte_mempool_cache_zc_get_bulk(cache, rxq->mp,
+                               RTE_I40E_RXQ_REARM_THRESH);
+               if (unlikely(cache_objs == NULL)) {
+                       rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
+                               RTE_I40E_RXQ_REARM_THRESH;
+                       return;
+               }
+               rte_memcpy(rxep, cache_objs,
+                               RTE_I40E_RXQ_REARM_THRESH * sizeof(void *));
+       }
 
        /* Initialize the mbufs in vector, process 2 mbufs in one loop */
        for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH; i += 2, rxep += 2) {
                mb0 = rxep[0].mbuf;
-- 
2.25.1
