As a consequence of the improved mempool cache algorithm, update how
many objects the PMD backfills into the mempool cache in the AVX512
code path.

Signed-off-by: Morten Brørup <[email protected]>
---
v7:
* Rebased.
v6:
* Moved driver changes out as separate patches, for easier review. (Bruce)
---
Depends-on: patch-164745 ("mempool: improve cache behaviour and performance")
---
 .../net/intel/idpf/idpf_common_rxtx_avx512.c  | 52 +++++++++++++++----
 1 file changed, 42 insertions(+), 10 deletions(-)

diff --git a/drivers/net/intel/idpf/idpf_common_rxtx_avx512.c b/drivers/net/intel/idpf/idpf_common_rxtx_avx512.c
index 8db4c64106..5788a009ab 100644
--- a/drivers/net/intel/idpf/idpf_common_rxtx_avx512.c
+++ b/drivers/net/intel/idpf/idpf_common_rxtx_avx512.c
@@ -148,15 +148,31 @@ idpf_singleq_rearm(struct idpf_rx_queue *rxq)
        /* Can this be satisfied from the cache? */
        if (cache->len < IDPF_RXQ_REARM_THRESH) {
                /* No. Backfill the cache first, and then fill from it */
-               uint32_t req = IDPF_RXQ_REARM_THRESH + (cache->size -
-                                                       cache->len);
 
-               /* How many do we require i.e. number to fill the cache + the request */
+               /* Backfill would exceed the cache bounce buffer limit? */
+               __rte_assume(cache->size / 2 <= RTE_MEMPOOL_CACHE_MAX_SIZE / 2);
+               if (unlikely(cache->size / 2 < IDPF_RXQ_REARM_THRESH)) {
+                       idpf_singleq_rearm_common(rxq);
+                       return;
+               }
+
+               /*
+                * Backfill the cache from the backend;
+                * move up the hot objects in the cache to the top half of the cache,
+                * and fetch (size / 2) objects to the bottom of the cache.
+                */
+               __rte_assume(cache->len < cache->size / 2);
+               rte_memcpy(&cache->objs[cache->size / 2], &cache->objs[0],
+                               sizeof(void *) * cache->len);
                int ret = rte_mempool_ops_dequeue_bulk
-                               (rxq->mp, &cache->objs[cache->len], req);
+                               (rxq->mp, &cache->objs[0], cache->size / 2);
                if (ret == 0) {
-                       cache->len += req;
+                       cache->len += cache->size / 2;
                } else {
+                       /*
+                        * No further action is required for roll back, as the objects moved
+                        * in the cache were actually copied, and the cache remains intact.
+                        */
                        if (rxq->rxrearm_nb + IDPF_RXQ_REARM_THRESH >=
                            rxq->nb_rx_desc) {
                                __m128i dma_addr0;
@@ -565,15 +581,31 @@ idpf_splitq_rearm(struct idpf_rx_queue *rx_bufq)
        /* Can this be satisfied from the cache? */
        if (cache->len < IDPF_RXQ_REARM_THRESH) {
                /* No. Backfill the cache first, and then fill from it */
-               uint32_t req = IDPF_RXQ_REARM_THRESH + (cache->size -
-                                                       cache->len);
 
-               /* How many do we require i.e. number to fill the cache + the request */
+               /* Backfill would exceed the cache bounce buffer limit? */
+               __rte_assume(cache->size / 2 <= RTE_MEMPOOL_CACHE_MAX_SIZE / 2);
+               if (unlikely(cache->size / 2 < IDPF_RXQ_REARM_THRESH)) {
+                       idpf_splitq_rearm_common(rx_bufq);
+                       return;
+               }
+
+               /*
+                * Backfill the cache from the backend;
+                * move up the hot objects in the cache to the top half of the cache,
+                * and fetch (size / 2) objects to the bottom of the cache.
+                */
+               __rte_assume(cache->len < cache->size / 2);
+               rte_memcpy(&cache->objs[cache->size / 2], &cache->objs[0],
+                               sizeof(void *) * cache->len);
                int ret = rte_mempool_ops_dequeue_bulk
-                               (rx_bufq->mp, &cache->objs[cache->len], req);
+                               (rx_bufq->mp, &cache->objs[0], cache->size / 2);
                if (ret == 0) {
-                       cache->len += req;
+                       cache->len += cache->size / 2;
                } else {
+                       /*
+                        * No further action is required for roll back, as the objects moved
+                        * in the cache were actually copied, and the cache remains intact.
+                        */
                        if (rx_bufq->rxrearm_nb + IDPF_RXQ_REARM_THRESH >=
                            rx_bufq->nb_rx_desc) {
                                __m128i dma_addr0;
-- 
2.43.0

Reply via email to