From: Long Li <lon...@microsoft.com>

Instead of allocating mbufs one by one during RX, use
rte_pktmbuf_alloc_bulk() to allocate them in a batch.

There are no measurable performance improvements in benchmarks. However,
this patch should reduce CPU cycles spent on mbuf allocation and reduce
potential lock contention on the mempool in real-world applications.

Signed-off-by: Long Li <lon...@microsoft.com>
---
Change in v2:
use rte_calloc_socket() in place of rte_calloc()

v3:
add more comments explaining the benefit of doing alloc_bulk.
free mbufs that failed to post

 drivers/net/mana/rx.c | 74 +++++++++++++++++++++++++++++--------------
 1 file changed, 50 insertions(+), 24 deletions(-)
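
For reference, a minimal standalone sketch of the alloc-bulk / partial-free
pattern this patch applies (generic code, not the driver's: post_one() is a
placeholder for mana_post_rx_wqe(), and the patch itself sizes the pointer
array with rte_calloc_socket() on the queue's socket rather than using a
fixed stack array):

#include <rte_common.h>
#include <rte_mbuf.h>
#include <rte_mempool.h>

/*
 * Sketch only: refill up to 32 descriptors with one bulk mempool
 * operation instead of one rte_pktmbuf_alloc() call per descriptor.
 * post_one() stands in for the driver's per-mbuf post routine.
 */
static int
replenish_bulk(struct rte_mempool *mp, uint32_t count,
	       int (*post_one)(struct rte_mbuf *))
{
	struct rte_mbuf *mbufs[32];
	uint32_t i = 0;
	int ret;

	if (count > RTE_DIM(mbufs))
		count = RTE_DIM(mbufs);

	/* Single mempool access (one lock/ring operation) for the batch */
	ret = rte_pktmbuf_alloc_bulk(mp, mbufs, count);
	if (ret)
		return ret;

	for (i = 0; i < count; i++) {
		ret = post_one(mbufs[i]);
		if (ret)
			break;
	}

	/* Free any mbufs that were allocated but not posted */
	while (i < count)
		rte_pktmbuf_free(mbufs[i++]);

	return ret;
}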

diff --git a/drivers/net/mana/rx.c b/drivers/net/mana/rx.c
index acad5e26cd..6112db2219 100644
--- a/drivers/net/mana/rx.c
+++ b/drivers/net/mana/rx.c
@@ -2,6 +2,7 @@
  * Copyright 2022 Microsoft Corporation
  */
 #include <ethdev_driver.h>
+#include <rte_malloc.h>
 
 #include <infiniband/verbs.h>
 #include <infiniband/manadv.h>
@@ -59,9 +60,8 @@ mana_rq_ring_doorbell(struct mana_rxq *rxq)
 }
 
 static int
-mana_alloc_and_post_rx_wqe(struct mana_rxq *rxq)
+mana_post_rx_wqe(struct mana_rxq *rxq, struct rte_mbuf *mbuf)
 {
-       struct rte_mbuf *mbuf = NULL;
        struct gdma_sgl_element sgl[1];
        struct gdma_work_request request;
        uint32_t wqe_size_in_bu;
@@ -69,12 +69,6 @@ mana_alloc_and_post_rx_wqe(struct mana_rxq *rxq)
        int ret;
        struct mana_mr_cache *mr;
 
-       mbuf = rte_pktmbuf_alloc(rxq->mp);
-       if (!mbuf) {
-               rxq->stats.nombuf++;
-               return -ENOMEM;
-       }
-
        mr = mana_alloc_pmd_mr(&rxq->mr_btree, priv, mbuf);
        if (!mr) {
                DP_LOG(ERR, "failed to register RX MR");
@@ -121,19 +115,32 @@ mana_alloc_and_post_rx_wqe(struct mana_rxq *rxq)
  * Post work requests for a Rx queue.
  */
 static int
-mana_alloc_and_post_rx_wqes(struct mana_rxq *rxq)
+mana_alloc_and_post_rx_wqes(struct mana_rxq *rxq, uint32_t count)
 {
        int ret;
        uint32_t i;
+       struct rte_mbuf **mbufs;
+
+       mbufs = rte_calloc_socket("mana_rx_mbufs", count, sizeof(struct rte_mbuf *),
+                                 0, rxq->mp->socket_id);
+       if (!mbufs)
+               return -ENOMEM;
+
+       ret = rte_pktmbuf_alloc_bulk(rxq->mp, mbufs, count);
+       if (ret) {
+               DP_LOG(ERR, "failed to allocate mbufs for RX");
+               rxq->stats.nombuf += count;
+               goto fail;
+       }
 
 #ifdef RTE_ARCH_32
        rxq->wqe_cnt_to_short_db = 0;
 #endif
-       for (i = 0; i < rxq->num_desc; i++) {
-               ret = mana_alloc_and_post_rx_wqe(rxq);
+       for (i = 0; i < count; i++) {
+               ret = mana_post_rx_wqe(rxq, mbufs[i]);
                if (ret) {
                        DP_LOG(ERR, "failed to post RX ret = %d", ret);
-                       return ret;
+                       break;
                }
 
 #ifdef RTE_ARCH_32
@@ -144,8 +151,16 @@ mana_alloc_and_post_rx_wqes(struct mana_rxq *rxq)
 #endif
        }
 
+       /* Free the remaining mbufs that were not posted */
+       while (i < count) {
+               rte_pktmbuf_free(mbufs[i]);
+               i++;
+       }
+
        mana_rq_ring_doorbell(rxq);
 
+fail:
+       rte_free(mbufs);
        return ret;
 }
 
@@ -404,7 +419,9 @@ mana_start_rx_queues(struct rte_eth_dev *dev)
        }
 
        for (i = 0; i < priv->num_queues; i++) {
-               ret = mana_alloc_and_post_rx_wqes(dev->data->rx_queues[i]);
+               struct mana_rxq *rxq = dev->data->rx_queues[i];
+
+               ret = mana_alloc_and_post_rx_wqes(rxq, rxq->num_desc);
                if (ret)
                        goto fail;
        }
@@ -423,7 +440,7 @@ uint16_t
 mana_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 {
        uint16_t pkt_received = 0;
-       uint16_t wqe_posted = 0;
+       uint16_t wqe_consumed = 0;
        struct mana_rxq *rxq = dpdk_rxq;
        struct mana_priv *priv = rxq->priv;
        struct rte_mbuf *mbuf;
@@ -535,18 +552,23 @@ mana_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 
                rxq->gdma_rq.tail += desc->wqe_size_in_bu;
 
-               /* Consume this request and post another request */
-               ret = mana_alloc_and_post_rx_wqe(rxq);
-               if (ret) {
-                       DP_LOG(ERR, "failed to post rx wqe ret=%d", ret);
-                       break;
-               }
-
-               wqe_posted++;
+               /* Record the number of RX WQEs we need to post to replenish
+                * the consumed RX requests.
+                */
+               wqe_consumed++;
                if (pkt_received == pkts_n)
                        break;
 
 #ifdef RTE_ARCH_32
+               /* Always post WQE as soon as it's consumed for short DB */
+               ret = mana_alloc_and_post_rx_wqes(rxq, wqe_consumed);
+               if (ret) {
+                       DRV_LOG(ERR, "failed to post %d WQEs, ret %d",
+                               wqe_consumed, ret);
+                       return pkt_received;
+               }
+               wqe_consumed = 0;
+
                /* Ring short doorbell if approaching the wqe increment
                 * limit.
                 */
@@ -569,8 +591,12 @@ mana_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
                goto repoll;
        }
 
-       if (wqe_posted)
-               mana_rq_ring_doorbell(rxq);
+       if (wqe_consumed) {
+               ret = mana_alloc_and_post_rx_wqes(rxq, wqe_consumed);
+               if (ret)
+                       DRV_LOG(ERR, "failed to post %d WQEs, ret %d",
+                               wqe_consumed, ret);
+       }
 
        return pkt_received;
 }
-- 
2.17.1
