In memif non-zero-copy mode, there is a branch that compares
the mbuf and memif buffer sizes during memory copying. Add a
fast memory copy path that removes this branch, with the mbuf
and memif buffer sizes defined at compile time. Removing the
branch leads to a considerable performance uplift.

When the memif buffer size is <= the mbuf size, Rx chooses the
fast memcpy path; otherwise it falls back to the original path.

Test with 1p1q on Ampere Altra AArch64 server,
--------------------------------------------
  buf size  | memif <= mbuf | memif > mbuf |
--------------------------------------------
non-zc gain |     4.30%     |    -0.52%    |
--------------------------------------------
   zc gain  |     2.46%     |     0.70%    |
--------------------------------------------

Test with 1p1q on Cascade Lake Xeon x86 server,
-------------------------------------------
  buf size  | memif <= mbuf | memif > mbuf |
-------------------------------------------
non-zc gain |     2.13%     |    -1.40%    |
-------------------------------------------
   zc gain  |     0.18%     |     0.48%    |
-------------------------------------------

Signed-off-by: Joyce Kong <joyce.k...@arm.com>
---
 drivers/net/memif/rte_eth_memif.c | 124 ++++++++++++++++++++----------
 1 file changed, 84 insertions(+), 40 deletions(-)

diff --git a/drivers/net/memif/rte_eth_memif.c 
b/drivers/net/memif/rte_eth_memif.c
index 587ad45576..f55776ca46 100644
--- a/drivers/net/memif/rte_eth_memif.c
+++ b/drivers/net/memif/rte_eth_memif.c
@@ -342,66 +342,111 @@ eth_memif_rx(void *queue, struct rte_mbuf **bufs, 
uint16_t nb_pkts)
                goto refill;
        n_slots = last_slot - cur_slot;
 
-       while (n_slots && n_rx_pkts < nb_pkts) {
-               mbuf_head = rte_pktmbuf_alloc(mq->mempool);
-               if (unlikely(mbuf_head == NULL))
-                       goto no_free_bufs;
-               mbuf = mbuf_head;
-               mbuf->port = mq->in_port;
+       if (likely(mbuf_size >= pmd->cfg.pkt_buffer_size)) {
+               while (n_slots && n_rx_pkts < nb_pkts) {
+                       mbuf_head = rte_pktmbuf_alloc(mq->mempool);
+                       if (unlikely(mbuf_head == NULL))
+                               goto no_free_bufs;
+                       mbuf = mbuf_head;
+                       mbuf->port = mq->in_port;
+
+next_slot1:
+                       s0 = cur_slot & mask;
+                       d0 = &ring->desc[s0];
 
-next_slot:
-               s0 = cur_slot & mask;
-               d0 = &ring->desc[s0];
+                       cp_len = d0->length;
 
-               src_len = d0->length;
-               dst_off = 0;
-               src_off = 0;
+                       rte_pktmbuf_data_len(mbuf) = cp_len;
+                       rte_pktmbuf_pkt_len(mbuf) = cp_len;
+                       if (mbuf != mbuf_head)
+                               rte_pktmbuf_pkt_len(mbuf_head) += cp_len;
 
-               do {
-                       dst_len = mbuf_size - dst_off;
-                       if (dst_len == 0) {
-                               dst_off = 0;
-                               dst_len = mbuf_size;
+                       rte_memcpy(rte_pktmbuf_mtod(mbuf, void *),
+                               (uint8_t *)memif_get_buffer(proc_private, d0), 
cp_len);
+
+                       cur_slot++;
+                       n_slots--;
 
-                               /* store pointer to tail */
+                       if (d0->flags & MEMIF_DESC_FLAG_NEXT) {
                                mbuf_tail = mbuf;
                                mbuf = rte_pktmbuf_alloc(mq->mempool);
                                if (unlikely(mbuf == NULL))
                                        goto no_free_bufs;
-                               mbuf->port = mq->in_port;
                                ret = memif_pktmbuf_chain(mbuf_head, mbuf_tail, 
mbuf);
                                if (unlikely(ret < 0)) {
                                        MIF_LOG(ERR, 
"number-of-segments-overflow");
                                        rte_pktmbuf_free(mbuf);
                                        goto no_free_bufs;
                                }
+                               goto next_slot1;
                        }
-                       cp_len = RTE_MIN(dst_len, src_len);
 
-                       rte_pktmbuf_data_len(mbuf) += cp_len;
-                       rte_pktmbuf_pkt_len(mbuf) = rte_pktmbuf_data_len(mbuf);
-                       if (mbuf != mbuf_head)
-                               rte_pktmbuf_pkt_len(mbuf_head) += cp_len;
+                       mq->n_bytes += rte_pktmbuf_pkt_len(mbuf_head);
+                       *bufs++ = mbuf_head;
+                       n_rx_pkts++;
+               }
+       } else {
+               while (n_slots && n_rx_pkts < nb_pkts) {
+                       mbuf_head = rte_pktmbuf_alloc(mq->mempool);
+                       if (unlikely(mbuf_head == NULL))
+                               goto no_free_bufs;
+                       mbuf = mbuf_head;
+                       mbuf->port = mq->in_port;
+
+next_slot2:
+                       s0 = cur_slot & mask;
+                       d0 = &ring->desc[s0];
 
-                       rte_memcpy(rte_pktmbuf_mtod_offset(mbuf, void *,
-                                                          dst_off),
-                               (uint8_t *)memif_get_buffer(proc_private, d0) +
-                               src_off, cp_len);
+                       src_len = d0->length;
+                       dst_off = 0;
+                       src_off = 0;
 
-                       src_off += cp_len;
-                       dst_off += cp_len;
-                       src_len -= cp_len;
-               } while (src_len);
+                       do {
+                               dst_len = mbuf_size - dst_off;
+                               if (dst_len == 0) {
+                                       dst_off = 0;
+                                       dst_len = mbuf_size;
+
+                                       /* store pointer to tail */
+                                       mbuf_tail = mbuf;
+                                       mbuf = rte_pktmbuf_alloc(mq->mempool);
+                                       if (unlikely(mbuf == NULL))
+                                               goto no_free_bufs;
+                                       mbuf->port = mq->in_port;
+                                       ret = memif_pktmbuf_chain(mbuf_head, 
mbuf_tail, mbuf);
+                                       if (unlikely(ret < 0)) {
+                                               MIF_LOG(ERR, 
"number-of-segments-overflow");
+                                               rte_pktmbuf_free(mbuf);
+                                               goto no_free_bufs;
+                                       }
+                               }
+                               cp_len = RTE_MIN(dst_len, src_len);
 
-               cur_slot++;
-               n_slots--;
+                               rte_pktmbuf_data_len(mbuf) += cp_len;
+                               rte_pktmbuf_pkt_len(mbuf) = 
rte_pktmbuf_data_len(mbuf);
+                               if (mbuf != mbuf_head)
+                                       rte_pktmbuf_pkt_len(mbuf_head) += 
cp_len;
 
-               if (d0->flags & MEMIF_DESC_FLAG_NEXT)
-                       goto next_slot;
+                               rte_memcpy(rte_pktmbuf_mtod_offset(mbuf, void *,
+                                                                  dst_off),
+                                       (uint8_t 
*)memif_get_buffer(proc_private, d0) +
+                                       src_off, cp_len);
 
-               mq->n_bytes += rte_pktmbuf_pkt_len(mbuf_head);
-               *bufs++ = mbuf_head;
-               n_rx_pkts++;
+                               src_off += cp_len;
+                               dst_off += cp_len;
+                               src_len -= cp_len;
+                       } while (src_len);
+
+                       cur_slot++;
+                       n_slots--;
+
+                       if (d0->flags & MEMIF_DESC_FLAG_NEXT)
+                               goto next_slot2;
+
+                       mq->n_bytes += rte_pktmbuf_pkt_len(mbuf_head);
+                       *bufs++ = mbuf_head;
+                       n_rx_pkts++;
+               }
        }
 
 no_free_bufs:
@@ -694,7 +739,6 @@ eth_memif_tx(void *queue, struct rte_mbuf **bufs, uint16_t 
nb_pkts)
        return n_tx_pkts;
 }
 
-
 static int
 memif_tx_one_zc(struct pmd_process_private *proc_private, struct memif_queue 
*mq,
                memif_ring_t *ring, struct rte_mbuf *mbuf, const uint16_t mask,
-- 
2.25.1

Reply via email to