The rte_vhost_dequeue_burst() API supports two ways of dequeuing
data. If the data fits into one mbuf buffer, all of it is copied and
a single linear mbuf is returned. Otherwise, additional mbufs are
allocated and chained together to return a multi-segment mbuf.

While that covers most use cases, it forces applications that need
to work with larger data sizes to support multi-segment mbufs. The
non-linear characteristic adds complexity and performance overhead
to the application.
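
For illustration only (not part of this change), a consumer of
multi-segment mbufs has to walk the segment chain instead of reading
one linear buffer. In the sketch below, pkts[i] is assumed to be an
mbuf returned by rte_vhost_dequeue_burst() and process_bytes() is a
hypothetical callback:

    /* Sketch: consuming a possibly chained mbuf segment by segment. */
    struct rte_mbuf *seg;

    for (seg = pkts[i]; seg != NULL; seg = seg->next)
            process_bytes(rte_pktmbuf_mtod(seg, void *),
                          rte_pktmbuf_data_len(seg));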

To resolve the issue, change the API so that the application can
optionally provide a second mempool containing larger mbufs. If that
pool is not provided (NULL), the behavior is unchanged. Otherwise,
the buffer length is checked against the regular pool's data room,
and the mempool whose mbufs can hold the data linearly is used.
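
For illustration only (not part of this change), a caller could set
up the second mempool with a larger data room and pass it as the new
argument. The pool names, element counts, burst size and the
9000-byte jumbo payload below are illustrative assumptions, and
error handling is omitted; vid is an already-registered vhost device:

    #include <rte_lcore.h>
    #include <rte_mbuf.h>
    #include <rte_vhost.h>

    #define BURST_SZ 32   /* illustrative burst size */

    struct rte_mempool *pool, *pool_large;
    struct rte_mbuf *pkts[BURST_SZ];
    uint16_t count;

    /* Regular pool: default data room (2KB-class buffers). */
    pool = rte_pktmbuf_pool_create("mbuf_pool", 8192, 256, 0,
                                   RTE_MBUF_DEFAULT_BUF_SIZE,
                                   rte_socket_id());

    /* Larger pool: data room sized so a jumbo frame fits linearly. */
    pool_large = rte_pktmbuf_pool_create("mbuf_pool_large", 1024, 64, 0,
                                         9000 + RTE_PKTMBUF_HEADROOM,
                                         rte_socket_id());

    /* Guest buffers larger than the regular data room now come
     * back as linear mbufs allocated from pool_large... */
    count = rte_vhost_dequeue_burst(vid, 1 /* guest TX queue */,
                                    pool, pool_large, pkts, BURST_SZ);

    /* ...while passing NULL keeps the previous chaining behavior. */
    count = rte_vhost_dequeue_burst(vid, 1, pool, NULL, pkts, BURST_SZ);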

Signed-off-by: Flavio Leitner <f...@sysclose.org>
---
 drivers/net/vhost/rte_eth_vhost.c |  4 +--
 examples/tep_termination/main.c   |  2 +-
 examples/vhost/main.c             |  2 +-
 lib/librte_vhost/rte_vhost.h      |  5 ++-
 lib/librte_vhost/virtio_net.c     | 57 +++++++++++++++++++++++--------
 5 files changed, 50 insertions(+), 20 deletions(-)

diff --git a/drivers/net/vhost/rte_eth_vhost.c b/drivers/net/vhost/rte_eth_vhost.c
index 46f01a7f4..ce7f68a5b 100644
--- a/drivers/net/vhost/rte_eth_vhost.c
+++ b/drivers/net/vhost/rte_eth_vhost.c
@@ -393,8 +393,8 @@ eth_vhost_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
                                                 VHOST_MAX_PKT_BURST);
 
                nb_pkts = rte_vhost_dequeue_burst(r->vid, r->virtqueue_id,
-                                                 r->mb_pool, &bufs[nb_rx],
-                                                 num);
+                                                 r->mb_pool, NULL,
+                                                 &bufs[nb_rx], num);
 
                nb_rx += nb_pkts;
                nb_receive -= nb_pkts;
diff --git a/examples/tep_termination/main.c b/examples/tep_termination/main.c
index ab956ad7c..3ebf0fa6e 100644
--- a/examples/tep_termination/main.c
+++ b/examples/tep_termination/main.c
@@ -697,7 +697,7 @@ switch_worker(__rte_unused void *arg)
                        if (likely(!vdev->remove)) {
                                /* Handle guest TX*/
                                tx_count = rte_vhost_dequeue_burst(vdev->vid,
-                                               VIRTIO_TXQ, mbuf_pool,
+                                               VIRTIO_TXQ, mbuf_pool, NULL,
                                                pkts_burst, MAX_PKT_BURST);
                                /* If this is the first received packet we need to learn the MAC */
                                if (unlikely(vdev->ready == DEVICE_MAC_LEARNING) && tx_count) {
diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index ab649bf14..e9b306af3 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -1092,7 +1092,7 @@ drain_virtio_tx(struct vhost_dev *vdev)
                                        pkts, MAX_PKT_BURST);
        } else {
                count = rte_vhost_dequeue_burst(vdev->vid, VIRTIO_TXQ,
-                                       mbuf_pool, pkts, MAX_PKT_BURST);
+                                       mbuf_pool, NULL, pkts, MAX_PKT_BURST);
        }
 
        /* setup VMDq for the first packet */
diff --git a/lib/librte_vhost/rte_vhost.h b/lib/librte_vhost/rte_vhost.h
index 19474bca0..b05fd8e2a 100644
--- a/lib/librte_vhost/rte_vhost.h
+++ b/lib/librte_vhost/rte_vhost.h
@@ -593,6 +593,8 @@ uint16_t rte_vhost_enqueue_burst(int vid, uint16_t queue_id,
  *  virtio queue index in mq case
  * @param mbuf_pool
  *  mbuf_pool where host mbuf is allocated.
+ * @param mbuf_pool_large
+ *  mbuf_pool where larger host mbuf is allocated.
  * @param pkts
  *  array to contain packets to be dequeued
  * @param count
@@ -601,7 +603,8 @@ uint16_t rte_vhost_enqueue_burst(int vid, uint16_t queue_id,
  *  num of packets dequeued
  */
 uint16_t rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
-       struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count);
+       struct rte_mempool *mbuf_pool, struct rte_mempool *mbuf_pool_large,
+       struct rte_mbuf **pkts, uint16_t count);
 
 /**
  * Get guest mem table: a list of memory regions.
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index 5b85b832d..da9d77732 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -1291,10 +1291,12 @@ get_zmbuf(struct vhost_virtqueue *vq)
 
 static __rte_noinline uint16_t
 virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
-       struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)
+       struct rte_mempool *mbuf_pool, struct rte_mempool *mbuf_pool_large,
+       struct rte_mbuf **pkts, uint16_t count)
 {
        uint16_t i;
        uint16_t free_entries;
+       uint16_t mbuf_avail;
 
        if (unlikely(dev->dequeue_zero_copy)) {
                struct zcopy_mbuf *zmbuf, *next;
@@ -1340,32 +1342,42 @@ virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
        VHOST_LOG_DEBUG(VHOST_DATA, "(%d) about to dequeue %u buffers\n",
                        dev->vid, count);
 
+       /* If the large mpool is provided, find the threshold */
+       mbuf_avail = 0;
+       if (mbuf_pool_large)
+               mbuf_avail = rte_pktmbuf_data_room_size(mbuf_pool) - RTE_PKTMBUF_HEADROOM;
+
        for (i = 0; i < count; i++) {
                struct buf_vector buf_vec[BUF_VECTOR_MAX];
                uint16_t head_idx;
-               uint32_t dummy_len;
+               uint32_t buf_len;
                uint16_t nr_vec = 0;
+               struct rte_mempool *mpool;
                int err;
 
                if (unlikely(fill_vec_buf_split(dev, vq,
                                                vq->last_avail_idx + i,
                                                &nr_vec, buf_vec,
-                                               &head_idx, &dummy_len,
+                                               &head_idx, &buf_len,
                                                VHOST_ACCESS_RO) < 0))
                        break;
 
                if (likely(dev->dequeue_zero_copy == 0))
                        update_shadow_used_ring_split(vq, head_idx, 0);
 
-               pkts[i] = rte_pktmbuf_alloc(mbuf_pool);
+               if (mbuf_pool_large && buf_len > mbuf_avail)
+                       mpool = mbuf_pool_large;
+               else
+                       mpool = mbuf_pool;
+
+               pkts[i] = rte_pktmbuf_alloc(mpool);
                if (unlikely(pkts[i] == NULL)) {
                        RTE_LOG(ERR, VHOST_DATA,
                                "Failed to allocate memory for mbuf.\n");
                        break;
                }
 
-               err = copy_desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts[i],
-                               mbuf_pool);
+               err = copy_desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts[i], mpool);
                if (unlikely(err)) {
                        rte_pktmbuf_free(pkts[i]);
                        break;
@@ -1411,9 +1423,11 @@ virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
 
 static __rte_noinline uint16_t
 virtio_dev_tx_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
-       struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)
+       struct rte_mempool *mbuf_pool, struct rte_mempool *mbuf_pool_large,
+       struct rte_mbuf **pkts, uint16_t count)
 {
        uint16_t i;
+       uint16_t mbuf_avail;
 
        if (unlikely(dev->dequeue_zero_copy)) {
                struct zcopy_mbuf *zmbuf, *next;
@@ -1448,17 +1462,23 @@ virtio_dev_tx_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
        VHOST_LOG_DEBUG(VHOST_DATA, "(%d) about to dequeue %u buffers\n",
                        dev->vid, count);
 
+       /* If the large mpool is provided, find the threshold */
+       mbuf_avail = 0;
+       if (mbuf_pool_large)
+               mbuf_avail = rte_pktmbuf_data_room_size(mbuf_pool) - RTE_PKTMBUF_HEADROOM;
+
        for (i = 0; i < count; i++) {
                struct buf_vector buf_vec[BUF_VECTOR_MAX];
                uint16_t buf_id;
-               uint32_t dummy_len;
+               uint32_t buf_len;
                uint16_t desc_count, nr_vec = 0;
+               struct rte_mempool *mpool;
                int err;
 
                if (unlikely(fill_vec_buf_packed(dev, vq,
                                                vq->last_avail_idx, &desc_count,
                                                buf_vec, &nr_vec,
-                                               &buf_id, &dummy_len,
+                                               &buf_id, &buf_len,
                                                VHOST_ACCESS_RO) < 0))
                        break;
 
@@ -1466,15 +1486,19 @@ virtio_dev_tx_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
                        update_shadow_used_ring_packed(vq, buf_id, 0,
                                        desc_count);
 
-               pkts[i] = rte_pktmbuf_alloc(mbuf_pool);
+               if (mbuf_pool_large && buf_len > mbuf_avail)
+                       mpool = mbuf_pool_large;
+               else
+                       mpool = mbuf_pool;
+
+               pkts[i] = rte_pktmbuf_alloc(mpool);
                if (unlikely(pkts[i] == NULL)) {
                        RTE_LOG(ERR, VHOST_DATA,
                                "Failed to allocate memory for mbuf.\n");
                        break;
                }
 
-               err = copy_desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts[i],
-                               mbuf_pool);
+               err = copy_desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts[i], mpool);
                if (unlikely(err)) {
                        rte_pktmbuf_free(pkts[i]);
                        break;
@@ -1526,7 +1550,8 @@ virtio_dev_tx_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
 
 uint16_t
 rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
-       struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)
+       struct rte_mempool *mbuf_pool, struct rte_mempool *mbuf_pool_large,
+       struct rte_mbuf **pkts, uint16_t count)
 {
        struct virtio_net *dev;
        struct rte_mbuf *rarp_mbuf = NULL;
@@ -1598,9 +1623,11 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
        }
 
        if (vq_is_packed(dev))
-               count = virtio_dev_tx_packed(dev, vq, mbuf_pool, pkts, count);
+               count = virtio_dev_tx_packed(dev, vq, mbuf_pool, mbuf_pool_large, pkts,
+                                     count);
        else
-               count = virtio_dev_tx_split(dev, vq, mbuf_pool, pkts, count);
+               count = virtio_dev_tx_split(dev, vq, mbuf_pool, mbuf_pool_large, pkts,
+                                    count);
 
 out:
        if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
-- 
2.20.1
