Implement code to dequeue and process descriptors from
the vring if VIRTIO_F_RING_PACKED is enabled.

Check if a descriptor was made available by the driver by looking at
the VIRTIO_F_DESC_AVAIL flag in the descriptor. If so, dequeue it and
set the used flag VIRTIO_F_DESC_USED to the current value of the used
wrap counter.
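
For reference, the availability check maps onto the desc_is_avail()
helper from virtio-packed.h used further down. The following is only a
minimal sketch of that logic, assuming the flag names from this commit
message, the bit positions from the packed ring spec, and the vhost
types introduced by this series; it is not the actual header code:

    /* Sketch only: flag names as used in this commit message, bit
     * positions per the packed ring spec. */
    #define VIRTIO_F_DESC_AVAIL (1 << 7)
    #define VIRTIO_F_DESC_USED  (1 << 15)

    static inline int
    desc_is_avail(struct vhost_virtqueue *vq, struct vring_desc_packed *desc)
    {
            /* A descriptor is available when its avail flag matches the
             * current wrap counter and its used flag does not. */
            if (vq->used_wrap_counter)
                    return  (desc->flags & VIRTIO_F_DESC_AVAIL) &&
                           !(desc->flags & VIRTIO_F_DESC_USED);
            return !(desc->flags & VIRTIO_F_DESC_AVAIL) &&
                    (desc->flags & VIRTIO_F_DESC_USED);
    }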

The used wrap counter needs to be toggled when the last descriptor in
the ring is written out and the ring index wraps back to zero. This
allows the host/guest to detect new descriptors even after the ring
has wrapped.
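
The toggling and the used-flag write-back correspond to the
toggle_wrap_counter() and _set_desc_used() helpers called below; again
only a sketch under the same naming assumptions, not the actual
virtio-packed.h definitions (the caller issues the write barrier
before _set_desc_used(), so the helper itself does not):

    static inline void
    toggle_wrap_counter(struct vhost_virtqueue *vq)
    {
            vq->used_wrap_counter ^= 1;
    }

    static inline void
    _set_desc_used(struct vring_desc_packed *desc, int wrap_counter)
    {
            uint16_t flags = desc->flags;

            /* Marking a descriptor as used sets both flags to the
             * current value of the used wrap counter. */
            if (wrap_counter)
                    flags |= VIRTIO_F_DESC_AVAIL | VIRTIO_F_DESC_USED;
            else
                    flags &= ~(VIRTIO_F_DESC_AVAIL | VIRTIO_F_DESC_USED);

            desc->flags = flags;
    }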

Signed-off-by: Jens Freimann <jfreim...@redhat.com>
---
 lib/librte_vhost/vhost.c      |   1 +
 lib/librte_vhost/virtio_net.c | 220 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 221 insertions(+)

diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
index 5ddf55ed9..f7989cfbd 100644
--- a/lib/librte_vhost/vhost.c
+++ b/lib/librte_vhost/vhost.c
@@ -185,6 +185,7 @@ init_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
 
        vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD;
        vq->callfd = VIRTIO_UNINITIALIZED_EVENTFD;
+       vq->used_wrap_counter = 1;
 
        vhost_user_iotlb_init(dev, vring_idx);
        /* Backends are set to -1 indicating an inactive device. */
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index ed7198dbb..9a9ff92f9 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -19,6 +19,7 @@
 
 #include "iotlb.h"
 #include "vhost.h"
+#include "virtio-packed.h"
 
 #define MAX_PKT_BURST 32
 
@@ -1118,6 +1119,221 @@ restore_mbuf(struct rte_mbuf *m)
        }
 }
 
+static inline int
+dequeue_desc_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
+            struct rte_mempool *mbuf_pool, struct rte_mbuf *m,
+            struct vring_desc_packed *descs)
+{
+       struct vring_desc_packed *desc;
+       uint64_t desc_addr;
+       uint32_t desc_avail, desc_offset;
+       uint32_t mbuf_avail, mbuf_offset;
+       uint32_t cpy_len;
+       struct rte_mbuf *cur = m, *prev = m;
+       struct virtio_net_hdr *hdr = NULL;
+       uint16_t head_idx = vq->last_used_idx & (vq->size - 1);
+       int wrap_counter = vq->used_wrap_counter;
+       int rc = 0;
+
+       if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+               vhost_user_iotlb_rd_lock(vq);
+
+       desc = &descs[vq->last_used_idx & (vq->size - 1)];
+       if (unlikely((desc->len < dev->vhost_hlen) ||
+                    (desc->flags & VRING_DESC_F_INDIRECT))) {
+               RTE_LOG(ERR, VHOST_DATA,
+                       "INDIRECT not supported yet\n");
+               rc = -1;
+               goto out;
+       }
+
+       desc_addr = vhost_iova_to_vva(dev, vq, desc->addr,
+                                     sizeof(*desc), VHOST_ACCESS_RO);
+
+       if (unlikely(!desc_addr)) {
+               rc = -1;
+               goto out;
+       }
+
+       if (virtio_net_with_host_offload(dev)) {
+               hdr = (struct virtio_net_hdr *)((uintptr_t)desc_addr);
+               rte_prefetch0(hdr);
+       }
+
+       /*
+        * A virtio driver normally uses at least 2 desc buffers
+        * for Tx: the first for storing the header, and others
+        * for storing the data.
+        */
+       if (likely((desc->len == dev->vhost_hlen) &&
+                  (desc->flags & VRING_DESC_F_NEXT) != 0)) {
+               if ((++vq->last_used_idx & (vq->size - 1)) == 0)
+                       toggle_wrap_counter(vq);
+
+               desc = &descs[vq->last_used_idx & (vq->size - 1)];
+
+               desc_addr = vhost_iova_to_vva(dev, vq, desc->addr,
+                                             sizeof(*desc), VHOST_ACCESS_RO);
+               if (unlikely(!desc_addr)) {
+                       rc = -1;
+                       goto out;
+               }
+
+               desc_offset = 0;
+               desc_avail  = desc->len;
+       } else {
+               desc_avail  = desc->len - dev->vhost_hlen;
+               desc_offset = dev->vhost_hlen;
+       }
+
+       rte_prefetch0((void *)(uintptr_t)(desc_addr + desc_offset));
+
+       PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset), desc_avail, 0);
+
+       mbuf_offset = 0;
+       mbuf_avail  = m->buf_len - RTE_PKTMBUF_HEADROOM;
+       while (1) {
+               uint64_t hpa;
+
+               cpy_len = RTE_MIN(desc_avail, mbuf_avail);
+
+               /*
+                * A desc buf might span two host physical pages that are
+                * not contiguous. In that case (gpa_to_hpa returns 0), data
+                * will be copied even though zero copy is enabled.
+                */
+               if (unlikely(dev->dequeue_zero_copy && (hpa = gpa_to_hpa(dev,
+                                       desc->addr + desc_offset, cpy_len)))) {
+                       cur->data_len = cpy_len;
+                       cur->data_off = 0;
+                       cur->buf_addr = (void *)(uintptr_t)desc_addr;
+                       cur->buf_physaddr = hpa;
+
+                       /*
+                        * In zero copy mode, one mbuf can only reference data
+                        * from one desc buf, or part of one.
+                        */
+                       mbuf_avail = cpy_len;
+               } else {
+                       rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *,
+                                                          mbuf_offset),
+                               (void *)((uintptr_t)(desc_addr + desc_offset)),
+                               cpy_len);
+               }
+
+               mbuf_avail  -= cpy_len;
+               mbuf_offset += cpy_len;
+               desc_avail  -= cpy_len;
+               desc_offset += cpy_len;
+
+               /* This desc reaches its end, get the next one */
+               if (desc_avail == 0) {
+                       if ((desc->flags & VRING_DESC_F_NEXT) == 0)
+                               break;
+
+                       if ((++vq->last_used_idx & (vq->size - 1)) == 0)
+                               toggle_wrap_counter(vq);
+
+                       desc = &descs[vq->last_used_idx & (vq->size - 1)];
+
+                       desc_addr = vhost_iova_to_vva(dev, vq, desc->addr,
+                                                     sizeof(*desc),
+                                                     VHOST_ACCESS_RO);
+                       if (unlikely(!desc_addr))
+                               break;
+
+                       rte_prefetch0((void *)(uintptr_t)desc_addr);
+
+                       desc_offset = 0;
+                       desc_avail  = desc->len;
+
+                       PRINT_PACKET(dev, (uintptr_t)desc_addr, desc->len, 0);
+               }
+
+               /*
+                * This mbuf reaches its end, get a new one
+                * to hold more data.
+                */
+               if (mbuf_avail == 0) {
+                       cur = rte_pktmbuf_alloc(mbuf_pool);
+                       if (unlikely(cur == NULL)) {
+                               RTE_LOG(ERR, VHOST_DATA, "Failed to "
+                                       "allocate memory for mbuf.\n");
+                               break;
+                       }
+
+                       prev->next = cur;
+                       prev->data_len = mbuf_offset;
+                       m->nb_segs += 1;
+                       m->pkt_len += mbuf_offset;
+                       prev = cur;
+
+                       mbuf_offset = 0;
+                       mbuf_avail  = cur->buf_len - RTE_PKTMBUF_HEADROOM;
+               }
+       }
+
+       if (hdr)
+               vhost_dequeue_offload(hdr, m);
+
+       if ((++vq->last_used_idx & (vq->size - 1)) == 0)
+               toggle_wrap_counter(vq);
+
+       rte_smp_wmb();
+       _set_desc_used(&descs[head_idx], wrap_counter);
+
+       prev->data_len = mbuf_offset;
+       m->pkt_len    += mbuf_offset;
+
+out:
+       if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+               vhost_user_iotlb_rd_unlock(vq);
+
+       return rc;
+}
+
+static inline uint16_t
+vhost_dequeue_burst_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
+                       struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts,
+                       uint16_t count)
+{
+       uint16_t i = 0;
+       uint16_t idx;
+       struct vring_desc_packed *desc = vq->desc_packed;
+       int err;
+
+       rte_spinlock_lock(&vq->access_lock);
+       if (unlikely(vq->enabled == 0))
+               goto out;
+
+       count = RTE_MIN(MAX_PKT_BURST, count);
+
+       for (i = 0; i < count; i++) {
+               idx = vq->last_used_idx & (vq->size - 1);
+               if (!desc_is_avail(vq, &desc[idx]))
+                       break;
+               rte_smp_rmb();
+
+               pkts[i] = rte_pktmbuf_alloc(mbuf_pool);
+               if (unlikely(pkts[i] == NULL)) {
+                       RTE_LOG(ERR, VHOST_DATA,
+                               "Failed to allocate memory for mbuf.\n");
+                       break;
+               }
+
+               err = dequeue_desc_packed(dev, vq, mbuf_pool, pkts[i], desc);
+               if (unlikely(err)) {
+                       rte_pktmbuf_free(pkts[i]);
+                       break;
+               }
+       }
+
+out:
+       rte_spinlock_unlock(&vq->access_lock);
+
+       return i;
+}
+
 uint16_t
 rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
        struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)
@@ -1150,6 +1366,10 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 
        vq = dev->virtqueue[queue_id];
 
+       if (vq_is_packed(dev))
+               return vhost_dequeue_burst_packed(dev, vq, mbuf_pool,
+                                                 pkts, count);
+
        if (unlikely(rte_spinlock_trylock(&vq->access_lock) == 0))
                return 0;
 
-- 
2.14.3
