This patch reimplements the vhost enqueue logic from scratch as a single
function, aiming at higher performance and better maintainability: packets
are copied into the guest descriptor chains one by one, the used ring is
updated on the fly, and the guest is notified once per burst.

Signed-off-by: Zhihong Wang <zhihong.wang@intel.com>
---
 lib/librte_vhost/vhost_rxtx.c | 212 ++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 205 insertions(+), 7 deletions(-)

diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index 08a73fd..8e6d782 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -91,7 +91,7 @@ is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t qp_nb)
        return (is_tx ^ (idx & 1)) == 0 && idx < qp_nb * VIRTIO_QNUM;
 }

-static void
+static inline void __attribute__((always_inline))
 virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr)
 {
        if (m_buf->ol_flags & PKT_TX_L4_MASK) {
@@ -533,19 +533,217 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
        return pkt_idx;
 }

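+/*
+ * Return 1 when the burst loop should stop: either all packets have been
+ * sent or the avail ring has no more entries to consume.
+ */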
+static inline uint32_t __attribute__((always_inline))
+loop_check(struct vhost_virtqueue *vq, uint16_t avail_idx, uint32_t pkt_left)
+{
+       if (pkt_left == 0 || avail_idx == vq->last_used_idx)
+               return 1;
+
+       return 0;
+}
+
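+/*
+ * Copy one packet (possibly a chained mbuf) into guest desc chain(s).
+ * Returns 0 on success, 1 on failure, rolling back any used ring
+ * entries taken on the fly for mergeable Rx buffers.
+ */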
+static inline uint32_t __attribute__((always_inline))
+enqueue_packet(struct virtio_net *dev, struct vhost_virtqueue *vq,
+               uint16_t avail_idx, struct rte_mbuf *mbuf,
+               uint32_t is_mrg_rxbuf)
+{
+       struct virtio_net_hdr_mrg_rxbuf *virtio_hdr;
+       struct vring_desc *desc;
+       uint64_t desc_host_write_addr = 0;
+       uint32_t desc_chain_head = 0;
+       uint32_t desc_chain_len = 0;
+       uint32_t desc_current = 0;
+       uint32_t desc_write_offset = 0;
+       uint32_t mbuf_len = 0;
+       uint32_t mbuf_len_left = 0;
+       uint32_t copy_len = 0;
+       uint32_t extra_buffers = 0;
+       uint32_t used_idx_round = 0;
+
+       /* start with the first mbuf of the packet */
+       mbuf_len = rte_pktmbuf_data_len(mbuf);
+       mbuf_len_left = mbuf_len;
+
+       /* get the current desc */
+       desc_current = vq->avail->ring[(vq->last_used_idx) & (vq->size - 1)];
+       desc_chain_head = desc_current;
+       desc = &vq->desc[desc_current];
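+       /* translate the guest physical desc addr into a host virtual addr */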
+       desc_host_write_addr = gpa_to_vva(dev, desc->addr);
+       if (unlikely(!desc_host_write_addr))
+               goto error;
+
+       /* handle virtio header */
+       virtio_hdr = (struct virtio_net_hdr_mrg_rxbuf *)
+               (uintptr_t)desc_host_write_addr;
+       memset(&virtio_hdr->hdr, 0, dev->vhost_hlen);
+       virtio_enqueue_offload(mbuf, &(virtio_hdr->hdr));
+       vhost_log_write(dev, desc->addr, dev->vhost_hlen);
+       desc_write_offset = dev->vhost_hlen;
+       desc_chain_len = desc_write_offset;
+       desc_host_write_addr += desc_write_offset;
+       if (is_mrg_rxbuf)
+               virtio_hdr->num_buffers = 1;
+
+       /* start copying from mbuf to desc */
+       while (1) {
+               /* get the next mbuf if the current one is done */
+               if (!mbuf_len_left) {
+                       if (mbuf->next) {
+                               mbuf = mbuf->next;
+                               mbuf_len = rte_pktmbuf_data_len(mbuf);
+                               mbuf_len_left = mbuf_len;
+                       } else
+                               break;
+               }
+
+               /* get the next desc if the current one is full */
+               if (desc->len <= desc_write_offset) {
+                       if (desc->flags & VRING_DESC_F_NEXT) {
+                               /* go on with the current desc chain */
+                               desc_write_offset = 0;
+                               desc_current = desc->next;
+                               desc = &vq->desc[desc_current];
+                               desc_host_write_addr =
+                                       gpa_to_vva(dev, desc->addr);
+                               if (unlikely(!desc_host_write_addr))
+                                       goto rollback;
+                       } else if (is_mrg_rxbuf) {
+                               /* current chain full: retire it to the
+                                * used ring, continue in the next one */
+                               used_idx_round = vq->last_used_idx
+                                       & (vq->size - 1);
+                               vq->used->ring[used_idx_round].id =
+                                       desc_chain_head;
+                               vq->used->ring[used_idx_round].len =
+                                       desc_chain_len;
+                               vhost_log_used_vring(dev, vq,
+                                       offsetof(struct vring_used,
+                                               ring[used_idx_round]),
+                                       sizeof(vq->used->ring[
+                                               used_idx_round]));
+                               vq->last_used_idx++;
+                               extra_buffers++;
+                               virtio_hdr->num_buffers++;
+                               if (avail_idx == vq->last_used_idx)
+                                       goto rollback;
+
+                               desc_current =
+                                       vq->avail->ring[(vq->last_used_idx) &
+                                       (vq->size - 1)];
+                               desc_chain_head = desc_current;
+                               desc = &vq->desc[desc_current];
+                               desc_host_write_addr =
+                                       gpa_to_vva(dev, desc->addr);
+                               if (unlikely(!desc_host_write_addr))
+                                       goto rollback;
+
+                               desc_chain_len = 0;
+                               desc_write_offset = 0;
+                       } else
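+                               /* chain exhausted, mergeable Rx off: fail */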
+                               goto rollback;
+               }
+
+               /* copy mbuf data */
+               copy_len = RTE_MIN(desc->len - desc_write_offset,
+                               mbuf_len_left);
+               rte_memcpy((void *)(uintptr_t)desc_host_write_addr,
+                               rte_pktmbuf_mtod_offset(mbuf, void *,
+                                       mbuf_len - mbuf_len_left),
+                               copy_len);
+               vhost_log_write(dev, desc->addr + desc_write_offset,
+                               copy_len);
+               mbuf_len_left -= copy_len;
+               desc_write_offset += copy_len;
+               desc_host_write_addr += copy_len;
+               desc_chain_len += copy_len;
+       }
+
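+       /* packet fully copied: retire the last desc chain to the used ring */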
+       used_idx_round = vq->last_used_idx & (vq->size - 1);
+       vq->used->ring[used_idx_round].id = desc_chain_head;
+       vq->used->ring[used_idx_round].len = desc_chain_len;
+       vhost_log_used_vring(dev, vq,
+               offsetof(struct vring_used, ring[used_idx_round]),
+               sizeof(vq->used->ring[used_idx_round]));
+       vq->last_used_idx++;
+
+       return 0;
+
+rollback:
+       /* roll back any on-the-fly last_used_idx updates on error */
+       if (is_mrg_rxbuf)
+               vq->last_used_idx -= extra_buffers;
+
+error:
+       return 1;
+}
+
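+/*
+ * Expose last_used_idx through used->idx and kick the guest via callfd
+ * unless notifications are suppressed.
+ */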
+static inline void __attribute__((always_inline))
+notify_guest(struct virtio_net *dev, struct vhost_virtqueue *vq)
+{
+       rte_smp_wmb();
+       vq->used->idx = vq->last_used_idx;
+       vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx),
+                       sizeof(vq->used->idx));
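+       /* make the used->idx update visible before reading avail->flags */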
+       rte_mb();
+       if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)
+                       && (vq->callfd >= 0))
+               eventfd_write(vq->callfd, (eventfd_t)1);
+}
+
 uint16_t
 rte_vhost_enqueue_burst(int vid, uint16_t queue_id,
        struct rte_mbuf **pkts, uint16_t count)
 {
-       struct virtio_net *dev = get_device(vid);
+       struct vhost_virtqueue *vq;
+       struct virtio_net *dev;
+       uint32_t pkt_idx = 0;
+       uint32_t pkt_left = 0;
+       uint32_t pkt_sent = 0;
+       uint32_t is_mrg_rxbuf = 0;
+       uint16_t avail_idx = 0;
+
+       /* precheck */
+       if (unlikely(count == 0))
+               return 0;

-       if (!dev)
+       count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);
+
+       dev = get_device(vid);
+       if (unlikely(!dev))
                return 0;

-       if (dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF))
-               return virtio_dev_merge_rx(dev, queue_id, pkts, count);
-       else
-               return virtio_dev_rx(dev, queue_id, pkts, count);
+       if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->virt_qp_nb)))
+               return 0;
+
+       vq = dev->virtqueue[queue_id];
+       if (unlikely(!vq->enabled))
+               return 0;
+
+       if (dev->features & (1ULL << VIRTIO_NET_F_MRG_RXBUF))
+               is_mrg_rxbuf = 1;
+
+       /* start enqueuing packets one by one */
+       pkt_idx = 0;
+       pkt_left = count;
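+       /* take a single snapshot of the avail index for this burst */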
+       avail_idx = *((volatile uint16_t *)&vq->avail->idx);
+       while (1) {
+               if (loop_check(vq, avail_idx, pkt_left))
+                       break;
+
+               if (enqueue_packet(dev, vq, avail_idx, pkts[pkt_idx],
+                                       is_mrg_rxbuf))
+                       break;
+
+               pkt_idx++;
+               pkt_sent++;
+               pkt_left--;
+       }
+
+       /* update used idx and kick the guest if necessary */
+       if (pkt_sent)
+               notify_guest(dev, vq);
+
+       return pkt_sent;
 }

 static void
-- 
2.7.4
