This patch implements basic batched processing of the tx virtqueue by
prefetching descriptor indices and updating the used ring in a batch.
For the non-zerocopy case, vq->heads is used to store the prefetched
indices and to update the used ring; this is also a prerequisite for
doing more batching on top. For the zerocopy case, and for simplicity,
batched processing is disabled by fetching and processing only one
descriptor at a time; this could be optimized in the future.

XDP_DROP (without touching skb) on tun (with Moongen in guest) with
zerocopy disabled:

Intel(R) Xeon(R) CPU E5-2650 0 @ 2.00GHz:
Before: 3.20Mpps
After:  3.90Mpps (+22%)

No differences were seen with zerocopy enabled.

Signed-off-by: Jason Wang <jasow...@redhat.com>
---
 drivers/vhost/net.c   | 215 ++++++++++++++++++++++++++++----------------------
 drivers/vhost/vhost.c |   2 +-
 2 files changed, 121 insertions(+), 96 deletions(-)

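Note: vhost_prefetch_desc_indices() and vhost_add_used_idx() are not
part of this diff (they are introduced earlier in the series). As a
reading aid, below is a minimal user-space model of the batching
scheme, inferred from the call sites in this patch -- a sketch, not
the kernel code. Memory barriers, descriptor chains, endianness
handling and userspace copies are all omitted, and prefetch_heads(),
QSZ and BATCH are names invented for the model.

#include <stdint.h>
#include <stdio.h>

#define QSZ   256   /* ring size, power of two */
#define BATCH 64    /* stand-in for VHOST_NET_BATCH */

struct used_elem { uint32_t id; uint32_t len; };

/* Simplified split ring: no chains, no barriers, no translation. */
struct vring {
        uint16_t avail_idx;              /* published by the "guest" */
        uint16_t avail_ring[QSZ];
        uint16_t used_idx;               /* published by the "host" */
        struct used_elem used_ring[QSZ];
        uint16_t last_avail;             /* host-private cursor */
};

/* Model of what vhost_prefetch_desc_indices() is understood to do:
 * copy up to @num available head indices into @heads and, when
 * @prewrite_used is set (the !zcopy case), pre-write the matching
 * used-ring entries so per-packet completion only bumps used_idx. */
static int prefetch_heads(struct vring *vq, struct used_elem *heads,
                          int num, int prewrite_used)
{
        int avail = (uint16_t)(vq->avail_idx - vq->last_avail);
        int i, n = avail < num ? avail : num;

        for (i = 0; i < n; i++) {
                heads[i].id  = vq->avail_ring[(vq->last_avail + i) % QSZ];
                heads[i].len = 0;        /* TX: nothing written back */
                if (prewrite_used)
                        vq->used_ring[(vq->used_idx + i) % QSZ] = heads[i];
        }
        vq->last_avail += n;
        return n;
}

int main(void)
{
        struct vring vq = { 0 };
        struct used_elem heads[BATCH];
        int i, n;

        /* "Guest" posts 100 buffers. */
        for (i = 0; i < 100; i++)
                vq.avail_ring[i % QSZ] = i;
        vq.avail_idx = 100;

        while ((n = prefetch_heads(&vq, heads, BATCH, 1)) > 0) {
                for (i = 0; i < n; i++) {
                        /* ...transmit heads[i] here... */
                        vq.used_idx++;   /* vhost_add_used_idx(vq, 1) */
                }
                printf("batch of %d done, used_idx=%d\n", n, vq.used_idx);
        }
        return 0;
}

The kzalloc() change at the bottom of the diff fits this model: once
used-ring entries are written straight out of vq->heads, zeroing the
array guarantees the .len fields the guest sees read as 0 for TX,
which is presumably the motivation for switching from kmalloc().
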
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index c89640e..c439892 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -408,27 +408,25 @@ static int vhost_net_enable_vq(struct vhost_net *n,
        return vhost_poll_start(poll, sock->file);
 }
 
-static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
-                                   struct vhost_virtqueue *vq,
-                                   struct iovec iov[], unsigned int iov_size,
-                                   unsigned int *out_num, unsigned int *in_num)
+static bool vhost_net_tx_avail(struct vhost_net *net,
+                              struct vhost_virtqueue *vq)
 {
        unsigned long uninitialized_var(endtime);
-       int r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
-                                 out_num, in_num, NULL, NULL);
 
-       if (r == vq->num && vq->busyloop_timeout) {
-               preempt_disable();
-               endtime = busy_clock() + vq->busyloop_timeout;
-               while (vhost_can_busy_poll(vq->dev, endtime) &&
-                      vhost_vq_avail_empty(vq->dev, vq))
-                       cpu_relax();
-               preempt_enable();
-               r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
-                                     out_num, in_num, NULL, NULL);
-       }
+       if (!vq->busyloop_timeout)
+               return false;
 
-       return r;
+       if (!vhost_vq_avail_empty(vq->dev, vq))
+               return true;
+
+       preempt_disable();
+       endtime = busy_clock() + vq->busyloop_timeout;
+       while (vhost_can_busy_poll(vq->dev, endtime) &&
+               vhost_vq_avail_empty(vq->dev, vq))
+               cpu_relax();
+       preempt_enable();
+
+       return !vhost_vq_avail_empty(vq->dev, vq);
 }
 
 static bool vhost_exceeds_maxpend(struct vhost_net *net)
@@ -446,8 +444,9 @@ static void handle_tx(struct vhost_net *net)
 {
        struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
        struct vhost_virtqueue *vq = &nvq->vq;
+       struct vring_used_elem used, *heads = vq->heads;
        unsigned out, in;
-       int head;
+       int avails, head;
        struct msghdr msg = {
                .msg_name = NULL,
                .msg_namelen = 0,
@@ -461,6 +460,7 @@ static void handle_tx(struct vhost_net *net)
        struct socket *sock;
        struct vhost_net_ubuf_ref *uninitialized_var(ubufs);
        bool zcopy, zcopy_used;
+       int i, batched = VHOST_NET_BATCH;
 
        mutex_lock(&vq->mutex);
        sock = vq->private_data;
@@ -475,6 +475,12 @@ static void handle_tx(struct vhost_net *net)
        hdr_size = nvq->vhost_hlen;
        zcopy = nvq->ubufs;
 
+       /* Disable zerocopy batched fetching for simplicity */
+       if (zcopy) {
+               heads = &used;
+               batched = 1;
+       }
+
        for (;;) {
                /* Release DMAs done buffers first */
                if (zcopy)
@@ -486,95 +492,114 @@ static void handle_tx(struct vhost_net *net)
                if (unlikely(vhost_exceeds_maxpend(net)))
                        break;
 
-               head = vhost_net_tx_get_vq_desc(net, vq, vq->iov,
-                                               ARRAY_SIZE(vq->iov),
-                                               &out, &in);
+               avails = vhost_prefetch_desc_indices(vq, heads, batched,
+                                                    !zcopy);
                /* On error, stop handling until the next kick. */
-               if (unlikely(head < 0))
+               if (unlikely(avails < 0))
                        break;
-               /* Nothing new?  Wait for eventfd to tell us they refilled. */
-               if (head == vq->num) {
+               /* Nothing new?  Busy poll for a while or wait for
+                * eventfd to tell us they refilled. */
+               if (!avails) {
+                       if (vhost_net_tx_avail(net, vq))
+                               continue;
                        if (unlikely(vhost_enable_notify(&net->dev, vq))) {
                                vhost_disable_notify(&net->dev, vq);
                                continue;
                        }
                        break;
                }
-               if (in) {
-                       vq_err(vq, "Unexpected descriptor format for TX: "
-                              "out %d, int %d\n", out, in);
-                       break;
-               }
-               /* Skip header. TODO: support TSO. */
-               len = iov_length(vq->iov, out);
-               iov_iter_init(&msg.msg_iter, WRITE, vq->iov, out, len);
-               iov_iter_advance(&msg.msg_iter, hdr_size);
-               /* Sanity check */
-               if (!msg_data_left(&msg)) {
-                       vq_err(vq, "Unexpected header len for TX: "
-                              "%zd expected %zd\n",
-                              len, hdr_size);
-                       break;
-               }
-               len = msg_data_left(&msg);
-
-               zcopy_used = zcopy && len >= VHOST_GOODCOPY_LEN
-                                  && (nvq->upend_idx + 1) % UIO_MAXIOV !=
-                                     nvq->done_idx
-                                  && vhost_net_tx_select_zcopy(net);
-
-               /* use msg_control to pass vhost zerocopy ubuf info to skb */
-               if (zcopy_used) {
-                       struct ubuf_info *ubuf;
-                       ubuf = nvq->ubuf_info + nvq->upend_idx;
-
-                       vq->heads[nvq->upend_idx].id = cpu_to_vhost32(vq, head);
-                       vq->heads[nvq->upend_idx].len = VHOST_DMA_IN_PROGRESS;
-                       ubuf->callback = vhost_zerocopy_callback;
-                       ubuf->ctx = nvq->ubufs;
-                       ubuf->desc = nvq->upend_idx;
-                       refcount_set(&ubuf->refcnt, 1);
-                       msg.msg_control = ubuf;
-                       msg.msg_controllen = sizeof(ubuf);
-                       ubufs = nvq->ubufs;
-                       atomic_inc(&ubufs->refcount);
-                       nvq->upend_idx = (nvq->upend_idx + 1) % UIO_MAXIOV;
-               } else {
-                       msg.msg_control = NULL;
-                       ubufs = NULL;
-               }
+               for (i = 0; i < avails; i++) {
+                       head = __vhost_get_vq_desc(vq, vq->iov,
+                                                  ARRAY_SIZE(vq->iov),
+                                                  &out, &in, NULL, NULL,
+                                              vhost16_to_cpu(vq, heads[i].id));
+                       if (in) {
+                               vq_err(vq, "Unexpected descriptor format for "
+                                          "TX: out %d, int %d\n", out, in);
+                               goto out;
+                       }
 
-               total_len += len;
-               if (total_len < VHOST_NET_WEIGHT &&
-                   !vhost_vq_avail_empty(&net->dev, vq) &&
-                   likely(!vhost_exceeds_maxpend(net))) {
-                       msg.msg_flags |= MSG_MORE;
-               } else {
-                       msg.msg_flags &= ~MSG_MORE;
-               }
+                       /* Skip header. TODO: support TSO. */
+                       len = iov_length(vq->iov, out);
+                       iov_iter_init(&msg.msg_iter, WRITE, vq->iov, out, len);
+                       iov_iter_advance(&msg.msg_iter, hdr_size);
+                       /* Sanity check */
+                       if (!msg_data_left(&msg)) {
+                               vq_err(vq, "Unexpected header len for TX: "
+                                       "%zd expected %zd\n",
+                                       len, hdr_size);
+                               goto out;
+                       }
+                       len = msg_data_left(&msg);
 
-               /* TODO: Check specific error and bomb out unless ENOBUFS? */
-               err = sock->ops->sendmsg(sock, &msg, len);
-               if (unlikely(err < 0)) {
+                       zcopy_used = zcopy && len >= VHOST_GOODCOPY_LEN
+                                    && (nvq->upend_idx + 1) % UIO_MAXIOV !=
+                                       nvq->done_idx
+                                    && vhost_net_tx_select_zcopy(net);
+
+                       /* use msg_control to pass vhost zerocopy ubuf
+                        * info to skb
+                        */
                        if (zcopy_used) {
-                               vhost_net_ubuf_put(ubufs);
-                               nvq->upend_idx = ((unsigned)nvq->upend_idx - 1)
-                                       % UIO_MAXIOV;
+                               struct ubuf_info *ubuf;
+                               ubuf = nvq->ubuf_info + nvq->upend_idx;
+
+                               vq->heads[nvq->upend_idx].id =
+                                       cpu_to_vhost32(vq, head);
+                               vq->heads[nvq->upend_idx].len =
+                                       VHOST_DMA_IN_PROGRESS;
+                               ubuf->callback = vhost_zerocopy_callback;
+                               ubuf->ctx = nvq->ubufs;
+                               ubuf->desc = nvq->upend_idx;
+                               refcount_set(&ubuf->refcnt, 1);
+                               msg.msg_control = ubuf;
+                               msg.msg_controllen = sizeof(ubuf);
+                               ubufs = nvq->ubufs;
+                               atomic_inc(&ubufs->refcount);
+                               nvq->upend_idx =
+                                       (nvq->upend_idx + 1) % UIO_MAXIOV;
+                       } else {
+                               msg.msg_control = NULL;
+                               ubufs = NULL;
+                       }
+
+                       total_len += len;
+                       if (total_len < VHOST_NET_WEIGHT &&
+                               !vhost_vq_avail_empty(&net->dev, vq) &&
+                               likely(!vhost_exceeds_maxpend(net))) {
+                               msg.msg_flags |= MSG_MORE;
+                       } else {
+                               msg.msg_flags &= ~MSG_MORE;
+                       }
+
+                       /* TODO: Check specific error and bomb out
+                        * unless ENOBUFS?
+                        */
+                       err = sock->ops->sendmsg(sock, &msg, len);
+                       if (unlikely(err < 0)) {
+                               if (zcopy_used) {
+                                       vhost_net_ubuf_put(ubufs);
+                                       nvq->upend_idx =
+                                  ((unsigned)nvq->upend_idx - 1) % UIO_MAXIOV;
+                               }
+                               vhost_discard_vq_desc(vq, 1);
+                               goto out;
+                       }
+                       if (err != len)
+                               pr_debug("Truncated TX packet: "
+                                       " len %d != %zd\n", err, len);
+                       if (!zcopy) {
+                               vhost_add_used_idx(vq, 1);
+                               vhost_signal(&net->dev, vq);
+                       } else if (!zcopy_used) {
+                               vhost_add_used_and_signal(&net->dev,
+                                                         vq, head, 0);
+                       } else
+                               vhost_zerocopy_signal_used(net, vq);
+                       vhost_net_tx_packet(net);
+                       if (unlikely(total_len >= VHOST_NET_WEIGHT)) {
+                               vhost_poll_queue(&vq->poll);
+                               goto out;
                        }
-                       vhost_discard_vq_desc(vq, 1);
-                       break;
-               }
-               if (err != len)
-                       pr_debug("Truncated TX packet: "
-                                " len %d != %zd\n", err, len);
-               if (!zcopy_used)
-                       vhost_add_used_and_signal(&net->dev, vq, head, 0);
-               else
-                       vhost_zerocopy_signal_used(net, vq);
-               vhost_net_tx_packet(net);
-               if (unlikely(total_len >= VHOST_NET_WEIGHT)) {
-                       vhost_poll_queue(&vq->poll);
-                       break;
                }
        }
 out:
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 6532cda..8764df5 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -392,7 +392,7 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
                vq->indirect = kmalloc(sizeof *vq->indirect * UIO_MAXIOV,
                                       GFP_KERNEL);
                vq->log = kmalloc(sizeof *vq->log * UIO_MAXIOV, GFP_KERNEL);
-               vq->heads = kmalloc(sizeof *vq->heads * UIO_MAXIOV, GFP_KERNEL);
+               vq->heads = kzalloc(sizeof *vq->heads * UIO_MAXIOV, GFP_KERNEL);
                if (!vq->indirect || !vq->log || !vq->heads)
                        goto err_nomem;
        }
-- 
2.7.4
