On Wed, Dec 12, 2018 at 05:29:31PM +0800, jiangyiwen wrote:
> When vhost support VIRTIO_VSOCK_F_MRG_RXBUF feature,
> it will merge big packet into rx vq.
> 
> Signed-off-by: Yiwen Jiang <jiangyi...@huawei.com>

I feel this approach jumps into making interface changes for
optimizations too quickly. For example, what prevents us
from taking a big buffer, prepending each chunk
with the header and writing it out without
host/guest interface changes?

This should allow optimizations such as vhost_add_used_n
batching.

I realize a header in each packet does have a cost,
but it also has advantages such as improved robustness,
I'd like to see more of an apples to apples comparison
of the performance gain from skipping them.


> ---
>  drivers/vhost/vsock.c             | 111 
> ++++++++++++++++++++++++++++++--------
>  include/linux/virtio_vsock.h      |   1 +
>  include/uapi/linux/virtio_vsock.h |   5 ++
>  3 files changed, 94 insertions(+), 23 deletions(-)
> 
> diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
> index 34bc3ab..dc52b0f 100644
> --- a/drivers/vhost/vsock.c
> +++ b/drivers/vhost/vsock.c
> @@ -22,7 +22,8 @@
>  #define VHOST_VSOCK_DEFAULT_HOST_CID 2
> 
>  enum {
> -     VHOST_VSOCK_FEATURES = VHOST_FEATURES,
> +     VHOST_VSOCK_FEATURES = VHOST_FEATURES |
> +                     (1ULL << VIRTIO_VSOCK_F_MRG_RXBUF),
>  };
> 
>  /* Used to track all the vhost_vsock instances on the system. */
> @@ -80,6 +81,69 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
>       return vsock;
>  }
> 
> +/* This segment of codes are copied from drivers/vhost/net.c */
> +static int get_rx_bufs(struct vhost_virtqueue *vq,
> +             struct vring_used_elem *heads, int datalen,
> +             unsigned *iovcount, unsigned int quota)
> +{
> +     unsigned int out, in;
> +     int seg = 0;
> +     int headcount = 0;
> +     unsigned d;
> +     int ret;
> +     /*
> +      * len is always initialized before use since we are always called with
> +      * datalen > 0.
> +      */
> +     u32 uninitialized_var(len);
> +
> +     while (datalen > 0 && headcount < quota) {
> +             if (unlikely(seg >= UIO_MAXIOV)) {
> +                     ret = -ENOBUFS;
> +                     goto err;
> +             }
> +
> +             ret = vhost_get_vq_desc(vq, vq->iov + seg,
> +                             ARRAY_SIZE(vq->iov) - seg, &out,
> +                             &in, NULL, NULL);
> +             if (unlikely(ret < 0))
> +                     goto err;
> +
> +             d = ret;
> +             if (d == vq->num) {
> +                     ret = 0;
> +                     goto err;
> +             }
> +
> +             if (unlikely(out || in <= 0)) {
> +                     vq_err(vq, "unexpected descriptor format for RX: "
> +                                     "out %d, in %d\n", out, in);
> +                     ret = -EINVAL;
> +                     goto err;
> +             }
> +
> +             heads[headcount].id = cpu_to_vhost32(vq, d);
> +             len = iov_length(vq->iov + seg, in);
> +             heads[headcount].len = cpu_to_vhost32(vq, len);
> +             datalen -= len;
> +             ++headcount;
> +             seg += in;
> +     }
> +
> +     heads[headcount - 1].len = cpu_to_vhost32(vq, len + datalen);
> +     *iovcount = seg;
> +
> +     /* Detect overrun */
> +     if (unlikely(datalen > 0)) {
> +             ret = UIO_MAXIOV + 1;
> +             goto err;
> +     }
> +     return headcount;
> +err:
> +     vhost_discard_vq_desc(vq, headcount);
> +     return ret;
> +}
> +
>  static void
>  vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
>                           struct vhost_virtqueue *vq)
> @@ -87,22 +151,34 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
>       struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX];
>       bool added = false;
>       bool restart_tx = false;
> +     int mergeable;
> +     size_t vsock_hlen;
> 
>       mutex_lock(&vq->mutex);
> 
>       if (!vq->private_data)
>               goto out;
> 
> +     mergeable = vhost_has_feature(vq, VIRTIO_VSOCK_F_MRG_RXBUF);
> +     /*
> +      * Guest fill page for rx vq in mergeable case, so it will not
> +      * allocate pkt structure, we should reserve size of pkt in advance.
> +      */
> +     if (likely(mergeable))
> +             vsock_hlen = sizeof(struct virtio_vsock_pkt);
> +     else
> +             vsock_hlen = sizeof(struct virtio_vsock_hdr);
> +
>       /* Avoid further vmexits, we're already processing the virtqueue */
>       vhost_disable_notify(&vsock->dev, vq);
> 
>       for (;;) {
>               struct virtio_vsock_pkt *pkt;
>               struct iov_iter iov_iter;
> -             unsigned out, in;
> +             unsigned out = 0, in = 0;
>               size_t nbytes;
>               size_t len;
> -             int head;
> +             s16 headcount;
> 
>               spin_lock_bh(&vsock->send_pkt_list_lock);
>               if (list_empty(&vsock->send_pkt_list)) {
> @@ -116,16 +192,9 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
>               list_del_init(&pkt->list);
>               spin_unlock_bh(&vsock->send_pkt_list_lock);
> 
> -             head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
> -                                      &out, &in, NULL, NULL);
> -             if (head < 0) {
> -                     spin_lock_bh(&vsock->send_pkt_list_lock);
> -                     list_add(&pkt->list, &vsock->send_pkt_list);
> -                     spin_unlock_bh(&vsock->send_pkt_list_lock);
> -                     break;
> -             }
> -
> -             if (head == vq->num) {
> +             headcount = get_rx_bufs(vq, vq->heads, vsock_hlen + pkt->len,
> +                             &in, likely(mergeable) ? UIO_MAXIOV : 1);
> +             if (headcount <= 0) {
>                       spin_lock_bh(&vsock->send_pkt_list_lock);
>                       list_add(&pkt->list, &vsock->send_pkt_list);
>                       spin_unlock_bh(&vsock->send_pkt_list_lock);
> @@ -133,24 +202,20 @@ static struct vhost_vsock *vhost_vsock_get(u32 
> guest_cid)
>                       /* We cannot finish yet if more buffers snuck in while
>                        * re-enabling notify.
>                        */
> -                     if (unlikely(vhost_enable_notify(&vsock->dev, vq))) {
> +                     if (!headcount && 
> unlikely(vhost_enable_notify(&vsock->dev, vq))) {
>                               vhost_disable_notify(&vsock->dev, vq);
>                               continue;
>                       }
>                       break;
>               }
> 
> -             if (out) {
> -                     virtio_transport_free_pkt(pkt);
> -                     vq_err(vq, "Expected 0 output buffers, got %u\n", out);
> -                     break;
> -             }
> -
>               len = iov_length(&vq->iov[out], in);
>               iov_iter_init(&iov_iter, READ, &vq->iov[out], in, len);
> 
> -             nbytes = copy_to_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter);
> -             if (nbytes != sizeof(pkt->hdr)) {
> +             if (likely(mergeable))
> +                     pkt->mrg_rxbuf_hdr.num_buffers = cpu_to_le16(headcount);
> +             nbytes = copy_to_iter(&pkt->hdr, vsock_hlen, &iov_iter);
> +             if (nbytes != vsock_hlen) {
>                       virtio_transport_free_pkt(pkt);
>                       vq_err(vq, "Faulted on copying pkt hdr\n");
>                       break;
> @@ -163,7 +228,7 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
>                       break;
>               }
> 
> -             vhost_add_used(vq, head, sizeof(pkt->hdr) + pkt->len);
> +             vhost_add_used_n(vq, vq->heads, headcount);
>               added = true;
> 
>               if (pkt->reply) {
> diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h
> index bf84418..da9e1fe 100644
> --- a/include/linux/virtio_vsock.h
> +++ b/include/linux/virtio_vsock.h
> @@ -50,6 +50,7 @@ struct virtio_vsock_sock {
> 
>  struct virtio_vsock_pkt {
>       struct virtio_vsock_hdr hdr;
> +     struct virtio_vsock_mrg_rxbuf_hdr mrg_rxbuf_hdr;
>       struct work_struct work;
>       struct list_head list;
>       /* socket refcnt not held, only use for cancellation */
> diff --git a/include/uapi/linux/virtio_vsock.h 
> b/include/uapi/linux/virtio_vsock.h
> index 1d57ed3..2292f30 100644
> --- a/include/uapi/linux/virtio_vsock.h
> +++ b/include/uapi/linux/virtio_vsock.h
> @@ -63,6 +63,11 @@ struct virtio_vsock_hdr {
>       __le32  fwd_cnt;
>  } __attribute__((packed));
> 
> +/* It add mergeable rx buffers feature */
> +struct virtio_vsock_mrg_rxbuf_hdr {
> +     __le16  num_buffers;    /* number of mergeable rx buffers */
> +} __attribute__((packed));
> +
>  enum virtio_vsock_type {
>       VIRTIO_VSOCK_TYPE_STREAM = 1,
>  };
> -- 
> 1.8.3.1
> 

Reply via email to