Hi,

On Tuesday, June 18, 2024 11:48:34 PM GMT+5:30 Sahil Siddiq wrote:
> [...]
>
>  hw/virtio/vhost-shadow-virtqueue.c | 124 ++++++++++++++++++++++++++++-
>  hw/virtio/vhost-shadow-virtqueue.h |  66 ++++++++++-----
>  2 files changed, 167 insertions(+), 23 deletions(-)
> 
> diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
> b/hw/virtio/vhost-shadow-virtqueue.c
> index fc5f408f77..e3b276a9e9 100644
> --- a/hw/virtio/vhost-shadow-virtqueue.c
> +++ b/hw/virtio/vhost-shadow-virtqueue.c
> @@ -217,6 +217,122 @@ static bool vhost_svq_add_split(VhostShadowVirtqueue 
> *svq,
>      return true;
>  }
>  
> +/**
> + * Write descriptors to SVQ packed vring
> + *
> + * @svq: The shadow virtqueue
> + * @sg: Cache for hwaddr
> + * @out_sg: The iovec from the guest that is read-only for device
> + * @out_num: iovec length
> + * @in_sg: The iovec from the guest that is write-only for device
> + * @in_num: iovec length
> + * @head_flags: flags for first descriptor in list
> + *
> + * Return true if success, false otherwise and print error.
> + */
> +static bool vhost_svq_vring_write_descs_packed(VhostShadowVirtqueue *svq, 
> hwaddr *sg,
> +                                        const struct iovec *out_sg, size_t 
> out_num,
> +                                        const struct iovec *in_sg, size_t 
> in_num,
> +                                        uint16_t *head_flags)
> +{
> +    uint16_t id, curr, head, i;
> +    unsigned n;
> +    struct vring_packed_desc *descs = svq->vring_packed.vring.desc;
> +    bool ok;
> +
> +    head = svq->vring_packed.next_avail_idx;
> +    i = head;
> +    id = svq->free_head;
> +    curr = id;
> +
> +    size_t num = out_num + in_num;
> +
> +    if (num == 0) {
> +        return true;
> +    }
> +
> +    ok = vhost_svq_translate_addr(svq, sg, out_sg, out_num);
> +    if (unlikely(!ok)) {
> +        return false;
> +    }
> +
> +    ok = vhost_svq_translate_addr(svq, sg + out_num, in_sg, in_num);
> +    if (unlikely(!ok)) {
> +        return false;
> +    }
> +
> +    for (n = 0; n < num; n++) {
> +        uint16_t flags = cpu_to_le16(svq->vring_packed.avail_used_flags |
> +                (n < out_num ? 0 : VRING_DESC_F_WRITE) |
> +                (n + 1 == num ? 0 : VRING_DESC_F_NEXT));
> +        if (i == head) {
> +            *head_flags = flags;
> +        } else {
> +            descs[i].flags = flags;
> +        }
> +
> +        descs[i].addr = cpu_to_le64(sg[n]);
> +        descs[i].id = id;
> +        if (n < out_num) {
> +            descs[i].len = cpu_to_le32(out_sg[n].iov_len);
> +        } else {
> +            descs[i].len = cpu_to_le32(in_sg[n - out_num].iov_len);
> +        }
> +
> +        curr = cpu_to_le16(svq->desc_next[curr]);

"curr" is being updated here, but descs[i].id is always set to id which doesn't 
change in
the loop. So all the descriptors in the chain will have the same id. I can't 
find anything
in the virtio specification [1] that suggests that all descriptors in the chain 
have the same
id. Also, going by the figure captioned "Three chained descriptors available" 
in the blog
post on packed virtqueues [2], it looks like the descriptors in the chain have 
different
buffer ids.

However, the virtio implementation in Linux, like this patch, reuses the same
id value for all the descriptors in a single chain, so I am not sure if I am
missing something here.

> +        if (++i >= svq->vring_packed.vring.num) {
> +            i = 0;
> +            svq->vring_packed.avail_used_flags ^=
> +                    1 << VRING_PACKED_DESC_F_AVAIL |
> +                    1 << VRING_PACKED_DESC_F_USED;
> +        }
> +    }
> +
> +    if (i <= head) {
> +        svq->vring_packed.avail_wrap_counter ^= 1;
> +    }
> +
> +    svq->vring_packed.next_avail_idx = i;
> +    svq->free_head = curr;

Even though the same id is used, curr will not be id+1 here.

> +    return true;
> +}
> +
> +static bool vhost_svq_add_packed(VhostShadowVirtqueue *svq,
> +                                const struct iovec *out_sg, size_t out_num,
> +                                const struct iovec *in_sg, size_t in_num,
> +                                unsigned *head)
> +{
> +    bool ok;
> +    uint16_t head_flags = 0;
> +    g_autofree hwaddr *sgs = g_new(hwaddr, out_num + in_num);

I chose to use out_num + in_num as the size instead of MAX(out_num, in_num). I
found it easier to implement "vhost_svq_vring_write_descs_packed()" like this.
Please let me know if this isn't feasible or ideal.

> +    *head = svq->vring_packed.next_avail_idx;
> +
> +    /* We need some descriptors here */
> +    if (unlikely(!out_num && !in_num)) {
> +        qemu_log_mask(LOG_GUEST_ERROR,
> +                      "Guest provided element with no descriptors");
> +        return false;
> +    }
> +
> +    ok = vhost_svq_vring_write_descs_packed(svq, sgs, out_sg, out_num,
> +                                            in_sg, in_num, &head_flags);
> +    if (unlikely(!ok)) {
> +        return false;
> +    }
> +
> +    /*
> +     * A driver MUST NOT make the first descriptor in the list
> +     * available before all subsequent descriptors comprising
> +     * the list are made available.
> +     */
> +    smp_wmb();
> +    svq->vring_packed.vring.desc[*head].flags = head_flags;
> +
> +    return true;
> +}
> +
>  static void vhost_svq_kick(VhostShadowVirtqueue *svq)
>  {
>      bool needs_kick;
> @@ -258,7 +374,13 @@ int vhost_svq_add(VhostShadowVirtqueue *svq, const 
> struct iovec *out_sg,
>          return -ENOSPC;
>      }
>  
> -    ok = vhost_svq_add_split(svq, out_sg, out_num, in_sg, in_num, 
> &qemu_head);
> +    if (virtio_vdev_has_feature(svq->vdev, VIRTIO_F_RING_PACKED)) {
> +        ok = vhost_svq_add_packed(svq, out_sg, out_num,
> +                                  in_sg, in_num, &qemu_head);
> +    } else {
> +        ok = vhost_svq_add_split(svq, out_sg, out_num,
> +                                 in_sg, in_num, &qemu_head);
> +    }
>      if (unlikely(!ok)) {
>          return -EINVAL;
>      }
> diff --git a/hw/virtio/vhost-shadow-virtqueue.h 
> b/hw/virtio/vhost-shadow-virtqueue.h
> index 19c842a15b..ee1a87f523 100644
> --- a/hw/virtio/vhost-shadow-virtqueue.h
> +++ b/hw/virtio/vhost-shadow-virtqueue.h
> @@ -46,10 +46,53 @@ typedef struct VhostShadowVirtqueueOps {
>      VirtQueueAvailCallback avail_handler;
>  } VhostShadowVirtqueueOps;
>  
> +struct vring_packed {
> +    /* Actual memory layout for this queue. */
> +    struct {
> +        unsigned int num;
> +        struct vring_packed_desc *desc;
> +        struct vring_packed_desc_event *driver;
> +        struct vring_packed_desc_event *device;
> +    } vring;
> +
> +    /* Avail used flags. */
> +    uint16_t avail_used_flags;
> +
> +    /* Index of the next avail descriptor. */
> +    uint16_t next_avail_idx;
> +
> +    /* Driver ring wrap counter */
> +    bool avail_wrap_counter;
> +};
> +
>  /* Shadow virtqueue to relay notifications */
>  typedef struct VhostShadowVirtqueue {
> +    /* Virtio queue shadowing */
> +    VirtQueue *vq;
> +
> +    /* Virtio device */
> +    VirtIODevice *vdev;
> +
> +    /* SVQ vring descriptors state */
> +    SVQDescState *desc_state;
> +
> +    /*
> +     * Backup next field for each descriptor so we can recover securely, not
> +     * needing to trust the device access.
> +     */
> +    uint16_t *desc_next;
> +
> +    /* Next free descriptor */
> +    uint16_t free_head;
> +
> +    /* Size of SVQ vring free descriptors */
> +    uint16_t num_free;
> +
>      /* Shadow vring */
> -    struct vring vring;
> +    union {
> +        struct vring vring;
> +        struct vring_packed vring_packed;
> +    };
>  
>      /* Shadow kick notifier, sent to vhost */
>      EventNotifier hdev_kick;
> @@ -69,27 +112,12 @@ typedef struct VhostShadowVirtqueue {
>      /* Guest's call notifier, where the SVQ calls guest. */
>      EventNotifier svq_call;
>  
> -    /* Virtio queue shadowing */
> -    VirtQueue *vq;
> -
> -    /* Virtio device */
> -    VirtIODevice *vdev;
> -
>      /* IOVA mapping */
>      VhostIOVATree *iova_tree;
>  
> -    /* SVQ vring descriptors state */
> -    SVQDescState *desc_state;
> -
>      /* Next VirtQueue element that guest made available */
>      VirtQueueElement *next_guest_avail_elem;
>  
> -    /*
> -     * Backup next field for each descriptor so we can recover securely, not
> -     * needing to trust the device access.
> -     */
> -    uint16_t *desc_next;
> -
>      /* Caller callbacks */
>      const VhostShadowVirtqueueOps *ops;
>  
> @@ -99,17 +127,11 @@ typedef struct VhostShadowVirtqueue {
>      /* Next head to expose to the device */
>      uint16_t shadow_avail_idx;
>  
> -    /* Next free descriptor */
> -    uint16_t free_head;
> -
>      /* Last seen used idx */
>      uint16_t shadow_used_idx;
>  
>      /* Next head to consume from the device */
>      uint16_t last_used_idx;
> -
> -    /* Size of SVQ vring free descriptors */
> -    uint16_t num_free;
>  } VhostShadowVirtqueue;
>  
>  bool vhost_svq_valid_features(uint64_t features, Error **errp);
> -- 
> 2.45.2
>

In "struct VhostShadowVirtqueue", I rearranged the order in which some members
appear. I tried to keep the members common to split and packed virtqueues above
the union and the rest below the union. I haven't entirely understood the role
of some of the members (for example, VhostShadowVirtqueueOps *ops). I'll change
this ordering if need be as I continue to understand them better.

For the next step, I think I should work on "vhost_svq_start()", which is where
members of the struct are actually initialized. At the moment, only the split
ring part of the structure is initialized.

I think I should also start working on enhancing "vhost_svq_kick()" to actually
send the buffers to the device. I think it'll be easier to test these changes
once that's done (I am not sure about this, though). Would this involve
implementing the notification mechanism and event_idx?

Thanks,
Sahil

[1] https://docs.oasis-open.org/virtio/virtio/v1.3/csd01/virtio-v1.3-csd01.html#x1-720008
[2] https://www.redhat.com/en/blog/packed-virtqueue-how-reduce-overhead-virtio



Reply via email to