On Thu, Apr 14, 2022 at 11:10 AM Jason Wang <jasow...@redhat.com> wrote:
>
> On Thu, Apr 14, 2022 at 12:32 AM Eugenio Pérez <epere...@redhat.com> wrote:
> >
> > This allows qemu to inject packets to the device without guest's notice.

s/packets/buffers/ actually.

>
> Does it mean it can support guests without _F_ANNOUNCE?
>

Technically it is possible. We could use this to inject the packet into
the data virtqueues, but that implies that they start in SVQ mode.

Once we have a way to transition to/from shadow virtqueue mode
dynamically, the data virtqueues could start shadowed, the gratuitous
ARP could be sent, and then we could switch back to passthrough mode.
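
For reference, sending the announcement through a shadowed TX data
virtqueue could look something like this rough sketch. The helper name,
the frame layout, and the use of a virtio-net header are assumptions for
illustration only, not part of this series:

/* Hypothetical sketch: emulate the announcement for guests without
 * _F_ANNOUNCE by injecting a gratuitous ARP frame into the shadowed TX
 * data virtqueue. */
static bool vhost_vdpa_net_inject_announce(VhostShadowVirtqueue *tx_svq,
                                           const uint8_t *mac)
{
    struct virtio_net_hdr hdr = {};   /* no offloads for the injected frame */
    uint8_t frame[60] = {};
    struct iovec out[2] = {
        { .iov_base = &hdr,  .iov_len = sizeof(hdr)   },
        { .iov_base = frame, .iov_len = sizeof(frame) },
    };

    memset(frame, 0xff, 6);           /* dst: broadcast */
    memcpy(frame + 6, mac, 6);        /* src: guest MAC */
    frame[12] = 0x08;                 /* ethertype: ARP (0x0806) */
    frame[13] = 0x06;
    /* ... fill the gratuitous ARP payload here ... */

    /* Device-readable buffers only: out_num = 2, in_num = 0.  The inject
     * path linearizes the iovec into a single bounce buffer. */
    return vhost_svq_inject(tx_svq, out, 2, 0);
}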

> >
> > This will be used to inject net CVQ messages to restore the status in the destination
>
> I guess for restoring, we should set cvq.ready = true but keep all the
> others (TX/RX) false until we complete the restore? If yes, I don't see
> the code to do that.
>

Right, that will be addressed in the next version.
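
The ordering I have in mind is roughly like this sketch, where
vhost_vdpa_set_ready() is only a placeholder for whatever helper ends up
wrapping VHOST_VDPA_SET_VRING_ENABLE:

/* Ordering sketch only: keep the data virtqueues disabled while the CVQ
 * restores the device state, and enable them once the restore completes. */
static int vhost_vdpa_net_restore(struct vhost_dev *dev, unsigned cvq_index,
                                  unsigned nr_data_vqs)
{
    int r;

    /* 1. Enable only the control virtqueue. */
    r = vhost_vdpa_set_ready(dev, cvq_index, true);
    if (unlikely(r)) {
        return r;
    }

    /* 2. Inject the restore commands (MAC, MQ, ...) through the shadow CVQ
     *    with vhost_svq_inject() and wait until the device has used them. */

    /* 3. Enable the data virtqueues only once the state is restored. */
    for (unsigned i = 0; i < nr_data_vqs; ++i) {
        r = vhost_vdpa_set_ready(dev, i, true);
        if (unlikely(r)) {
            return r;
        }
    }

    return 0;
}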

> >
> > Signed-off-by: Eugenio Pérez <epere...@redhat.com>
> > ---
> >  hw/virtio/vhost-shadow-virtqueue.h |   5 +
> >  hw/virtio/vhost-shadow-virtqueue.c | 179 +++++++++++++++++++++++++----
> >  2 files changed, 160 insertions(+), 24 deletions(-)
> >
> > diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
> > index e06ac52158..2a5229e77f 100644
> > --- a/hw/virtio/vhost-shadow-virtqueue.h
> > +++ b/hw/virtio/vhost-shadow-virtqueue.h
> > @@ -17,6 +17,9 @@
> >
> >  typedef struct SVQElement {
> >      VirtQueueElement elem;
> > +    hwaddr in_iova;
> > +    hwaddr out_iova;
> > +    bool not_from_guest;
>
> Let's add a comment for those fields.
>

Sure.
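
Something like this, for instance (exact wording to be adjusted):

typedef struct SVQElement {
    VirtQueueElement elem;
    /* IOVA of the device-writable (in) bounce buffer of an injected element */
    hwaddr in_iova;
    /* IOVA of the device-readable (out) bounce buffer of an injected element */
    hwaddr out_iova;
    /* True if QEMU injected the element: it must not be returned to the guest */
    bool not_from_guest;
} SVQElement;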

> >  } SVQElement;
> >
> >  typedef void (*VirtQueueElementCallback)(VirtIODevice *vdev,
> > @@ -106,6 +109,8 @@ typedef struct VhostShadowVirtqueue {
> >
> >  bool vhost_svq_valid_features(uint64_t features, Error **errp);
> >
> > +bool vhost_svq_inject(VhostShadowVirtqueue *svq, const struct iovec *iov,
> > +                      size_t out_num, size_t in_num);
> >  void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd);
> >  void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd);
> >  void vhost_svq_get_vring_addr(const VhostShadowVirtqueue *svq,
> > diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
> > index 87980e2a9c..f3600df133 100644
> > --- a/hw/virtio/vhost-shadow-virtqueue.c
> > +++ b/hw/virtio/vhost-shadow-virtqueue.c
> > @@ -16,6 +16,7 @@
> >  #include "qemu/log.h"
> >  #include "qemu/memalign.h"
> >  #include "linux-headers/linux/vhost.h"
> > +#include "qemu/iov.h"
> >
> >  /**
> >   * Validate the transport device features that both guests can use with the SVQ
> > @@ -122,7 +123,8 @@ static bool vhost_svq_translate_addr(const VhostShadowVirtqueue *svq,
> >      return true;
> >  }
> >
> > -static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
> > +static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq,
> > +                                        SVQElement *svq_elem, hwaddr *sg,
> >                                          const struct iovec *iovec, size_t num,
> >                                          bool more_descs, bool write)
> >  {
> > @@ -130,15 +132,39 @@ static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
> >      unsigned n;
> >      uint16_t flags = write ? cpu_to_le16(VRING_DESC_F_WRITE) : 0;
> >      vring_desc_t *descs = svq->vring.desc;
> > -    bool ok;
> >
> >      if (num == 0) {
> >          return true;
> >      }
> >
> > -    ok = vhost_svq_translate_addr(svq, sg, iovec, num);
> > -    if (unlikely(!ok)) {
> > -        return false;
> > +    if (svq_elem->not_from_guest) {
> > +        DMAMap map = {
> > +            .translated_addr = (hwaddr)iovec->iov_base,
> > +            .size = ROUND_UP(iovec->iov_len, 4096) - 1,
> > +            .perm = write ? IOMMU_RW : IOMMU_RO,
> > +        };
> > +        int r;
> > +
> > +        if (unlikely(num != 1)) {
> > +            error_report("Unexpected chain of element injected");
> > +            return false;
> > +        }
> > +        r = vhost_iova_tree_map_alloc(svq->iova_tree, &map);
> > +        if (unlikely(r != IOVA_OK)) {
> > +            error_report("Cannot map injected element");
> > +            return false;
> > +        }
> > +
> > +        r = svq->map_ops->map(map.iova, map.size + 1,
> > +                              (void *)map.translated_addr, !write,
> > +                              svq->map_ops_opaque);
> > +        assert(r == 0);
> > +        sg[0] = map.iova;
> > +    } else {
> > +        bool ok = vhost_svq_translate_addr(svq, sg, iovec, num);
> > +        if (unlikely(!ok)) {
> > +            return false;
> > +        }
> >      }
> >
> >      for (n = 0; n < num; n++) {
> > @@ -166,7 +192,8 @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq, SVQElement *svq_elem,
> >      unsigned avail_idx;
> >      vring_avail_t *avail = svq->vring.avail;
> >      bool ok;
> > -    g_autofree hwaddr *sgs = g_new(hwaddr, MAX(elem->out_num, elem->in_num));
> > +    g_autofree hwaddr *sgs = NULL;
> > +    hwaddr *in_sgs, *out_sgs;
> >
> >      *head = svq->free_head;
> >
> > @@ -177,15 +204,23 @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq, SVQElement *svq_elem,
> >          return false;
> >      }
> >
> > -    ok = vhost_svq_vring_write_descs(svq, sgs, elem->out_sg, elem->out_num,
> > -                                     elem->in_num > 0, false);
> > +    if (!svq_elem->not_from_guest) {
> > +        sgs = g_new(hwaddr, MAX(elem->out_num, elem->in_num));
> > +        in_sgs = out_sgs = sgs;
> > +    } else {
> > +        in_sgs = &svq_elem->in_iova;
> > +        out_sgs = &svq_elem->out_iova;
> > +    }
> > +    ok = vhost_svq_vring_write_descs(svq, svq_elem, out_sgs, elem->out_sg,
> > +                                     elem->out_num, elem->in_num > 0, false);
> >      if (unlikely(!ok)) {
> >          return false;
> >      }
> >
> > -    ok = vhost_svq_vring_write_descs(svq, sgs, elem->in_sg, elem->in_num, false,
> > -                                     true);
> > +    ok = vhost_svq_vring_write_descs(svq, svq_elem, in_sgs, elem->in_sg,
> > +                                     elem->in_num, false, true);
> >      if (unlikely(!ok)) {
> > +        /* TODO unwind out_sg */
> >          return false;
> >      }
> >
> > @@ -230,6 +265,43 @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq)
> >      event_notifier_set(&svq->hdev_kick);
> >  }
> >
> > +bool vhost_svq_inject(VhostShadowVirtqueue *svq, const struct iovec *iov,
> > +                      size_t out_num, size_t in_num)
> > +{
> > +    size_t out_size = iov_size(iov, out_num);
> > +    size_t out_buf_size = ROUND_UP(out_size, 4096);
> > +    size_t in_size = iov_size(iov + out_num, in_num);
> > +    size_t in_buf_size = ROUND_UP(in_size, 4096);
> > +    SVQElement *svq_elem;
> > +    uint16_t num_slots = (in_num ? 1 : 0) + (out_num ? 1 : 0);
> > +
> > +    if (unlikely(num_slots == 0 || svq->next_guest_avail_elem ||
> > +                 vhost_svq_available_slots(svq) < num_slots)) {
> > +        return false;
> > +    }
> > +
> > +    svq_elem = virtqueue_alloc_element(sizeof(SVQElement), 1, 1);
> > +    if (out_num) {
> > +        void *out = qemu_memalign(4096, out_buf_size);
> > +        svq_elem->elem.out_sg[0].iov_base = out;
> > +        svq_elem->elem.out_sg[0].iov_len = out_size;
> > +        iov_to_buf(iov, out_num, 0, out, out_size);
> > +        memset(out + out_size, 0, out_buf_size - out_size);
> > +    }
> > +    if (in_num) {
> > +        void *in = qemu_memalign(4096, in_buf_size);
> > +        svq_elem->elem.in_sg[0].iov_base = in;
> > +        svq_elem->elem.in_sg[0].iov_len = in_size;
> > +        memset(in, 0, in_buf_size);
> > +    }
> > +
> > +    svq_elem->not_from_guest = true;
> > +    vhost_svq_add(svq, svq_elem);
> > +    vhost_svq_kick(svq);
> > +
>
> Should we wait for the completion before moving forward? Otherwise we
> will have a race.
>
> And if we wait for the completion (e.g. by busy polling), I think we
> can avoid the auxiliary structures like
> in_iova/out_iova/not_from_guest by doing the mapping before
> vhost_svq_add() to keep it clean.
>

OK, I can move it to that model.
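
Roughly like this sketch, where vhost_svq_map_injected() and
vhost_svq_unmap_injected() are placeholders for the mapping helpers I
still need to write:

/* Map the bounce buffers up front, add + kick, then busy-poll the used ring
 * so the call only returns once the device has consumed the element.  With
 * that, SVQElement needs no in_iova/out_iova/not_from_guest fields. */
static bool vhost_svq_inject_and_poll(VhostShadowVirtqueue *svq,
                                      SVQElement *svq_elem)
{
    uint32_t len;

    /* Map the injected buffers into the IOVA tree before queuing the
     * element, so vhost_svq_add() does not need to special-case them. */
    if (unlikely(!vhost_svq_map_injected(svq, svq_elem))) {
        return false;
    }

    vhost_svq_add(svq, svq_elem);
    vhost_svq_kick(svq);

    /* The injected buffers belong to QEMU, so busy-poll and do not return
     * until the device has marked the element as used. */
    while (!vhost_svq_more_used(svq)) {
        /* a timeout or cpu_relax() would likely be wanted here */
    }
    /* Drop it from the SVQ bookkeeping; we still own svq_elem and its
     * buffers. */
    vhost_svq_get_buf(svq, &len);

    vhost_svq_unmap_injected(svq, svq_elem);
    return true;
}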

Thanks!

> Thanks
>
> > +    return true;
> > +}
> > +
> >  /**
> >   * Forward available buffers.
> >   *
> > @@ -267,6 +339,7 @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq)
> >                  break;
> >              }
> >
> > +            svq_elem->not_from_guest = false;
> >              elem = &svq_elem->elem;
> >              if (elem->out_num + elem->in_num > vhost_svq_available_slots(svq)) {
> >                  /*
> > @@ -391,6 +464,31 @@ static SVQElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq, uint32_t *len)
> >      return g_steal_pointer(&svq->ring_id_maps[used_elem.id]);
> >  }
> >
> > +static int vhost_svq_unmap(VhostShadowVirtqueue *svq, hwaddr iova, size_t size)
> > +{
> > +    DMAMap needle = {
> > +        .iova = iova,
> > +        .size = size,
> > +    };
> > +    const DMAMap *overlap;
> > +
> > +    while ((overlap = vhost_iova_tree_find(svq->iova_tree, &needle))) {
> > +        DMAMap needle = *overlap;
> > +
> > +        if (svq->map_ops->unmap) {
> > +            int r = svq->map_ops->unmap(overlap->iova, overlap->size + 1,
> > +                                        svq->map_ops_opaque);
> > +            if (unlikely(r != 0)) {
> > +                return r;
> > +            }
> > +        }
> > +        qemu_vfree((void *)overlap->translated_addr);
> > +        vhost_iova_tree_remove(svq->iova_tree, &needle);
> > +    }
> > +
> > +    return 0;
> > +}
> > +
> >  static void vhost_svq_flush(VhostShadowVirtqueue *svq,
> >                              bool check_for_avail_queue)
> >  {
> > @@ -410,23 +508,56 @@ static void vhost_svq_flush(VhostShadowVirtqueue *svq,
> >              }
> >
> >              elem = &svq_elem->elem;
> > -            if (unlikely(i >= svq->vring.num)) {
> > -                qemu_log_mask(LOG_GUEST_ERROR,
> > -                         "More than %u used buffers obtained in a %u size SVQ",
> > -                         i, svq->vring.num);
> > -                virtqueue_fill(vq, elem, len, i);
> > -                virtqueue_flush(vq, i);
> > -                return;
> > -            }
> > -            virtqueue_fill(vq, elem, len, i++);
> > -
> >              if (svq->ops && svq->ops->used_elem_handler) {
> >                  svq->ops->used_elem_handler(svq->vdev, elem);
> >              }
> > +
> > +            if (svq_elem->not_from_guest) {
> > +                if (unlikely(!elem->out_num && elem->out_num != 1)) {
> > +                    error_report("Unexpected out_num > 1");
> > +                    return;
> > +                }
> > +
> > +                if (elem->out_num) {
> > +                    int r = vhost_svq_unmap(svq, svq_elem->out_iova,
> > +                                            elem->out_sg[0].iov_len);
> > +                    if (unlikely(r != 0)) {
> > +                        error_report("Cannot unmap out buffer");
> > +                        return;
> > +                    }
> > +                }
> > +
> > +                if (unlikely(!elem->in_num && elem->in_num != 1)) {
> > +                    error_report("Unexpected in_num > 1");
> > +                    return;
> > +                }
> > +
> > +                if (elem->in_num) {
> > +                    int r = vhost_svq_unmap(svq, svq_elem->in_iova,
> > +                                            elem->in_sg[0].iov_len);
> > +                    if (unlikely(r != 0)) {
> > +                        error_report("Cannot unmap out buffer");
> > +                        return;
> > +                    }
> > +                }
> > +            } else {
> > +                if (unlikely(i >= svq->vring.num)) {
> > +                    qemu_log_mask(
> > +                        LOG_GUEST_ERROR,
> > +                        "More than %u used buffers obtained in a %u size SVQ",
> > +                        i, svq->vring.num);
> > +                    virtqueue_fill(vq, elem, len, i);
> > +                    virtqueue_flush(vq, i);
> > +                    return;
> > +                }
> > +                virtqueue_fill(vq, elem, len, i++);
> > +            }
> >          }
> >
> > -        virtqueue_flush(vq, i);
> > -        event_notifier_set(&svq->svq_call);
> > +        if (i > 0) {
> > +            virtqueue_flush(vq, i);
> > +            event_notifier_set(&svq->svq_call);
> > +        }
> >
> >          if (check_for_avail_queue && svq->next_guest_avail_elem) {
> >              /*
> > @@ -590,13 +721,13 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq)
> >      for (unsigned i = 0; i < svq->vring.num; ++i) {
> >          g_autofree SVQElement *svq_elem = NULL;
> >          svq_elem = g_steal_pointer(&svq->ring_id_maps[i]);
> > -        if (svq_elem) {
> > +        if (svq_elem && !svq_elem->not_from_guest) {
> >              virtqueue_detach_element(svq->vq, &svq_elem->elem, 0);
> >          }
> >      }
> >
> >      next_avail_elem = g_steal_pointer(&svq->next_guest_avail_elem);
> > -    if (next_avail_elem) {
> > +    if (next_avail_elem && !next_avail_elem->not_from_guest) {
> >          virtqueue_detach_element(svq->vq, &next_avail_elem->elem, 0);
> >      }
> >      svq->vq = NULL;
> > --
> > 2.27.0
> >
>

