On Thu, Apr 14, 2022 at 11:10 AM Jason Wang <jasow...@redhat.com> wrote:
>
> On Thu, Apr 14, 2022 at 12:32 AM Eugenio Pérez <epere...@redhat.com> wrote:
> >
> > This allows qemu to inject packets to the device without guest's notice.
s/packets/buffers/ actually.

> Does it mean it can support guests without _F_ANNOUNCE?
>

Technically it is possible. We could use this to inject the packet into
the data virtqueues, but that implies they start in SVQ mode. Once we
have a way to transition to/from shadow virtqueue mode dynamically, the
data virtqueues could start shadowed, the gratuitous ARP could be sent,
and then they could move back to passthrough mode.

> >
> > This will be used to inject net CVQ messages to restore status in the destination.
>
> I guess for restoring, we should set cvq.ready = true but all other
> (TX/RX) as false before we complete the restoring? If yes, I don't see
> code to do that.
>

Right, that will be ready for the next version.

> >
> > Signed-off-by: Eugenio Pérez <epere...@redhat.com>
> > ---
> >  hw/virtio/vhost-shadow-virtqueue.h |   5 +
> >  hw/virtio/vhost-shadow-virtqueue.c | 179 +++++++++++++++++++++++++----
> >  2 files changed, 160 insertions(+), 24 deletions(-)
> >
> > diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
> > index e06ac52158..2a5229e77f 100644
> > --- a/hw/virtio/vhost-shadow-virtqueue.h
> > +++ b/hw/virtio/vhost-shadow-virtqueue.h
> > @@ -17,6 +17,9 @@
> >
> >  typedef struct SVQElement {
> >      VirtQueueElement elem;
> > +    hwaddr in_iova;
> > +    hwaddr out_iova;
> > +    bool not_from_guest;
>
> Let's add a comment for those fields.
>

Sure.

> >  } SVQElement;
> >
> >  typedef void (*VirtQueueElementCallback)(VirtIODevice *vdev,
> > @@ -106,6 +109,8 @@ typedef struct VhostShadowVirtqueue {
> >
> >  bool vhost_svq_valid_features(uint64_t features, Error **errp);
> >
> > +bool vhost_svq_inject(VhostShadowVirtqueue *svq, const struct iovec *iov,
> > +                      size_t out_num, size_t in_num);
> >  void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd);
> >  void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd);
> >  void vhost_svq_get_vring_addr(const VhostShadowVirtqueue *svq,
> > diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
> > index 87980e2a9c..f3600df133 100644
> > --- a/hw/virtio/vhost-shadow-virtqueue.c
> > +++ b/hw/virtio/vhost-shadow-virtqueue.c
> > @@ -16,6 +16,7 @@
> >  #include "qemu/log.h"
> >  #include "qemu/memalign.h"
> >  #include "linux-headers/linux/vhost.h"
> > +#include "qemu/iov.h"
> >
> >  /**
> >   * Validate the transport device features that both guests can use with the SVQ
> > @@ -122,7 +123,8 @@ static bool vhost_svq_translate_addr(const VhostShadowVirtqueue *svq,
> >      return true;
> >  }
> >
> > -static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
> > +static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq,
> > +                                        SVQElement *svq_elem, hwaddr *sg,
> >                                          const struct iovec *iovec, size_t num,
> >                                          bool more_descs, bool write)
> >  {
> > @@ -130,15 +132,39 @@ static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
> >      unsigned n;
> >      uint16_t flags = write ? cpu_to_le16(VRING_DESC_F_WRITE) : 0;
> >      vring_desc_t *descs = svq->vring.desc;
> > -    bool ok;
> >
> >      if (num == 0) {
> >          return true;
> >      }
> >
> > -    ok = vhost_svq_translate_addr(svq, sg, iovec, num);
> > -    if (unlikely(!ok)) {
> > -        return false;
> > +    if (svq_elem->not_from_guest) {
> > +        DMAMap map = {
> > +            .translated_addr = (hwaddr)iovec->iov_base,
> > +            .size = ROUND_UP(iovec->iov_len, 4096) - 1,
> > +            .perm = write ? IOMMU_RW : IOMMU_RO,
> > +        };
> > +        int r;
> > +
> > +        if (unlikely(num != 1)) {
> > +            error_report("Unexpected chain of element injected");
> > +            return false;
> > +        }
> > +        r = vhost_iova_tree_map_alloc(svq->iova_tree, &map);
> > +        if (unlikely(r != IOVA_OK)) {
> > +            error_report("Cannot map injected element");
> > +            return false;
> > +        }
> > +
> > +        r = svq->map_ops->map(map.iova, map.size + 1,
> > +                              (void *)map.translated_addr, !write,
> > +                              svq->map_ops_opaque);
> > +        assert(r == 0);
> > +        sg[0] = map.iova;
> > +    } else {
> > +        bool ok = vhost_svq_translate_addr(svq, sg, iovec, num);
> > +        if (unlikely(!ok)) {
> > +            return false;
> > +        }
> >      }
> >
> >      for (n = 0; n < num; n++) {
> > @@ -166,7 +192,8 @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq, SVQElement *svq_elem,
> >      unsigned avail_idx;
> >      vring_avail_t *avail = svq->vring.avail;
> >      bool ok;
> > -    g_autofree hwaddr *sgs = g_new(hwaddr, MAX(elem->out_num, elem->in_num));
> > +    g_autofree hwaddr *sgs = NULL;
> > +    hwaddr *in_sgs, *out_sgs;
> >
> >      *head = svq->free_head;
> >
> > @@ -177,15 +204,23 @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq, SVQElement *svq_elem,
> >          return false;
> >      }
> >
> > -    ok = vhost_svq_vring_write_descs(svq, sgs, elem->out_sg, elem->out_num,
> > -                                     elem->in_num > 0, false);
> > +    if (!svq_elem->not_from_guest) {
> > +        sgs = g_new(hwaddr, MAX(elem->out_num, elem->in_num));
> > +        in_sgs = out_sgs = sgs;
> > +    } else {
> > +        in_sgs = &svq_elem->in_iova;
> > +        out_sgs = &svq_elem->out_iova;
> > +    }
> > +    ok = vhost_svq_vring_write_descs(svq, svq_elem, out_sgs, elem->out_sg,
> > +                                     elem->out_num, elem->in_num > 0, false);
> >      if (unlikely(!ok)) {
> >          return false;
> >      }
> >
> > -    ok = vhost_svq_vring_write_descs(svq, sgs, elem->in_sg, elem->in_num, false,
> > -                                     true);
> > +    ok = vhost_svq_vring_write_descs(svq, svq_elem, in_sgs, elem->in_sg,
> > +                                     elem->in_num, false, true);
> >      if (unlikely(!ok)) {
> > +        /* TODO unwind out_sg */
> >          return false;
> >      }
> >
> > @@ -230,6 +265,43 @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq)
> >      event_notifier_set(&svq->hdev_kick);
> >  }
> >
> > +bool vhost_svq_inject(VhostShadowVirtqueue *svq, const struct iovec *iov,
> > +                      size_t out_num, size_t in_num)
> > +{
> > +    size_t out_size = iov_size(iov, out_num);
> > +    size_t out_buf_size = ROUND_UP(out_size, 4096);
> > +    size_t in_size = iov_size(iov + out_num, in_num);
> > +    size_t in_buf_size = ROUND_UP(in_size, 4096);
> > +    SVQElement *svq_elem;
> > +    uint16_t num_slots = (in_num ? 1 : 0) + (out_num ? 1 : 0);
> > +
> > +    if (unlikely(num_slots == 0 || svq->next_guest_avail_elem ||
> > +                 vhost_svq_available_slots(svq) < num_slots)) {
> > +        return false;
> > +    }
> > +
> > +    svq_elem = virtqueue_alloc_element(sizeof(SVQElement), 1, 1);
> > +    if (out_num) {
> > +        void *out = qemu_memalign(4096, out_buf_size);
> > +        svq_elem->elem.out_sg[0].iov_base = out;
> > +        svq_elem->elem.out_sg[0].iov_len = out_size;
> > +        iov_to_buf(iov, out_num, 0, out, out_size);
> > +        memset(out + out_size, 0, out_buf_size - out_size);
> > +    }
> > +    if (in_num) {
> > +        void *in = qemu_memalign(4096, in_buf_size);
> > +        svq_elem->elem.in_sg[0].iov_base = in;
> > +        svq_elem->elem.in_sg[0].iov_len = in_size;
> > +        memset(in, 0, in_buf_size);
> > +    }
> > +
> > +    svq_elem->not_from_guest = true;
> > +    vhost_svq_add(svq, svq_elem);
> > +    vhost_svq_kick(svq);
> > +
>
> Should we wait for the completion before moving forward? Otherwise we
> will have a race.
>
> And if we wait for the completion (e.g. doing busy polling), I think we
> can avoid the auxiliary structures like
> in_iova/out_iova/not_from_guest by doing mapping before
> vhost_svq_add() to keep it clean.
>

Ok, I can move it to that model. Thanks! (Rough sketches of the intended
CVQ usage and of that polling model are appended below the quoted patch.)

> Thanks
>
> > +    return true;
> > +}
> > +
> >  /**
> >   * Forward available buffers.
> >   *
> > @@ -267,6 +339,7 @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq)
> >                  break;
> >              }
> >
> > +            svq_elem->not_from_guest = false;
> >              elem = &svq_elem->elem;
> >              if (elem->out_num + elem->in_num >
> >                  vhost_svq_available_slots(svq)) {
> >                  /*
> > @@ -391,6 +464,31 @@ static SVQElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq, uint32_t *len)
> >      return g_steal_pointer(&svq->ring_id_maps[used_elem.id]);
> >  }
> >
> > +static int vhost_svq_unmap(VhostShadowVirtqueue *svq, hwaddr iova, size_t size)
> > +{
> > +    DMAMap needle = {
> > +        .iova = iova,
> > +        .size = size,
> > +    };
> > +    const DMAMap *overlap;
> > +
> > +    while ((overlap = vhost_iova_tree_find(svq->iova_tree, &needle))) {
> > +        DMAMap needle = *overlap;
> > +
> > +        if (svq->map_ops->unmap) {
> > +            int r = svq->map_ops->unmap(overlap->iova, overlap->size + 1,
> > +                                        svq->map_ops_opaque);
> > +            if (unlikely(r != 0)) {
> > +                return r;
> > +            }
> > +        }
> > +        qemu_vfree((void *)overlap->translated_addr);
> > +        vhost_iova_tree_remove(svq->iova_tree, &needle);
> > +    }
> > +
> > +    return 0;
> > +}
> > +
> >  static void vhost_svq_flush(VhostShadowVirtqueue *svq,
> >                              bool check_for_avail_queue)
> >  {
> > @@ -410,23 +508,56 @@ static void vhost_svq_flush(VhostShadowVirtqueue *svq,
> >              }
> >
> >              elem = &svq_elem->elem;
> > -            if (unlikely(i >= svq->vring.num)) {
> > -                qemu_log_mask(LOG_GUEST_ERROR,
> > -                              "More than %u used buffers obtained in a %u size SVQ",
> > -                              i, svq->vring.num);
> > -                virtqueue_fill(vq, elem, len, i);
> > -                virtqueue_flush(vq, i);
> > -                return;
> > -            }
> > -            virtqueue_fill(vq, elem, len, i++);
> > -
> >              if (svq->ops && svq->ops->used_elem_handler) {
> >                  svq->ops->used_elem_handler(svq->vdev, elem);
> >              }
> > +
> > +            if (svq_elem->not_from_guest) {
> > +                if (unlikely(!elem->out_num && elem->out_num != 1)) {
> > +                    error_report("Unexpected out_num > 1");
> > +                    return;
> > +                }
> > +
> > +                if (elem->out_num) {
> > +                    int r = vhost_svq_unmap(svq, svq_elem->out_iova,
> > +                                            elem->out_sg[0].iov_len);
> > +                    if (unlikely(r != 0)) {
> > +                        error_report("Cannot unmap out buffer");
> > +                        return;
> > +                    }
> > +                }
> > +
> > +                if (unlikely(!elem->in_num && elem->in_num != 1)) {
> > +                    error_report("Unexpected in_num > 1");
> > +                    return;
> > +                }
> > +
> > +                if (elem->in_num) {
> > +                    int r = vhost_svq_unmap(svq, svq_elem->in_iova,
> > +                                            elem->in_sg[0].iov_len);
> > +                    if (unlikely(r != 0)) {
> > +                        error_report("Cannot unmap out buffer");
> > +                        return;
> > +                    }
> > +                }
> > +            } else {
> > +                if (unlikely(i >= svq->vring.num)) {
> > +                    qemu_log_mask(
> > +                        LOG_GUEST_ERROR,
> > +                        "More than %u used buffers obtained in a %u size SVQ",
> > +                        i, svq->vring.num);
> > +                    virtqueue_fill(vq, elem, len, i);
> > +                    virtqueue_flush(vq, i);
> > +                    return;
> > +                }
> > +                virtqueue_fill(vq, elem, len, i++);
> > +            }
> >          }
> >
> > -        virtqueue_flush(vq, i);
> > -        event_notifier_set(&svq->svq_call);
> > +        if (i > 0) {
> > +            virtqueue_flush(vq, i);
> > +            event_notifier_set(&svq->svq_call);
> > +        }
> >
> >          if (check_for_avail_queue && svq->next_guest_avail_elem) {
> >              /*
> > @@ -590,13 +721,13 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq)
> >      for (unsigned i = 0; i < svq->vring.num; ++i) {
> >          g_autofree SVQElement *svq_elem = NULL;
> >          svq_elem = g_steal_pointer(&svq->ring_id_maps[i]);
> > -        if (svq_elem) {
> > +        if (svq_elem && !svq_elem->not_from_guest) {
> >              virtqueue_detach_element(svq->vq, &svq_elem->elem, 0);
> >          }
> >      }
> >
> >      next_avail_elem = g_steal_pointer(&svq->next_guest_avail_elem);
> > -    if (next_avail_elem) {
> > +    if (next_avail_elem && !next_avail_elem->not_from_guest) {
> >          virtqueue_detach_element(svq->vq, &next_avail_elem->elem, 0);
> >      }
> >      svq->vq = NULL;
> > --
> > 2.27.0
> >
>
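To make the intended use of the new API concrete, here is a rough,
illustrative sketch of a caller injecting a CVQ message to restore status
on the destination. This is not part of the patch: the caller name
vhost_vdpa_net_restore_mac() is made up, and only the vhost_svq_inject()
prototype above and the standard virtio-net control structures are real.

/*
 * Rough sketch (not part of the patch): inject a VIRTIO_NET_CTRL_MAC_ADDR_SET
 * command into the shadow CVQ so the destination device learns the guest's
 * MAC without the guest noticing.  Assumes the standard virtio-net control
 * header definitions are available.
 */
static bool vhost_vdpa_net_restore_mac(VhostShadowVirtqueue *cvq_svq,
                                       const uint8_t *mac)
{
    const struct virtio_net_ctrl_hdr ctrl = {
        .class = VIRTIO_NET_CTRL_MAC,
        .cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET,
    };
    virtio_net_ctrl_ack ack = VIRTIO_NET_ERR;
    const struct iovec iov[] = {
        /* Device-readable part: control header + 6-byte MAC */
        { .iov_base = (void *)&ctrl, .iov_len = sizeof(ctrl) },
        { .iov_base = (void *)mac,   .iov_len = 6 },
        /*
         * Device-writable part: only its size matters here, because
         * vhost_svq_inject() allocates its own bounce buffers; the device's
         * ack is observed later through svq->ops->used_elem_handler.
         */
        { .iov_base = &ack,          .iov_len = sizeof(ack) },
    };

    /* out_num = 2 readable iovecs, in_num = 1 writable iovec */
    return vhost_svq_inject(cvq_svq, iov, 2, 1);
}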
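And a rough sketch of the polling model discussed above: map the injected
buffers before vhost_svq_add(), busy-poll the used ring for completion, and
unmap afterwards, so SVQElement no longer needs in_iova/out_iova/
not_from_guest. The helpers vhost_svq_map_injected()/vhost_svq_unmap_injected()
are placeholders for the DMAMap/map_ops handling shown in the patch;
vhost_svq_add(), vhost_svq_kick() and vhost_svq_get_buf() are the existing
functions.

/*
 * Rough sketch of the "wait for completion" model, not the patch as posted.
 * vhost_svq_map_injected()/vhost_svq_unmap_injected() stand in for the
 * DMAMap/map_ops handling of the patch.
 */
static bool vhost_svq_inject_and_poll(VhostShadowVirtqueue *svq,
                                      SVQElement *svq_elem)
{
    uint32_t len;
    SVQElement *used = NULL;

    /* Map the injected buffers before exposing them in the avail ring */
    if (!vhost_svq_map_injected(svq, svq_elem)) {
        return false;
    }

    vhost_svq_add(svq, svq_elem);
    vhost_svq_kick(svq);

    /*
     * Busy-poll: nothing else is in flight while we restore state, so the
     * first used descriptor must be the one we just injected.
     */
    while (!(used = vhost_svq_get_buf(svq, &len))) {
        /* spin until the device marks the descriptor as used */
    }
    assert(used == svq_elem);

    vhost_svq_unmap_injected(svq, svq_elem);
    return true;
}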