Use the translations added in VhostIOVATree in SVQ. Only the usage is
introduced here, not the allocation and deallocation of the tree. As
with previous patches, we use the dead code paths guarded by
shadow_vqs_enabled to avoid committing too many changes at once. These
paths are impossible to reach at the moment.
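For reference, below is a minimal standalone sketch of the VA-to-IOVA
arithmetic that vhost_svq_translate_addr() performs in this patch. The
types and helpers here (Map, find_iova, translate) are illustrative
stand-ins, not the QEMU API: the real code searches a VhostIOVATree of
DMAMap entries.

/*
 * Standalone model of the translation this patch adds: given a qemu VA
 * that falls inside a mapped chunk, return the matching IOVA with the
 * in-chunk offset preserved. Illustrative only; not QEMU code.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

typedef struct {
    uint64_t translated_addr; /* start of the mapped VA chunk */
    uint64_t iova;            /* IOVA assigned to that chunk */
    uint64_t size;            /* offset of last byte, inclusive, as in DMAMap */
} Map;

/* Linear stand-in for the interval search done by the IOVA tree. */
static const Map *find_iova(const Map *maps, size_t n, uint64_t vaddr)
{
    for (size_t i = 0; i < n; i++) {
        if (vaddr >= maps[i].translated_addr &&
            vaddr <= maps[i].translated_addr + maps[i].size) {
            return &maps[i];
        }
    }
    /* The patch reports this case as an invalid guest address */
    return NULL;
}

static bool translate(const Map *maps, size_t n, uint64_t vaddr,
                      uint64_t *iova)
{
    const Map *map = find_iova(maps, n, vaddr);
    if (!map) {
        return false;
    }
    /* Same arithmetic as the patch: keep the offset within the chunk */
    *iova = map->iova + (vaddr - map->translated_addr);
    return true;
}

int main(void)
{
    /* One 64KiB guest RAM chunk mapped at an arbitrary IOVA */
    const Map maps[] = {
        { .translated_addr = 0x7f0000000000, .iova = 0x1000, .size = 0xffff },
    };
    uint64_t iova;

    if (translate(maps, 1, 0x7f0000000010, &iova)) {
        printf("iova = 0x%llx\n", (unsigned long long)iova); /* 0x1010 */
    }
    return 0;
}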
Signed-off-by: Eugenio Pérez <epere...@redhat.com>
---
 hw/virtio/vhost-shadow-virtqueue.h |   3 +-
 include/hw/virtio/vhost-vdpa.h     |   3 +
 hw/virtio/vhost-shadow-virtqueue.c | 111 ++++++++++++++++----
 hw/virtio/vhost-vdpa.c             | 161 +++++++++++++++++++++++++----
 4 files changed, 238 insertions(+), 40 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
index 19c934af49..c6f67d6f76 100644
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -12,6 +12,7 @@
 
 #include "hw/virtio/vhost.h"
 #include "qemu/event_notifier.h"
+#include "hw/virtio/vhost-iova-tree.h"
 
 typedef struct VhostShadowVirtqueue VhostShadowVirtqueue;
 
@@ -37,7 +38,7 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev,
                     VirtQueue *vq);
 void vhost_svq_stop(VhostShadowVirtqueue *svq);
 
-VhostShadowVirtqueue *vhost_svq_new(uint16_t qsize);
+VhostShadowVirtqueue *vhost_svq_new(uint16_t qsize, VhostIOVATree *iova_map);
 
 void vhost_svq_free(VhostShadowVirtqueue *vq);
 
diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
index 009a9f3b6b..cd2388b3be 100644
--- a/include/hw/virtio/vhost-vdpa.h
+++ b/include/hw/virtio/vhost-vdpa.h
@@ -14,6 +14,7 @@
 
 #include <gmodule.h>
 
+#include "hw/virtio/vhost-iova-tree.h"
 #include "hw/virtio/virtio.h"
 #include "standard-headers/linux/vhost_types.h"
 
@@ -30,6 +31,8 @@ typedef struct vhost_vdpa {
     MemoryListener listener;
     struct vhost_vdpa_iova_range iova_range;
     bool shadow_vqs_enabled;
+    /* IOVA mapping used by Shadow Virtqueue */
+    VhostIOVATree *iova_tree;
     GPtrArray *shadow_vqs;
     struct vhost_dev *dev;
     VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX];
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
index a1a404f68f..c7888eb8cf 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -11,6 +11,7 @@
 #include "hw/virtio/vhost-shadow-virtqueue.h"
 #include "hw/virtio/vhost.h"
 #include "hw/virtio/virtio-access.h"
+#include "hw/virtio/vhost-iova-tree.h"
 #include "standard-headers/linux/vhost_types.h"
 
 #include "qemu/error-report.h"
@@ -45,6 +46,9 @@ typedef struct VhostShadowVirtqueue {
     /* Virtio device */
    VirtIODevice *vdev;
 
+    /* IOVA mapping */
+    VhostIOVATree *iova_tree;
+
     /* Map for returning guest's descriptors */
     VirtQueueElement **ring_id_maps;
 
@@ -97,13 +101,7 @@ bool vhost_svq_valid_device_features(uint64_t *dev_features)
             continue;
 
         case VIRTIO_F_ACCESS_PLATFORM:
-            /* SVQ does not know how to translate addresses */
-            if (*dev_features & BIT_ULL(b)) {
-                clear_bit(b, dev_features);
-                r = false;
-            }
-            break;
-
+            /* SVQ trusts the host's IOMMU to translate addresses */
         case VIRTIO_F_VERSION_1:
             /* SVQ trust that guest vring is little endian */
             if (!(*dev_features & BIT_ULL(b))) {
@@ -205,7 +203,55 @@ static void vhost_svq_set_notification(VhostShadowVirtqueue *svq, bool enable)
     }
 }
 
+/**
+ * Translate addresses between qemu's virtual address and SVQ IOVA
+ *
+ * @svq Shadow VirtQueue
+ * @addrs Translated IOVA addresses
+ * @iovec Source qemu's VA addresses
+ * @num Length of iovec and minimum length of addrs
+ */
+static bool vhost_svq_translate_addr(const VhostShadowVirtqueue *svq,
+                                     void **addrs, const struct iovec *iovec,
+                                     size_t num)
+{
+    size_t i;
+
+    if (num == 0) {
+        return true;
+    }
+
+    for (i = 0; i < num; ++i) {
+        DMAMap needle = {
+            .translated_addr = (hwaddr)iovec[i].iov_base,
+            .size = iovec[i].iov_len,
+        };
+        size_t off;
+
+        const DMAMap *map = vhost_iova_tree_find_iova(svq->iova_tree, &needle);
+        /*
+         * Map cannot be NULL, since the iova map contains all guest space and
+         * qemu already has a physical address mapped
+         */
+        if (unlikely(!map)) {
+            error_report("Invalid address 0x%"HWADDR_PRIx" given by guest",
+                         needle.translated_addr);
+            return false;
+        }
+
+        /*
+         * Map->iova chunk size is ignored. What to do if descriptor
+         * (addr, size) does not fit is delegated to the device.
+         */
+        off = needle.translated_addr - map->translated_addr;
+        addrs[i] = (void *)(map->iova + off);
+    }
+
+    return true;
+}
+
 static void vhost_vring_write_descs(VhostShadowVirtqueue *svq,
+                                    void * const *vaddr_sg,
                                     const struct iovec *iovec,
                                     size_t num, bool more_descs, bool write)
 {
@@ -224,7 +270,7 @@ static void vhost_vring_write_descs(VhostShadowVirtqueue *svq,
         } else {
             descs[i].flags = flags;
         }
-        descs[i].addr = cpu_to_le64((hwaddr)iovec[n].iov_base);
+        descs[i].addr = cpu_to_le64((hwaddr)vaddr_sg[n]);
         descs[i].len = cpu_to_le32(iovec[n].iov_len);
 
         last = i;
@@ -234,42 +280,60 @@ static void vhost_vring_write_descs(VhostShadowVirtqueue *svq,
     svq->free_head = le16_to_cpu(descs[last].next);
 }
 
-static unsigned vhost_svq_add_split(VhostShadowVirtqueue *svq,
-                                    VirtQueueElement *elem)
+static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
+                                VirtQueueElement *elem,
+                                unsigned *head)
 {
-    int head;
     unsigned avail_idx;
     vring_avail_t *avail = svq->vring.avail;
+    bool ok;
+    g_autofree void **sgs = g_new(void *, MAX(elem->out_num, elem->in_num));
 
-    head = svq->free_head;
+    *head = svq->free_head;
 
     /* We need some descriptors here */
     assert(elem->out_num || elem->in_num);
 
-    vhost_vring_write_descs(svq, elem->out_sg, elem->out_num,
+    ok = vhost_svq_translate_addr(svq, sgs, elem->out_sg, elem->out_num);
+    if (unlikely(!ok)) {
+        return false;
+    }
+    vhost_vring_write_descs(svq, sgs, elem->out_sg, elem->out_num,
                             elem->in_num > 0, false);
-    vhost_vring_write_descs(svq, elem->in_sg, elem->in_num, false, true);
+
+    ok = vhost_svq_translate_addr(svq, sgs, elem->in_sg, elem->in_num);
+    if (unlikely(!ok)) {
+        return false;
+    }
+
+    vhost_vring_write_descs(svq, sgs, elem->in_sg, elem->in_num, false, true);
 
     /*
      * Put entry in available array (but don't update avail->idx until they
     * do sync).
     */
    avail_idx = svq->avail_idx_shadow & (svq->vring.num - 1);
-    avail->ring[avail_idx] = cpu_to_le16(head);
+    avail->ring[avail_idx] = cpu_to_le16(*head);
     svq->avail_idx_shadow++;
 
     /* Update avail index after the descriptor is wrote */
     smp_wmb();
     avail->idx = cpu_to_le16(svq->avail_idx_shadow);
 
-    return head;
+    return true;
 }
 
-static void vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem)
+static bool vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem)
 {
-    unsigned qemu_head = vhost_svq_add_split(svq, elem);
+    unsigned qemu_head;
+    bool ok = vhost_svq_add_split(svq, elem, &qemu_head);
+    if (unlikely(!ok)) {
+        return false;
+    }
 
     svq->ring_id_maps[qemu_head] = elem;
+    return true;
 }
 
 static void vhost_svq_kick(VhostShadowVirtqueue *svq)
@@ -309,6 +373,7 @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq)
 
         while (true) {
             VirtQueueElement *elem;
+            bool ok;
 
             if (svq->next_guest_avail_elem) {
                 elem = g_steal_pointer(&svq->next_guest_avail_elem);
@@ -337,7 +402,11 @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq)
                 return;
             }
 
-            vhost_svq_add(svq, elem);
+            ok = vhost_svq_add(svq, elem);
+            if (unlikely(!ok)) {
+                /* VQ is broken, just return and ignore any other kicks */
+                return;
+            }
             vhost_svq_kick(svq);
         }
 
@@ -619,12 +688,13 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq)
  * methods and file descriptors.
  *
  * @qsize Shadow VirtQueue size
+ * @iova_tree Tree to perform descriptor translations
  *
  * Returns the new virtqueue or NULL.
  *
  * In case of error, reason is reported through error_report.
  */
-VhostShadowVirtqueue *vhost_svq_new(uint16_t qsize)
+VhostShadowVirtqueue *vhost_svq_new(uint16_t qsize, VhostIOVATree *iova_tree)
 {
     size_t desc_size = sizeof(vring_desc_t) * qsize;
     size_t device_size, driver_size;
@@ -656,6 +726,7 @@ VhostShadowVirtqueue *vhost_svq_new(uint16_t qsize)
     memset(svq->vring.desc, 0, driver_size);
     svq->vring.used = qemu_memalign(qemu_real_host_page_size, device_size);
     memset(svq->vring.used, 0, device_size);
+    svq->iova_tree = iova_tree;
     svq->ring_id_maps = g_new0(VirtQueueElement *, qsize);
     event_notifier_set_handler(&svq->hdev_call, vhost_svq_handle_call);
     return g_steal_pointer(&svq);
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 0e5c00ed7e..276a559649 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -209,6 +209,18 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener,
                                          vaddr, section->readonly);
 
     llsize = int128_sub(llend, int128_make64(iova));
+    if (v->shadow_vqs_enabled) {
+        DMAMap mem_region = {
+            .translated_addr = (hwaddr)vaddr,
+            .size = int128_get64(llsize) - 1,
+            .perm = IOMMU_ACCESS_FLAG(true, section->readonly),
+        };
+
+        int r = vhost_iova_tree_map_alloc(v->iova_tree, &mem_region);
+        assert(r == IOVA_OK);
+
+        iova = mem_region.iova;
+    }
 
     vhost_vdpa_iotlb_batch_begin_once(v);
     ret = vhost_vdpa_dma_map(v, iova, int128_get64(llsize),
@@ -261,6 +273,20 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener,
 
     llsize = int128_sub(llend, int128_make64(iova));
 
+    if (v->shadow_vqs_enabled) {
+        const DMAMap *result;
+        const void *vaddr = memory_region_get_ram_ptr(section->mr) +
+            section->offset_within_region +
+            (iova - section->offset_within_address_space);
+        DMAMap mem_region = {
+            .translated_addr = (hwaddr)vaddr,
+            .size = int128_get64(llsize) - 1,
+        };
+
+        result = vhost_iova_tree_find_iova(v->iova_tree, &mem_region);
+        iova = result->iova;
+        vhost_iova_tree_remove(v->iova_tree, &mem_region);
+    }
     vhost_vdpa_iotlb_batch_begin_once(v);
     ret = vhost_vdpa_dma_unmap(v, iova, int128_get64(llsize));
     if (ret) {
@@ -783,33 +809,70 @@ static int vhost_vdpa_svq_set_fds(struct vhost_dev *dev,
 /**
  * Unmap SVQ area in the device
  */
-static bool vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, hwaddr iova,
-                                      hwaddr size)
+static bool vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v,
+                                      const DMAMap *needle)
 {
+    const DMAMap *result = vhost_iova_tree_find_iova(v->iova_tree, needle);
+    hwaddr size;
     int r;
 
-    size = ROUND_UP(size, qemu_real_host_page_size);
-    r = vhost_vdpa_dma_unmap(v, iova, size);
+    if (unlikely(!result)) {
+        error_report("Unable to find SVQ address to unmap");
+        return false;
+    }
+
+    size = ROUND_UP(result->size, qemu_real_host_page_size);
+    r = vhost_vdpa_dma_unmap(v, result->iova, size);
     return r == 0;
 }
 
 static bool vhost_vdpa_svq_unmap_rings(struct vhost_dev *dev,
                                        const VhostShadowVirtqueue *svq)
 {
+    DMAMap needle;
     struct vhost_vdpa *v = dev->opaque;
     struct vhost_vring_addr svq_addr;
-    size_t device_size = vhost_svq_device_area_size(svq);
-    size_t driver_size = vhost_svq_driver_area_size(svq);
     bool ok;
 
     vhost_svq_get_vring_addr(svq, &svq_addr);
 
-    ok = vhost_vdpa_svq_unmap_ring(v, svq_addr.desc_user_addr, driver_size);
+    needle = (DMAMap) {
+        .translated_addr = svq_addr.desc_user_addr,
+    };
+    ok = vhost_vdpa_svq_unmap_ring(v, &needle);
     if (unlikely(!ok)) {
         return false;
     }
 
-    return vhost_vdpa_svq_unmap_ring(v, svq_addr.used_user_addr, device_size);
+    needle = (DMAMap) {
+        .translated_addr = svq_addr.used_user_addr,
+    };
+    return vhost_vdpa_svq_unmap_ring(v, &needle);
+}
+
+/**
+ * Map SVQ area in the device
+ *
+ * @v Vhost-vdpa device
+ * @needle The area to search the IOVA for
+ * @readonly Permissions of the area
+ */
+static bool vhost_vdpa_svq_map_ring(struct vhost_vdpa *v, const DMAMap *needle,
+                                    bool readonly)
+{
+    hwaddr off;
+    const DMAMap *result = vhost_iova_tree_find_iova(v->iova_tree, needle);
+    int r;
+
+    if (unlikely(!result)) {
+        error_report("Can't locate SVQ ring");
+        return false;
+    }
+
+    off = needle->translated_addr - result->translated_addr;
+    r = vhost_vdpa_dma_map(v, result->iova + off, needle->size,
+                           (void *)needle->translated_addr, readonly);
+    return r == 0;
 }
 
 /**
@@ -821,23 +884,29 @@ static bool vhost_vdpa_svq_unmap_rings(struct vhost_dev *dev,
 static bool vhost_vdpa_svq_map_rings(struct vhost_dev *dev,
                                      const VhostShadowVirtqueue *svq)
 {
+    DMAMap needle;
     struct vhost_vdpa *v = dev->opaque;
     struct vhost_vring_addr svq_addr;
     size_t device_size = vhost_svq_device_area_size(svq);
     size_t driver_size = vhost_svq_driver_area_size(svq);
-    int r;
+    bool ok;
 
     vhost_svq_get_vring_addr(svq, &svq_addr);
 
-    r = vhost_vdpa_dma_map(v, svq_addr.desc_user_addr, driver_size,
-                           (void *)svq_addr.desc_user_addr, true);
-    if (unlikely(r != 0)) {
+    needle = (DMAMap) {
+        .translated_addr = svq_addr.desc_user_addr,
+        .size = driver_size,
+    };
+    ok = vhost_vdpa_svq_map_ring(v, &needle, true);
+    if (unlikely(!ok)) {
         return false;
     }
 
-    r = vhost_vdpa_dma_map(v, svq_addr.used_user_addr, device_size,
-                           (void *)svq_addr.used_user_addr, false);
-    return r == 0;
+    needle = (DMAMap) {
+        .translated_addr = svq_addr.used_user_addr,
+        .size = device_size,
+    };
+    return vhost_vdpa_svq_map_ring(v, &needle, false);
 }
 
 static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
@@ -1006,6 +1075,23 @@ static int vhost_vdpa_set_owner(struct vhost_dev *dev)
     return vhost_vdpa_call(dev, VHOST_SET_OWNER, NULL);
 }
 
+static bool vhost_vdpa_svq_get_vq_region(struct vhost_vdpa *v,
+                                         unsigned long long addr,
+                                         uint64_t *iova_addr)
+{
+    const DMAMap needle = {
+        .translated_addr = addr,
+    };
+    const DMAMap *translation = vhost_iova_tree_find_iova(v->iova_tree,
+                                                          &needle);
+    if (!translation) {
+        return false;
+    }
+
+    *iova_addr = translation->iova + (addr - translation->translated_addr);
+    return true;
+}
+
 static void vhost_vdpa_vq_get_guest_addr(struct vhost_vring_addr *addr,
                                          struct vhost_virtqueue *vq)
 {
@@ -1023,10 +1109,23 @@ static int vhost_vdpa_vq_get_addr(struct vhost_dev *dev,
     assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
 
     if (v->shadow_vqs_enabled) {
+        struct vhost_vring_addr svq_addr;
         int idx = vhost_vdpa_get_vq_index(dev, addr->index);
         VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, idx);
 
-        vhost_svq_get_vring_addr(svq, addr);
+        vhost_svq_get_vring_addr(svq, &svq_addr);
+        if (!vhost_vdpa_svq_get_vq_region(v, svq_addr.desc_user_addr,
+                                          &addr->desc_user_addr)) {
+            return -1;
+        }
+        if (!vhost_vdpa_svq_get_vq_region(v, svq_addr.avail_user_addr,
+                                          &addr->avail_user_addr)) {
+            return -1;
+        }
+        if (!vhost_vdpa_svq_get_vq_region(v, svq_addr.used_user_addr,
+                                          &addr->used_user_addr)) {
+            return -1;
+        }
     } else {
         vhost_vdpa_vq_get_guest_addr(addr, vq);
     }
@@ -1095,13 +1194,37 @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v,
 
     shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_psvq_free);
     for (unsigned n = 0; n < hdev->nvqs; ++n) {
-        VhostShadowVirtqueue *svq = vhost_svq_new(qsize);
-
+        DMAMap device_region, driver_region;
+        struct vhost_vring_addr addr;
+        VhostShadowVirtqueue *svq = vhost_svq_new(qsize, v->iova_tree);
         if (unlikely(!svq)) {
             error_setg(errp, "Cannot create svq %u", n);
             return -1;
         }
-        g_ptr_array_add(v->shadow_vqs, svq);
+
+        vhost_svq_get_vring_addr(svq, &addr);
+        driver_region = (DMAMap) {
+            .translated_addr = (hwaddr)addr.desc_user_addr,
+
+            /*
+             * DMAMap.size includes the last byte of the range, while
+             * sizeof marks one past it. Subtract one byte to make them
+             * match.
+             */
+            .size = vhost_svq_driver_area_size(svq) - 1,
+            .perm = VHOST_ACCESS_RO,
+        };
+        device_region = (DMAMap) {
+            .translated_addr = (hwaddr)addr.used_user_addr,
+            .size = vhost_svq_device_area_size(svq) - 1,
+            .perm = VHOST_ACCESS_RW,
+        };
+
+        r = vhost_iova_tree_map_alloc(v->iova_tree, &driver_region);
+        assert(r == IOVA_OK);
+        r = vhost_iova_tree_map_alloc(v->iova_tree, &device_region);
+        assert(r == IOVA_OK);
+
+        g_ptr_array_add(shadow_vqs, svq);
     }
 
 out:
-- 
2.27.0