On Fri, Aug 30, 2024 at 3:58 PM Jonah Palmer <jonah.pal...@oracle.com> wrote: > > > > On 8/29/24 12:55 PM, Eugenio Perez Martin wrote: > > On Wed, Aug 21, 2024 at 2:56 PM Jonah Palmer <jonah.pal...@oracle.com> > > wrote: > >> > >> Implements a GPA->IOVA and IOVA->SVQ HVA tree for handling mapping, > >> unmapping, and translations for guest and host-only memory, > >> respectively. > >> > >> By splitting up a full IOVA->HVA tree (containing both guest and > >> host-only memory mappings) into a GPA->IOVA tree (containing only guest > >> memory mappings) and a IOVA->SVQ HVA tree (containing host-only memory > >> mappings), we can avoid translating to the wrong IOVA when the guest has > >> overlapping memory regions where different GPAs lead to the same HVA. > >> > >> In other words, if the guest has overlapping memory regions, translating > >> an HVA to an IOVA may result in receiving an incorrect IOVA when > >> searching the full IOVA->HVA tree. This would be due to one HVA range > >> being contained (overlapping) in another HVA range in the IOVA->HVA > >> tree. > >> > >> To avoid this issue, creating a GPA->IOVA tree and using it to translate > >> a GPA to an IOVA ensures that the IOVA we receive is the correct one > >> (instead of relying on a HVA->IOVA translation). > >> > >> As a byproduct of creating a GPA->IOVA tree, the full IOVA->HVA tree now > >> becomes a partial IOVA->SVQ HVA tree. That is, since we're moving all > >> guest memory mappings to the GPA->IOVA tree, the host-only memory > >> mappings are now the only mappings being put into the IOVA->HVA tree. > >> > >> Furthermore, as an additional byproduct of splitting up guest and > >> host-only memory mappings into separate trees, special attention needs > >> to be paid to vhost_svq_translate_addr() when translating memory buffers > >> from iovec. The memory buffers from iovec can be backed by guest memory > >> or host-only memory, which means that we need to figure out who is > >> backing these buffers and then decide which tree to use for translating > >> it. > >> > >> In this patch we determine the backer of this buffer by first checking > >> if a RAM block can be inferred from the buffer's HVA. That is, we use > >> qemu_ram_block_from_host() and if a valid RAM block is returned, we know > >> the buffer's HVA is backed by guest memory. Then we derive the GPA from > >> it and translate the GPA to an IOVA using the GPA->IOVA tree. > >> > >> If an invalid RAM block is returned, the buffer's HVA is likely backed > >> by host-only memory. In this case, we can then simply translate the HVA > >> to an IOVA using the partial IOVA->SVQ HVA tree. > >> > >> However, this method is sub-optimal, especially for memory buffers > >> backed by host-only memory, due to needing to iterate over some (if not > >> all) RAMBlock structures and then searching either the GPA->IOVA tree or > >> the IOVA->SVQ HVA tree. Optimizations to improve performance in this > >> area should be revisited at some point. > >> > >> Signed-off-by: Jonah Palmer <jonah.pal...@oracle.com> > >> --- > >> hw/virtio/vhost-iova-tree.c | 53 +++++++++++++++++++++++++++++- > >> hw/virtio/vhost-iova-tree.h | 5 ++- > >> hw/virtio/vhost-shadow-virtqueue.c | 48 +++++++++++++++++++++++---- > >> hw/virtio/vhost-vdpa.c | 18 +++++----- > >> include/qemu/iova-tree.h | 22 +++++++++++++ > >> util/iova-tree.c | 46 ++++++++++++++++++++++++++ > >> 6 files changed, 173 insertions(+), 19 deletions(-) > >> > >> diff --git a/hw/virtio/vhost-iova-tree.c b/hw/virtio/vhost-iova-tree.c > >> index 32c03db2f5..5a3f6b5cd9 100644 > >> --- a/hw/virtio/vhost-iova-tree.c > >> +++ b/hw/virtio/vhost-iova-tree.c > >> @@ -26,15 +26,19 @@ struct VhostIOVATree { > >> /* Last addressable iova address in the device */ > >> uint64_t iova_last; > >> > >> - /* IOVA address to qemu memory maps. */ > >> + /* IOVA address to qemu SVQ memory maps. */ > >> IOVATree *iova_taddr_map; > >> > >> /* IOVA tree (IOVA allocator) */ > >> IOVATree *iova_map; > >> + > >> + /* GPA->IOVA tree */ > >> + IOVATree *gpa_map; > >> }; > >> > >> /** > >> * Create a new VhostIOVATree with a new set of IOVATree's: > >> + * - GPA->IOVA tree (gpa_map) > >> * - IOVA allocator (iova_map) > >> * - IOVA->HVA tree (iova_taddr_map) > >> * > >> @@ -50,6 +54,7 @@ VhostIOVATree *vhost_iova_tree_new(hwaddr iova_first, > >> hwaddr iova_last) > >> > >> tree->iova_taddr_map = iova_tree_new(); > >> tree->iova_map = iova_tree_new(); > >> + tree->gpa_map = gpa_tree_new(); > >> return tree; > >> } > >> > >> @@ -136,3 +141,49 @@ int vhost_iova_tree_insert(VhostIOVATree *iova_tree, > >> DMAMap *map) > >> > >> return iova_tree_insert(iova_tree->iova_taddr_map, map); > >> } > >> + > >> +/** > >> + * Insert a new GPA->IOVA mapping to the GPA->IOVA tree > >> + * > >> + * @iova_tree: The VhostIOVATree > >> + * @map: The GPA->IOVA mapping > >> + * > >> + * Returns: > >> + * - IOVA_OK if the map fits in the container > >> + * - IOVA_ERR_INVALID if the map does not make sense (like size overflow) > >> + * - IOVA_ERR_OVERLAP if the GPA range overlaps with an existing range > >> + */ > >> +int vhost_gpa_tree_insert(VhostIOVATree *iova_tree, DMAMap *map) > >> +{ > >> + if (map->iova + map->size < map->iova || map->perm == IOMMU_NONE) { > >> + return IOVA_ERR_INVALID; > >> + } > >> + > >> + return gpa_tree_insert(iova_tree->gpa_map, map); > >> +} > >> + > >> +/** > >> + * Find the IOVA address stored from a guest memory address (GPA) > >> + * > >> + * @tree: The VhostIOVATree > >> + * @map: The map with the guest memory address > >> + * > >> + * Return the stored mapping, or NULL if not found. > >> + */ > >> +const DMAMap *vhost_gpa_tree_find_iova(const VhostIOVATree *tree, > >> + const DMAMap *map) > >> +{ > >> + return iova_tree_find_iova(tree->gpa_map, map); > >> +} > >> + > >> +/** > >> + * Remove existing mappings from the GPA->IOVA tree and IOVA tree > >> + * > >> + * @iova_tree: The VhostIOVATree > >> + * @map: The map to remove > >> + */ > >> +void vhost_gpa_tree_remove(VhostIOVATree *iova_tree, DMAMap map) > >> +{ > >> + iova_tree_remove(iova_tree->gpa_map, map); > >> + iova_tree_remove(iova_tree->iova_map, map); > >> +} > >> diff --git a/hw/virtio/vhost-iova-tree.h b/hw/virtio/vhost-iova-tree.h > >> index 8bf7b64786..c22941db4f 100644 > >> --- a/hw/virtio/vhost-iova-tree.h > >> +++ b/hw/virtio/vhost-iova-tree.h > >> @@ -24,5 +24,8 @@ const DMAMap *vhost_iova_tree_find_iova(const > >> VhostIOVATree *iova_tree, > >> int vhost_iova_tree_map_alloc(VhostIOVATree *iova_tree, DMAMap *map); > >> void vhost_iova_tree_remove(VhostIOVATree *iova_tree, DMAMap map); > >> int vhost_iova_tree_insert(VhostIOVATree *iova_tree, DMAMap *map); > >> - > >> +int vhost_gpa_tree_insert(VhostIOVATree *iova_tree, DMAMap *map); > >> +const DMAMap *vhost_gpa_tree_find_iova(const VhostIOVATree *iova_tree, > >> + const DMAMap *map); > >> +void vhost_gpa_tree_remove(VhostIOVATree *iova_tree, DMAMap map); > >> #endif > >> diff --git a/hw/virtio/vhost-shadow-virtqueue.c > >> b/hw/virtio/vhost-shadow-virtqueue.c > >> index fc5f408f77..12eabddaa6 100644 > >> --- a/hw/virtio/vhost-shadow-virtqueue.c > >> +++ b/hw/virtio/vhost-shadow-virtqueue.c > >> @@ -16,6 +16,7 @@ > >> #include "qemu/log.h" > >> #include "qemu/memalign.h" > >> #include "linux-headers/linux/vhost.h" > >> +#include "exec/ramblock.h" > >> > >> /** > >> * Validate the transport device features that both guests can use with > >> the SVQ > >> @@ -88,14 +89,45 @@ static bool vhost_svq_translate_addr(const > >> VhostShadowVirtqueue *svq, > >> } > >> > >> for (size_t i = 0; i < num; ++i) { > >> - DMAMap needle = { > >> - .translated_addr = (hwaddr)(uintptr_t)iovec[i].iov_base, > >> - .size = iovec[i].iov_len, > >> - }; > >> - Int128 needle_last, map_last; > >> - size_t off; > >> + RAMBlock *rb; > >> + hwaddr gpa; > >> + ram_addr_t offset; > >> + const DMAMap *map; > >> + DMAMap needle; > >> + > >> + /* > >> + * Determine if this HVA is backed by guest memory by attempting > >> to > >> + * infer a RAM block from it. If a valid RAM block is returned, > >> the > >> + * VA is backed by guest memory and we can derive the GPA from it. > >> + * Then search the GPA->IOVA tree for the corresponding IOVA. > >> + * > >> + * If the RAM block is invalid, the HVA is likely backed by > >> host-only > >> + * memory. Use the HVA to search the IOVA->HVA tree for the > >> + * corresponding IOVA. > >> + * > >> + * TODO: This additional second lookup is sub-optimal when the HVA > >> + * is backed by host-only memory. Find optimizations for > >> this > >> + * (e.g. using an HVA->IOVA tree). > >> + */ > >> + rb = qemu_ram_block_from_host(iovec[i].iov_base, false, &offset); > >> + if (rb) { > >> + gpa = rb->offset + offset; > >> + > >> + /* Search the GPA->IOVA tree */ > >> + needle = (DMAMap) { > >> + .translated_addr = gpa, > >> + .size = iovec[i].iov_len, > >> + }; > >> + map = vhost_gpa_tree_find_iova(svq->iova_tree, &needle); > >> + } else { > >> + /* Search the IOVA->HVA tree */ > >> + needle = (DMAMap) { > >> + .translated_addr = (hwaddr)(uintptr_t)iovec[i].iov_base, > >> + .size = iovec[i].iov_len, > >> + }; > >> + map = vhost_iova_tree_find_iova(svq->iova_tree, &needle); > >> + } > > > > I think that having this complex conditional here is a problem for > > future users of SVQ. > > > >> > >> - const DMAMap *map = vhost_iova_tree_find_iova(svq->iova_tree, > >> &needle); > >> /* > >> * Map cannot be NULL since iova map contains all guest space and > >> * qemu already has a physical address mapped > >> @@ -106,6 +138,8 @@ static bool vhost_svq_translate_addr(const > >> VhostShadowVirtqueue *svq, > >> needle.translated_addr); > >> return false; > >> } > >> + Int128 needle_last, map_last; > >> + size_t off; > >> > >> off = needle.translated_addr - map->translated_addr; > >> addrs[i] = map->iova + off; > >> diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c > >> index 6702459065..0da0a117dc 100644 > >> --- a/hw/virtio/vhost-vdpa.c > >> +++ b/hw/virtio/vhost-vdpa.c > >> @@ -373,9 +373,9 @@ static void > >> vhost_vdpa_listener_region_add(MemoryListener *listener, > >> > >> iova = mem_region.iova; > >> > >> - /* Add mapping to the IOVA->HVA tree */ > >> - mem_region.translated_addr = (hwaddr)(uintptr_t)vaddr; > >> - r = vhost_iova_tree_insert(s->iova_tree, &mem_region); > >> + /* Add mapping to the GPA->IOVA tree */ > >> + mem_region.translated_addr = section->offset_within_address_space; > >> + r = vhost_gpa_tree_insert(s->iova_tree, &mem_region); > >> if (unlikely(r != IOVA_OK)) { > >> error_report("Can't add listener region mapping (%d)", r); > >> goto fail_map; > >> @@ -394,7 +394,7 @@ static void > >> vhost_vdpa_listener_region_add(MemoryListener *listener, > >> > >> fail_map: > >> if (s->shadow_data) { > >> - vhost_iova_tree_remove(s->iova_tree, mem_region); > >> + vhost_gpa_tree_remove(s->iova_tree, mem_region); > >> } > >> > >> fail: > >> @@ -448,21 +448,19 @@ static void > >> vhost_vdpa_listener_region_del(MemoryListener *listener, > >> > >> if (s->shadow_data) { > >> const DMAMap *result; > >> - const void *vaddr = memory_region_get_ram_ptr(section->mr) + > >> - section->offset_within_region + > >> - (iova - section->offset_within_address_space); > >> DMAMap mem_region = { > >> - .translated_addr = (hwaddr)(uintptr_t)vaddr, > >> + .translated_addr = section->offset_within_address_space, > >> .size = int128_get64(llsize) - 1, > >> }; > >> > >> - result = vhost_iova_tree_find_iova(s->iova_tree, &mem_region); > >> + /* Search the GPA->IOVA tree */ > >> + result = vhost_gpa_tree_find_iova(s->iova_tree, &mem_region); > >> if (!result) { > >> /* The memory listener map wasn't mapped */ > >> return; > >> } > >> iova = result->iova; > >> - vhost_iova_tree_remove(s->iova_tree, *result); > >> + vhost_gpa_tree_remove(s->iova_tree, *result); > >> } > >> vhost_vdpa_iotlb_batch_begin_once(s); > >> /* > >> diff --git a/include/qemu/iova-tree.h b/include/qemu/iova-tree.h > >> index 2a10a7052e..57cfc63d33 100644 > >> --- a/include/qemu/iova-tree.h > >> +++ b/include/qemu/iova-tree.h > >> @@ -40,6 +40,15 @@ typedef struct DMAMap { > >> } QEMU_PACKED DMAMap; > >> typedef gboolean (*iova_tree_iterator)(DMAMap *map); > >> > >> +/** > >> + * gpa_tree_new: > >> + * > >> + * Create a new GPA->IOVA tree. > >> + * > >> + * Returns: the tree pointer on success, or NULL otherwise. > >> + */ > >> +IOVATree *gpa_tree_new(void); > >> + > >> /** > >> * iova_tree_new: > >> * > >> @@ -49,6 +58,19 @@ typedef gboolean (*iova_tree_iterator)(DMAMap *map); > >> */ > >> IOVATree *iova_tree_new(void); > >> > >> +/** > >> + * gpa_tree_insert: > >> + * > >> + * @tree: The GPA->IOVA tree we're inserting the mapping to > >> + * @map: The GPA->IOVA mapping to insert > >> + * > >> + * Insert a GPA range to the GPA->IOVA tree. If there are overlapped > >> + * ranges, IOVA_ERR_OVERLAP will be returned. > >> + * > >> + * Return: 0 if success, or < 0 if error. > >> + */ > >> +int gpa_tree_insert(IOVATree *tree, const DMAMap *map); > >> + > > > > I'd keep this GPA tree in VhostIOVATree as other IOVATree users like > > intel iommu do not use it. > > > > So you'd like me to move these GPA-related functions in iova-tree.c to > vhost-iova-tree.c? >
Yes, please, let's move to vhost-iova-tree.c > >> /** > >> * iova_tree_insert: > >> * > >> diff --git a/util/iova-tree.c b/util/iova-tree.c > >> index 536789797e..e3f50fbf5c 100644 > >> --- a/util/iova-tree.c > >> +++ b/util/iova-tree.c > >> @@ -71,6 +71,22 @@ static int iova_tree_compare(gconstpointer a, > >> gconstpointer b, gpointer data) > >> return 0; > >> } > >> > >> +static int gpa_tree_compare(gconstpointer a, gconstpointer b, gpointer > >> data) > >> +{ > >> + const DMAMap *m1 = a, *m2 = b; > >> + > >> + if (m1->translated_addr > m2->translated_addr + m2->size) { > >> + return 1; > >> + } > >> + > >> + if (m1->translated_addr + m1->size < m2->translated_addr) { > >> + return -1; > >> + } > >> + > >> + /* Overlapped */ > >> + return 0; > >> +} > >> + > >> IOVATree *iova_tree_new(void) > >> { > >> IOVATree *iova_tree = g_new0(IOVATree, 1); > >> @@ -81,6 +97,15 @@ IOVATree *iova_tree_new(void) > >> return iova_tree; > >> } > >> > >> +IOVATree *gpa_tree_new(void) > >> +{ > >> + IOVATree *gpa_tree = g_new0(IOVATree, 1); > >> + > >> + gpa_tree->tree = g_tree_new_full(gpa_tree_compare, NULL, g_free, > >> NULL); > >> + > >> + return gpa_tree; > >> +} > >> + > >> const DMAMap *iova_tree_find(const IOVATree *tree, const DMAMap *map) > >> { > >> return g_tree_lookup(tree->tree, map); > >> @@ -128,6 +153,27 @@ static inline void iova_tree_insert_internal(GTree > >> *gtree, DMAMap *range) > >> g_tree_insert(gtree, range, range); > >> } > >> > >> +int gpa_tree_insert(IOVATree *tree, const DMAMap *map) > >> +{ > >> + DMAMap *new; > >> + > >> + if (map->translated_addr + map->size < map->translated_addr || > >> + map->perm == IOMMU_NONE) { > >> + return IOVA_ERR_INVALID; > >> + } > >> + > >> + /* We don't allow inserting ranges that overlap with existing ones */ > >> + if (iova_tree_find(tree, map)) { > >> + return IOVA_ERR_OVERLAP; > >> + } > >> + > >> + new = g_new0(DMAMap, 1); > >> + memcpy(new, map, sizeof(*new)); > >> + iova_tree_insert_internal(tree->tree, new); > >> + > >> + return IOVA_OK; > >> +} > >> + > >> int iova_tree_insert(IOVATree *tree, const DMAMap *map) > >> { > >> DMAMap *new; > >> -- > >> 2.43.5 > >> > > >