On Fri, Oct 4, 2024 at 2:45 PM Jonah Palmer <jonah.pal...@oracle.com> wrote: > > Implements the IOVA->GPA tree for handling mapping, unmapping, and > translations for guest memory regions. > > When the guest has overlapping memory regions, an HVA to IOVA translation > may return an incorrect IOVA when searching the IOVA->HVA tree. This is > due to one HVA range being contained (overlapping) in another HVA range > in the IOVA->HVA tree. By creating an IOVA->GPA tree, we can use GPAs to > translate and find the correct IOVA for guest memory regions. >
Yes, this first patch is super close to what I meant, just one issue and a pair of nits here and there. I'd leave the second patch as an optimization on top, if the numbers prove that adding the code is worth it. > Signed-off-by: Jonah Palmer <jonah.pal...@oracle.com> > --- > hw/virtio/vhost-iova-tree.c | 78 +++++++++++++++++++++++++++++++++++-- > hw/virtio/vhost-iova-tree.h | 5 +++ > hw/virtio/vhost-vdpa.c | 20 ++++++---- > 3 files changed, 92 insertions(+), 11 deletions(-) > > diff --git a/hw/virtio/vhost-iova-tree.c b/hw/virtio/vhost-iova-tree.c > index 3d03395a77..e33fd56225 100644 > --- a/hw/virtio/vhost-iova-tree.c > +++ b/hw/virtio/vhost-iova-tree.c > @@ -28,12 +28,15 @@ struct VhostIOVATree { > > /* IOVA address to qemu memory maps. */ > IOVATree *iova_taddr_map; > + > + /* IOVA address to guest memory maps. */ > + IOVATree *iova_gpa_map; > }; > > /** > - * Create a new IOVA tree > + * Create a new VhostIOVATree > * > - * Returns the new IOVA tree > + * Returns the new VhostIOVATree > */ > VhostIOVATree *vhost_iova_tree_new(hwaddr iova_first, hwaddr iova_last) > { > @@ -44,6 +47,7 @@ VhostIOVATree *vhost_iova_tree_new(hwaddr iova_first, > hwaddr iova_last) > tree->iova_last = iova_last; > > tree->iova_taddr_map = iova_tree_new(); > + tree->iova_gpa_map = iova_tree_new(); > return tree; > } > > @@ -53,6 +57,7 @@ VhostIOVATree *vhost_iova_tree_new(hwaddr iova_first, > hwaddr iova_last) > void vhost_iova_tree_delete(VhostIOVATree *iova_tree) > { > iova_tree_destroy(iova_tree->iova_taddr_map); > + iova_tree_destroy(iova_tree->iova_gpa_map); > g_free(iova_tree); > } > > @@ -71,7 +76,7 @@ const DMAMap *vhost_iova_tree_find_iova(const VhostIOVATree > *tree, > } > > /** > - * Allocate a new mapping > + * Allocate a new mapping in the IOVA->HVA tree > * > * @tree: The iova tree > * @map: The iova map > @@ -108,3 +113,70 @@ void vhost_iova_tree_remove(VhostIOVATree *iova_tree, > DMAMap map) > { > iova_tree_remove(iova_tree->iova_taddr_map, map); > } > + > +/** > + * Find the IOVA address stored from a guest memory address > + * > + * @tree: The VhostIOVATree > + * @map: The map with the guest memory address > + * > + * Return the stored mapping, or NULL if not found. > + */ > +const DMAMap *vhost_iova_gpa_tree_find_iova(const VhostIOVATree *tree, > + const DMAMap *map) Nit: Not an english native, but I find vhost_iova_tree should not be broken for coherency with the rest of the functions. What about vhost_iova_tree_find_iova_gpa, like _gpa variant? > +{ > + return iova_tree_find_iova(tree->iova_gpa_map, map); > +} > + > +/** > + * Allocate new mappings in the IOVA->HVA & IOVA->GPA trees > + * > + * @tree: The VhostIOVATree > + * @map: The iova map > + * @gpa: The guest physical address (GPA) > + * > + * Returns: > + * - IOVA_OK if the map fits both containers > + * - IOVA_ERR_INVALID if the map does not make sense (like size overflow) > + * - IOVA_ERR_NOMEM if the IOVA->HVA tree cannot allocate more space > + * > + * It returns an assigned iova in map->iova if return value is IOVA_OK. > + */ > +int vhost_iova_tree_map_alloc_gpa(VhostIOVATree *tree, DMAMap *map, hwaddr > gpa) > +{ > + int ret; > + > + /* Some vhost devices don't like addr 0. Skip first page */ > + hwaddr iova_first = tree->iova_first ?: qemu_real_host_page_size(); > + > + if (map->translated_addr + map->size < map->translated_addr || > + map->perm == IOMMU_NONE) { > + return IOVA_ERR_INVALID; > + } > + > + /* Allocate a node in the IOVA->HVA tree */ > + ret = iova_tree_alloc_map(tree->iova_taddr_map, map, iova_first, > + tree->iova_last); Why not call vhost_iova_tree_map_alloc instead of duplicating it here? > + if (unlikely(ret != IOVA_OK)) { > + return ret; > + } > + > + /* Insert a node in the IOVA->GPA tree */ > + map->translated_addr = gpa; > + return iova_tree_insert(tree->iova_gpa_map, map); > +} > + > +/** > + * Remove existing mappings from the IOVA->HVA & IOVA->GPA trees > + * > + * @iova_tree: The VhostIOVATree > + * @map: The map to remove > + */ > +void vhost_iova_tree_remove_gpa(VhostIOVATree *iova_tree, DMAMap map) > +{ > + /* Remove the existing mapping from the IOVA->GPA tree */ > + iova_tree_remove(iova_tree->iova_gpa_map, map); > + > + /* Remove the corresponding mapping from the IOVA->HVA tree */ > + iova_tree_remove(iova_tree->iova_taddr_map, map); If we remove it blindly from both trees, we are keeping the bug, isn't it? I think the remove should receive the "gpa" as a parameter, same as alloc_gpa. After that, vhost_iova_tree_remove_gpa looks the right iova into iova_gpa_map. And only after that, it removes that iova from iova_tree_remove. If it makes things easier it could receive (hwaddr gpa, size_t len) or all of the info in a DMAMap. What do you think? > +} > diff --git a/hw/virtio/vhost-iova-tree.h b/hw/virtio/vhost-iova-tree.h > index 4adfd79ff0..511c6d18ae 100644 > --- a/hw/virtio/vhost-iova-tree.h > +++ b/hw/virtio/vhost-iova-tree.h > @@ -23,5 +23,10 @@ const DMAMap *vhost_iova_tree_find_iova(const > VhostIOVATree *iova_tree, > const DMAMap *map); > int vhost_iova_tree_map_alloc(VhostIOVATree *iova_tree, DMAMap *map); > void vhost_iova_tree_remove(VhostIOVATree *iova_tree, DMAMap map); > +const DMAMap *vhost_iova_gpa_tree_find_iova(const VhostIOVATree *iova_tree, > + const DMAMap *map); > +int vhost_iova_tree_map_alloc_gpa(VhostIOVATree *iova_tree, DMAMap *map, > + hwaddr gpa); > +void vhost_iova_tree_remove_gpa(VhostIOVATree *iova_tree, DMAMap map); > > #endif > diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c > index 3cdaa12ed5..591ff426e7 100644 > --- a/hw/virtio/vhost-vdpa.c > +++ b/hw/virtio/vhost-vdpa.c > @@ -365,9 +365,16 @@ static void > vhost_vdpa_listener_region_add(MemoryListener *listener, > mem_region.size = int128_get64(llsize) - 1, > mem_region.perm = IOMMU_ACCESS_FLAG(true, section->readonly), > > - r = vhost_iova_tree_map_alloc(s->iova_tree, &mem_region); > + r = vhost_iova_tree_map_alloc_gpa(s->iova_tree, &mem_region, > + > section->offset_within_address_space); > if (unlikely(r != IOVA_OK)) { > error_report("Can't allocate a mapping (%d)", r); > + > + /* Insertion to IOVA->GPA tree failed */ > + if (mem_region.translated_addr == > + section->offset_within_address_space) { > + goto fail_map; > + } We can move this cleanup code into vhost_iova_tree_map_alloc_gpa, isn't it? > goto fail; > } > > @@ -386,7 +393,7 @@ static void vhost_vdpa_listener_region_add(MemoryListener > *listener, > > fail_map: > if (s->shadow_data) { > - vhost_iova_tree_remove(s->iova_tree, mem_region); > + vhost_iova_tree_remove_gpa(s->iova_tree, mem_region); > } > > fail: > @@ -440,21 +447,18 @@ static void > vhost_vdpa_listener_region_del(MemoryListener *listener, > > if (s->shadow_data) { > const DMAMap *result; > - const void *vaddr = memory_region_get_ram_ptr(section->mr) + > - section->offset_within_region + > - (iova - section->offset_within_address_space); > DMAMap mem_region = { > - .translated_addr = (hwaddr)(uintptr_t)vaddr, > + .translated_addr = section->offset_within_address_space, > .size = int128_get64(llsize) - 1, > }; > > - result = vhost_iova_tree_find_iova(s->iova_tree, &mem_region); > + result = vhost_iova_gpa_tree_find_iova(s->iova_tree, &mem_region); > if (!result) { > /* The memory listener map wasn't mapped */ > return; > } > iova = result->iova; > - vhost_iova_tree_remove(s->iova_tree, *result); > + vhost_iova_tree_remove_gpa(s->iova_tree, *result); > } > vhost_vdpa_iotlb_batch_begin_once(s); > /* > -- > 2.43.5 >