On Fri, Oct 4, 2024 at 2:45 PM Jonah Palmer <jonah.pal...@oracle.com> wrote:
>
> Implements the IOVA->GPA tree for handling mapping, unmapping, and
> translations for guest memory regions.
>
> When the guest has overlapping memory regions, an HVA to IOVA translation
> may return an incorrect IOVA when searching the IOVA->HVA tree. This is
> due to one HVA range being contained (overlapping) in another HVA range
> in the IOVA->HVA tree. By creating an IOVA->GPA tree, we can use GPAs to
> translate and find the correct IOVA for guest memory regions.
>

Yes, this first patch is super close to what I meant, just one issue
and a pair of nits here and there.

I'd leave the second patch as an optimization on top, if the numbers
prove that adding the code is worth it.

> Signed-off-by: Jonah Palmer <jonah.pal...@oracle.com>
> ---
>  hw/virtio/vhost-iova-tree.c | 78 +++++++++++++++++++++++++++++++++++--
>  hw/virtio/vhost-iova-tree.h |  5 +++
>  hw/virtio/vhost-vdpa.c      | 20 ++++++----
>  3 files changed, 92 insertions(+), 11 deletions(-)
>
> diff --git a/hw/virtio/vhost-iova-tree.c b/hw/virtio/vhost-iova-tree.c
> index 3d03395a77..e33fd56225 100644
> --- a/hw/virtio/vhost-iova-tree.c
> +++ b/hw/virtio/vhost-iova-tree.c
> @@ -28,12 +28,15 @@ struct VhostIOVATree {
>
>      /* IOVA address to qemu memory maps. */
>      IOVATree *iova_taddr_map;
> +
> +    /* IOVA address to guest memory maps. */
> +    IOVATree *iova_gpa_map;
>  };
>
>  /**
> - * Create a new IOVA tree
> + * Create a new VhostIOVATree
>   *
> - * Returns the new IOVA tree
> + * Returns the new VhostIOVATree
>   */
>  VhostIOVATree *vhost_iova_tree_new(hwaddr iova_first, hwaddr iova_last)
>  {
> @@ -44,6 +47,7 @@ VhostIOVATree *vhost_iova_tree_new(hwaddr iova_first, 
> hwaddr iova_last)
>      tree->iova_last = iova_last;
>
>      tree->iova_taddr_map = iova_tree_new();
> +    tree->iova_gpa_map = iova_tree_new();
>      return tree;
>  }
>
> @@ -53,6 +57,7 @@ VhostIOVATree *vhost_iova_tree_new(hwaddr iova_first, 
> hwaddr iova_last)
>  void vhost_iova_tree_delete(VhostIOVATree *iova_tree)
>  {
>      iova_tree_destroy(iova_tree->iova_taddr_map);
> +    iova_tree_destroy(iova_tree->iova_gpa_map);
>      g_free(iova_tree);
>  }
>
> @@ -71,7 +76,7 @@ const DMAMap *vhost_iova_tree_find_iova(const VhostIOVATree 
> *tree,
>  }
>
>  /**
> - * Allocate a new mapping
> + * Allocate a new mapping in the IOVA->HVA tree
>   *
>   * @tree: The iova tree
>   * @map: The iova map
> @@ -108,3 +113,70 @@ void vhost_iova_tree_remove(VhostIOVATree *iova_tree, 
> DMAMap map)
>  {
>      iova_tree_remove(iova_tree->iova_taddr_map, map);
>  }
> +
> +/**
> + * Find the IOVA address stored from a guest memory address
> + *
> + * @tree: The VhostIOVATree
> + * @map: The map with the guest memory address
> + *
> + * Return the stored mapping, or NULL if not found.
> + */
> +const DMAMap *vhost_iova_gpa_tree_find_iova(const VhostIOVATree *tree,
> +                                            const DMAMap *map)

Nit: Not an english native, but I find vhost_iova_tree should not be
broken for coherency with the rest of the functions. What about
vhost_iova_tree_find_iova_gpa, like _gpa variant?

> +{
> +    return iova_tree_find_iova(tree->iova_gpa_map, map);
> +}
> +
> +/**
> + * Allocate new mappings in the IOVA->HVA & IOVA->GPA trees
> + *
> + * @tree: The VhostIOVATree
> + * @map: The iova map
> + * @gpa: The guest physical address (GPA)
> + *
> + * Returns:
> + * - IOVA_OK if the map fits both containers
> + * - IOVA_ERR_INVALID if the map does not make sense (like size overflow)
> + * - IOVA_ERR_NOMEM if the IOVA->HVA tree cannot allocate more space
> + *
> + * It returns an assigned iova in map->iova if return value is IOVA_OK.
> + */
> +int vhost_iova_tree_map_alloc_gpa(VhostIOVATree *tree, DMAMap *map, hwaddr 
> gpa)
> +{
> +    int ret;
> +
> +    /* Some vhost devices don't like addr 0. Skip first page */
> +    hwaddr iova_first = tree->iova_first ?: qemu_real_host_page_size();
> +
> +    if (map->translated_addr + map->size < map->translated_addr ||
> +        map->perm == IOMMU_NONE) {
> +        return IOVA_ERR_INVALID;
> +    }
> +
> +    /* Allocate a node in the IOVA->HVA tree */
> +    ret = iova_tree_alloc_map(tree->iova_taddr_map, map, iova_first,
> +                              tree->iova_last);

Why not call vhost_iova_tree_map_alloc instead of duplicating it here?

> +    if (unlikely(ret != IOVA_OK)) {
> +        return ret;
> +    }
> +
> +    /* Insert a node in the IOVA->GPA tree */
> +    map->translated_addr = gpa;
> +    return iova_tree_insert(tree->iova_gpa_map, map);
> +}
> +
> +/**
> + * Remove existing mappings from the IOVA->HVA & IOVA->GPA trees
> + *
> + * @iova_tree: The VhostIOVATree
> + * @map: The map to remove
> + */
> +void vhost_iova_tree_remove_gpa(VhostIOVATree *iova_tree, DMAMap map)
> +{
> +    /* Remove the existing mapping from the IOVA->GPA tree */
> +    iova_tree_remove(iova_tree->iova_gpa_map, map);
> +
> +    /* Remove the corresponding mapping from the IOVA->HVA tree */
> +    iova_tree_remove(iova_tree->iova_taddr_map, map);

If we remove it blindly from both trees, we are keeping the bug, isn't it?

I think the remove should receive the "gpa" as a parameter, same as
alloc_gpa. After that, vhost_iova_tree_remove_gpa looks the right iova
into iova_gpa_map. And only after that, it removes that iova from
iova_tree_remove.

If it makes things easier it could receive (hwaddr gpa, size_t len) or
all of the info in a DMAMap. What do you think?

> +}
> diff --git a/hw/virtio/vhost-iova-tree.h b/hw/virtio/vhost-iova-tree.h
> index 4adfd79ff0..511c6d18ae 100644
> --- a/hw/virtio/vhost-iova-tree.h
> +++ b/hw/virtio/vhost-iova-tree.h
> @@ -23,5 +23,10 @@ const DMAMap *vhost_iova_tree_find_iova(const 
> VhostIOVATree *iova_tree,
>                                          const DMAMap *map);
>  int vhost_iova_tree_map_alloc(VhostIOVATree *iova_tree, DMAMap *map);
>  void vhost_iova_tree_remove(VhostIOVATree *iova_tree, DMAMap map);
> +const DMAMap *vhost_iova_gpa_tree_find_iova(const VhostIOVATree *iova_tree,
> +                                            const DMAMap *map);
> +int vhost_iova_tree_map_alloc_gpa(VhostIOVATree *iova_tree, DMAMap *map,
> +                                  hwaddr gpa);
> +void vhost_iova_tree_remove_gpa(VhostIOVATree *iova_tree, DMAMap map);
>
>  #endif
> diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> index 3cdaa12ed5..591ff426e7 100644
> --- a/hw/virtio/vhost-vdpa.c
> +++ b/hw/virtio/vhost-vdpa.c
> @@ -365,9 +365,16 @@ static void 
> vhost_vdpa_listener_region_add(MemoryListener *listener,
>          mem_region.size = int128_get64(llsize) - 1,
>          mem_region.perm = IOMMU_ACCESS_FLAG(true, section->readonly),
>
> -        r = vhost_iova_tree_map_alloc(s->iova_tree, &mem_region);
> +        r = vhost_iova_tree_map_alloc_gpa(s->iova_tree, &mem_region,
> +                                          
> section->offset_within_address_space);
>          if (unlikely(r != IOVA_OK)) {
>              error_report("Can't allocate a mapping (%d)", r);
> +
> +            /* Insertion to IOVA->GPA tree failed */
> +            if (mem_region.translated_addr ==
> +                section->offset_within_address_space) {
> +                goto fail_map;
> +            }

We can move this cleanup code into vhost_iova_tree_map_alloc_gpa, isn't it?

>              goto fail;
>          }
>
> @@ -386,7 +393,7 @@ static void vhost_vdpa_listener_region_add(MemoryListener 
> *listener,
>
>  fail_map:
>      if (s->shadow_data) {
> -        vhost_iova_tree_remove(s->iova_tree, mem_region);
> +        vhost_iova_tree_remove_gpa(s->iova_tree, mem_region);
>      }
>
>  fail:
> @@ -440,21 +447,18 @@ static void 
> vhost_vdpa_listener_region_del(MemoryListener *listener,
>
>      if (s->shadow_data) {
>          const DMAMap *result;
> -        const void *vaddr = memory_region_get_ram_ptr(section->mr) +
> -            section->offset_within_region +
> -            (iova - section->offset_within_address_space);
>          DMAMap mem_region = {
> -            .translated_addr = (hwaddr)(uintptr_t)vaddr,
> +            .translated_addr = section->offset_within_address_space,
>              .size = int128_get64(llsize) - 1,
>          };
>
> -        result = vhost_iova_tree_find_iova(s->iova_tree, &mem_region);
> +        result = vhost_iova_gpa_tree_find_iova(s->iova_tree, &mem_region);
>          if (!result) {
>              /* The memory listener map wasn't mapped */
>              return;
>          }
>          iova = result->iova;
> -        vhost_iova_tree_remove(s->iova_tree, *result);
> +        vhost_iova_tree_remove_gpa(s->iova_tree, *result);
>      }
>      vhost_vdpa_iotlb_batch_begin_once(s);
>      /*
> --
> 2.43.5
>


Reply via email to