On Fri, Aug 30, 2024 at 3:58 PM Jonah Palmer <jonah.pal...@oracle.com> wrote:
>
>
>
> On 8/29/24 12:55 PM, Eugenio Perez Martin wrote:
> > On Wed, Aug 21, 2024 at 2:56 PM Jonah Palmer <jonah.pal...@oracle.com> 
> > wrote:
> >>
> >> Implements a GPA->IOVA and IOVA->SVQ HVA tree for handling mapping,
> >> unmapping, and translations for guest and host-only memory,
> >> respectively.
> >>
> >> By splitting up a full IOVA->HVA tree (containing both guest and
> >> host-only memory mappings) into a GPA->IOVA tree (containing only guest
> >> memory mappings) and a IOVA->SVQ HVA tree (containing host-only memory
> >> mappings), we can avoid translating to the wrong IOVA when the guest has
> >> overlapping memory regions where different GPAs lead to the same HVA.
> >>
> >> In other words, if the guest has overlapping memory regions, translating
> >> an HVA to an IOVA may result in receiving an incorrect IOVA when
> >> searching the full IOVA->HVA tree. This would be due to one HVA range
> >> being contained (overlapping) in another HVA range in the IOVA->HVA
> >> tree.
> >>
> >> To avoid this issue, creating a GPA->IOVA tree and using it to translate
> >> a GPA to an IOVA ensures that the IOVA we receive is the correct one
> >> (instead of relying on a HVA->IOVA translation).
> >>
> >> As a byproduct of creating a GPA->IOVA tree, the full IOVA->HVA tree now
> >> becomes a partial IOVA->SVQ HVA tree. That is, since we're moving all
> >> guest memory mappings to the GPA->IOVA tree, the host-only memory
> >> mappings are now the only mappings being put into the IOVA->HVA tree.
> >>
> >> Furthermore, as an additional byproduct of splitting up guest and
> >> host-only memory mappings into separate trees, special attention needs
> >> to be paid to vhost_svq_translate_addr() when translating memory buffers
> >> from iovec. The memory buffers from iovec can be backed by guest memory
> >> or host-only memory, which means that we need to figure out who is
> >> backing these buffers and then decide which tree to use for translating
> >> it.
> >>
> >> In this patch we determine the backer of this buffer by first checking
> >> if a RAM block can be inferred from the buffer's HVA. That is, we use
> >> qemu_ram_block_from_host() and if a valid RAM block is returned, we know
> >> the buffer's HVA is backed by guest memory. Then we derive the GPA from
> >> it and translate the GPA to an IOVA using the GPA->IOVA tree.
> >>
> >> If an invalid RAM block is returned, the buffer's HVA is likely backed
> >> by host-only memory. In this case, we can then simply translate the HVA
> >> to an IOVA using the partial IOVA->SVQ HVA tree.
> >>
> >> However, this method is sub-optimal, especially for memory buffers
> >> backed by host-only memory, due to needing to iterate over some (if not
> >> all) RAMBlock structures and then searching either the GPA->IOVA tree or
> >> the IOVA->SVQ HVA tree. Optimizations to improve performance in this
> >> area should be revisited at some point.
> >>
> >> Signed-off-by: Jonah Palmer <jonah.pal...@oracle.com>
> >> ---
> >>   hw/virtio/vhost-iova-tree.c        | 53 +++++++++++++++++++++++++++++-
> >>   hw/virtio/vhost-iova-tree.h        |  5 ++-
> >>   hw/virtio/vhost-shadow-virtqueue.c | 48 +++++++++++++++++++++++----
> >>   hw/virtio/vhost-vdpa.c             | 18 +++++-----
> >>   include/qemu/iova-tree.h           | 22 +++++++++++++
> >>   util/iova-tree.c                   | 46 ++++++++++++++++++++++++++
> >>   6 files changed, 173 insertions(+), 19 deletions(-)
> >>
> >> diff --git a/hw/virtio/vhost-iova-tree.c b/hw/virtio/vhost-iova-tree.c
> >> index 32c03db2f5..5a3f6b5cd9 100644
> >> --- a/hw/virtio/vhost-iova-tree.c
> >> +++ b/hw/virtio/vhost-iova-tree.c
> >> @@ -26,15 +26,19 @@ struct VhostIOVATree {
> >>       /* Last addressable iova address in the device */
> >>       uint64_t iova_last;
> >>
> >> -    /* IOVA address to qemu memory maps. */
> >> +    /* IOVA address to qemu SVQ memory maps. */
> >>       IOVATree *iova_taddr_map;
> >>
> >>       /* IOVA tree (IOVA allocator) */
> >>       IOVATree *iova_map;
> >> +
> >> +    /* GPA->IOVA tree */
> >> +    IOVATree *gpa_map;
> >>   };
> >>
> >>   /**
> >>    * Create a new VhostIOVATree with a new set of IOVATree's:
> >> + * - GPA->IOVA tree (gpa_map)
> >>    * - IOVA allocator (iova_map)
> >>    * - IOVA->HVA tree (iova_taddr_map)
> >>    *
> >> @@ -50,6 +54,7 @@ VhostIOVATree *vhost_iova_tree_new(hwaddr iova_first, 
> >> hwaddr iova_last)
> >>
> >>       tree->iova_taddr_map = iova_tree_new();
> >>       tree->iova_map = iova_tree_new();
> >> +    tree->gpa_map = gpa_tree_new();
> >>       return tree;
> >>   }
> >>
> >> @@ -136,3 +141,49 @@ int vhost_iova_tree_insert(VhostIOVATree *iova_tree, 
> >> DMAMap *map)
> >>
> >>       return iova_tree_insert(iova_tree->iova_taddr_map, map);
> >>   }
> >> +
> >> +/**
> >> + * Insert a new GPA->IOVA mapping to the GPA->IOVA tree
> >> + *
> >> + * @iova_tree: The VhostIOVATree
> >> + * @map: The GPA->IOVA mapping
> >> + *
> >> + * Returns:
> >> + * - IOVA_OK if the map fits in the container
> >> + * - IOVA_ERR_INVALID if the map does not make sense (like size overflow)
> >> + * - IOVA_ERR_OVERLAP if the GPA range overlaps with an existing range
> >> + */
> >> +int vhost_gpa_tree_insert(VhostIOVATree *iova_tree, DMAMap *map)
> >> +{
> >> +    if (map->iova + map->size < map->iova || map->perm == IOMMU_NONE) {
> >> +        return IOVA_ERR_INVALID;
> >> +    }
> >> +
> >> +    return gpa_tree_insert(iova_tree->gpa_map, map);
> >> +}
> >> +
> >> +/**
> >> + * Find the IOVA address stored from a guest memory address (GPA)
> >> + *
> >> + * @tree: The VhostIOVATree
> >> + * @map: The map with the guest memory address
> >> + *
> >> + * Return the stored mapping, or NULL if not found.
> >> + */
> >> +const DMAMap *vhost_gpa_tree_find_iova(const VhostIOVATree *tree,
> >> +                                       const DMAMap *map)
> >> +{
> >> +    return iova_tree_find_iova(tree->gpa_map, map);
> >> +}
> >> +
> >> +/**
> >> + * Remove existing mappings from the GPA->IOVA tree and IOVA tree
> >> + *
> >> + * @iova_tree: The VhostIOVATree
> >> + * @map: The map to remove
> >> + */
> >> +void vhost_gpa_tree_remove(VhostIOVATree *iova_tree, DMAMap map)
> >> +{
> >> +    iova_tree_remove(iova_tree->gpa_map, map);
> >> +    iova_tree_remove(iova_tree->iova_map, map);
> >> +}
> >> diff --git a/hw/virtio/vhost-iova-tree.h b/hw/virtio/vhost-iova-tree.h
> >> index 8bf7b64786..c22941db4f 100644
> >> --- a/hw/virtio/vhost-iova-tree.h
> >> +++ b/hw/virtio/vhost-iova-tree.h
> >> @@ -24,5 +24,8 @@ const DMAMap *vhost_iova_tree_find_iova(const 
> >> VhostIOVATree *iova_tree,
> >>   int vhost_iova_tree_map_alloc(VhostIOVATree *iova_tree, DMAMap *map);
> >>   void vhost_iova_tree_remove(VhostIOVATree *iova_tree, DMAMap map);
> >>   int vhost_iova_tree_insert(VhostIOVATree *iova_tree, DMAMap *map);
> >> -
> >> +int vhost_gpa_tree_insert(VhostIOVATree *iova_tree, DMAMap *map);
> >> +const DMAMap *vhost_gpa_tree_find_iova(const VhostIOVATree *iova_tree,
> >> +                                       const DMAMap *map);
> >> +void vhost_gpa_tree_remove(VhostIOVATree *iova_tree, DMAMap map);
> >>   #endif
> >> diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
> >> b/hw/virtio/vhost-shadow-virtqueue.c
> >> index fc5f408f77..12eabddaa6 100644
> >> --- a/hw/virtio/vhost-shadow-virtqueue.c
> >> +++ b/hw/virtio/vhost-shadow-virtqueue.c
> >> @@ -16,6 +16,7 @@
> >>   #include "qemu/log.h"
> >>   #include "qemu/memalign.h"
> >>   #include "linux-headers/linux/vhost.h"
> >> +#include "exec/ramblock.h"
> >>
> >>   /**
> >>    * Validate the transport device features that both guests can use with 
> >> the SVQ
> >> @@ -88,14 +89,45 @@ static bool vhost_svq_translate_addr(const 
> >> VhostShadowVirtqueue *svq,
> >>       }
> >>
> >>       for (size_t i = 0; i < num; ++i) {
> >> -        DMAMap needle = {
> >> -            .translated_addr = (hwaddr)(uintptr_t)iovec[i].iov_base,
> >> -            .size = iovec[i].iov_len,
> >> -        };
> >> -        Int128 needle_last, map_last;
> >> -        size_t off;
> >> +        RAMBlock *rb;
> >> +        hwaddr gpa;
> >> +        ram_addr_t offset;
> >> +        const DMAMap *map;
> >> +        DMAMap needle;
> >> +
> >> +        /*
> >> +         * Determine if this HVA is backed by guest memory by attempting 
> >> to
> >> +         * infer a RAM block from it. If a valid RAM block is returned, 
> >> the
> >> +         * VA is backed by guest memory and we can derive the GPA from it.
> >> +         * Then search the GPA->IOVA tree for the corresponding IOVA.
> >> +         *
> >> +         * If the RAM block is invalid, the HVA is likely backed by 
> >> host-only
> >> +         * memory. Use the HVA to search the IOVA->HVA tree for the
> >> +         * corresponding IOVA.
> >> +         *
> >> +         * TODO: This additional second lookup is sub-optimal when the HVA
> >> +         *       is backed by host-only memory. Find optimizations for 
> >> this
> >> +         *       (e.g. using an HVA->IOVA tree).
> >> +         */
> >> +        rb = qemu_ram_block_from_host(iovec[i].iov_base, false, &offset);
> >> +        if (rb) {
> >> +            gpa = rb->offset + offset;
> >> +
> >> +            /* Search the GPA->IOVA tree */
> >> +            needle = (DMAMap) {
> >> +                .translated_addr = gpa,
> >> +                .size = iovec[i].iov_len,
> >> +            };
> >> +            map = vhost_gpa_tree_find_iova(svq->iova_tree, &needle);
> >> +        } else {
> >> +            /* Search the IOVA->HVA tree */
> >> +            needle = (DMAMap) {
> >> +                .translated_addr = (hwaddr)(uintptr_t)iovec[i].iov_base,
> >> +                .size = iovec[i].iov_len,
> >> +            };
> >> +            map = vhost_iova_tree_find_iova(svq->iova_tree, &needle);
> >> +        }
> >
> > I think that having this complex conditional here is a problem for
> > future users of SVQ.
> >
> >>
> >> -        const DMAMap *map = vhost_iova_tree_find_iova(svq->iova_tree, 
> >> &needle);
> >>           /*
> >>            * Map cannot be NULL since iova map contains all guest space and
> >>            * qemu already has a physical address mapped
> >> @@ -106,6 +138,8 @@ static bool vhost_svq_translate_addr(const 
> >> VhostShadowVirtqueue *svq,
> >>                             needle.translated_addr);
> >>               return false;
> >>           }
> >> +        Int128 needle_last, map_last;
> >> +        size_t off;
> >>
> >>           off = needle.translated_addr - map->translated_addr;
> >>           addrs[i] = map->iova + off;
> >> diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> >> index 6702459065..0da0a117dc 100644
> >> --- a/hw/virtio/vhost-vdpa.c
> >> +++ b/hw/virtio/vhost-vdpa.c
> >> @@ -373,9 +373,9 @@ static void 
> >> vhost_vdpa_listener_region_add(MemoryListener *listener,
> >>
> >>           iova = mem_region.iova;
> >>
> >> -        /* Add mapping to the IOVA->HVA tree */
> >> -        mem_region.translated_addr = (hwaddr)(uintptr_t)vaddr;
> >> -        r = vhost_iova_tree_insert(s->iova_tree, &mem_region);
> >> +        /* Add mapping to the GPA->IOVA tree */
> >> +        mem_region.translated_addr = section->offset_within_address_space;
> >> +        r = vhost_gpa_tree_insert(s->iova_tree, &mem_region);
> >>           if (unlikely(r != IOVA_OK)) {
> >>               error_report("Can't add listener region mapping (%d)", r);
> >>               goto fail_map;
> >> @@ -394,7 +394,7 @@ static void 
> >> vhost_vdpa_listener_region_add(MemoryListener *listener,
> >>
> >>   fail_map:
> >>       if (s->shadow_data) {
> >> -        vhost_iova_tree_remove(s->iova_tree, mem_region);
> >> +        vhost_gpa_tree_remove(s->iova_tree, mem_region);
> >>       }
> >>
> >>   fail:
> >> @@ -448,21 +448,19 @@ static void 
> >> vhost_vdpa_listener_region_del(MemoryListener *listener,
> >>
> >>       if (s->shadow_data) {
> >>           const DMAMap *result;
> >> -        const void *vaddr = memory_region_get_ram_ptr(section->mr) +
> >> -            section->offset_within_region +
> >> -            (iova - section->offset_within_address_space);
> >>           DMAMap mem_region = {
> >> -            .translated_addr = (hwaddr)(uintptr_t)vaddr,
> >> +            .translated_addr = section->offset_within_address_space,
> >>               .size = int128_get64(llsize) - 1,
> >>           };
> >>
> >> -        result = vhost_iova_tree_find_iova(s->iova_tree, &mem_region);
> >> +        /* Search the GPA->IOVA tree */
> >> +        result = vhost_gpa_tree_find_iova(s->iova_tree, &mem_region);
> >>           if (!result) {
> >>               /* The memory listener map wasn't mapped */
> >>               return;
> >>           }
> >>           iova = result->iova;
> >> -        vhost_iova_tree_remove(s->iova_tree, *result);
> >> +        vhost_gpa_tree_remove(s->iova_tree, *result);
> >>       }
> >>       vhost_vdpa_iotlb_batch_begin_once(s);
> >>       /*
> >> diff --git a/include/qemu/iova-tree.h b/include/qemu/iova-tree.h
> >> index 2a10a7052e..57cfc63d33 100644
> >> --- a/include/qemu/iova-tree.h
> >> +++ b/include/qemu/iova-tree.h
> >> @@ -40,6 +40,15 @@ typedef struct DMAMap {
> >>   } QEMU_PACKED DMAMap;
> >>   typedef gboolean (*iova_tree_iterator)(DMAMap *map);
> >>
> >> +/**
> >> + * gpa_tree_new:
> >> + *
> >> + * Create a new GPA->IOVA tree.
> >> + *
> >> + * Returns: the tree pointer on success, or NULL otherwise.
> >> + */
> >> +IOVATree *gpa_tree_new(void);
> >> +
> >>   /**
> >>    * iova_tree_new:
> >>    *
> >> @@ -49,6 +58,19 @@ typedef gboolean (*iova_tree_iterator)(DMAMap *map);
> >>    */
> >>   IOVATree *iova_tree_new(void);
> >>
> >> +/**
> >> + * gpa_tree_insert:
> >> + *
> >> + * @tree: The GPA->IOVA tree we're inserting the mapping to
> >> + * @map: The GPA->IOVA mapping to insert
> >> + *
> >> + * Insert a GPA range to the GPA->IOVA tree. If there are overlapped
> >> + * ranges, IOVA_ERR_OVERLAP will be returned.
> >> + *
> >> + * Return: 0 if success, or < 0 if error.
> >> + */
> >> +int gpa_tree_insert(IOVATree *tree, const DMAMap *map);
> >> +
> >
> > I'd keep this GPA tree in VhostIOVATree as other IOVATree users like
> > intel iommu do not use it.
> >
>
> So you'd like me to move these GPA-related functions in iova-tree.c to
> vhost-iova-tree.c?
>

Yes, please, let's move to vhost-iova-tree.c

> >>   /**
> >>    * iova_tree_insert:
> >>    *
> >> diff --git a/util/iova-tree.c b/util/iova-tree.c
> >> index 536789797e..e3f50fbf5c 100644
> >> --- a/util/iova-tree.c
> >> +++ b/util/iova-tree.c
> >> @@ -71,6 +71,22 @@ static int iova_tree_compare(gconstpointer a, 
> >> gconstpointer b, gpointer data)
> >>       return 0;
> >>   }
> >>
> >> +static int gpa_tree_compare(gconstpointer a, gconstpointer b, gpointer 
> >> data)
> >> +{
> >> +    const DMAMap *m1 = a, *m2 = b;
> >> +
> >> +    if (m1->translated_addr > m2->translated_addr + m2->size) {
> >> +        return 1;
> >> +    }
> >> +
> >> +    if (m1->translated_addr + m1->size < m2->translated_addr) {
> >> +        return -1;
> >> +    }
> >> +
> >> +    /* Overlapped */
> >> +    return 0;
> >> +}
> >> +
> >>   IOVATree *iova_tree_new(void)
> >>   {
> >>       IOVATree *iova_tree = g_new0(IOVATree, 1);
> >> @@ -81,6 +97,15 @@ IOVATree *iova_tree_new(void)
> >>       return iova_tree;
> >>   }
> >>
> >> +IOVATree *gpa_tree_new(void)
> >> +{
> >> +    IOVATree *gpa_tree = g_new0(IOVATree, 1);
> >> +
> >> +    gpa_tree->tree = g_tree_new_full(gpa_tree_compare, NULL, g_free, 
> >> NULL);
> >> +
> >> +    return gpa_tree;
> >> +}
> >> +
> >>   const DMAMap *iova_tree_find(const IOVATree *tree, const DMAMap *map)
> >>   {
> >>       return g_tree_lookup(tree->tree, map);
> >> @@ -128,6 +153,27 @@ static inline void iova_tree_insert_internal(GTree 
> >> *gtree, DMAMap *range)
> >>       g_tree_insert(gtree, range, range);
> >>   }
> >>
> >> +int gpa_tree_insert(IOVATree *tree, const DMAMap *map)
> >> +{
> >> +    DMAMap *new;
> >> +
> >> +    if (map->translated_addr + map->size < map->translated_addr ||
> >> +        map->perm == IOMMU_NONE) {
> >> +        return IOVA_ERR_INVALID;
> >> +    }
> >> +
> >> +    /* We don't allow inserting ranges that overlap with existing ones */
> >> +    if (iova_tree_find(tree, map)) {
> >> +        return IOVA_ERR_OVERLAP;
> >> +    }
> >> +
> >> +    new = g_new0(DMAMap, 1);
> >> +    memcpy(new, map, sizeof(*new));
> >> +    iova_tree_insert_internal(tree->tree, new);
> >> +
> >> +    return IOVA_OK;
> >> +}
> >> +
> >>   int iova_tree_insert(IOVATree *tree, const DMAMap *map)
> >>   {
> >>       DMAMap *new;
> >> --
> >> 2.43.5
> >>
> >
>


Reply via email to