Qemu maps regions of userland multiple times into the guest. The MSHV
kernel driver detects those overlapping regions and rejects those
mappings.

Logic is introduced to track all mappings and to replace a region on
the fly when an unmapped gpa is encountered. If there is a region in
the list that would qualify and is currently unmapped, the current
region is unmapped and the requested region is mapped in.

Signed-off-by: Magnus Kulke <magnusku...@linux.microsoft.com>
---
 accel/mshv/mem.c            | 229 +++++++++++++++++++++++++++++++++++-
 accel/mshv/mshv-all.c       |   2 +
 include/system/mshv.h       |  13 ++
 target/i386/mshv/mshv-cpu.c |  23 +++-
 4 files changed, 265 insertions(+), 2 deletions(-)

diff --git a/accel/mshv/mem.c b/accel/mshv/mem.c
index ee627e7bd6..53e43873dc 100644
--- a/accel/mshv/mem.c
+++ b/accel/mshv/mem.c
@@ -12,7 +12,9 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/lockable.h"
 #include "qemu/error-report.h"
+#include "qemu/rcu.h"
 #include "hw/hyperv/linux-mshv.h"
 #include "system/address-spaces.h"
 #include "system/mshv.h"
@@ -20,12 +22,101 @@
 #include <sys/ioctl.h>
 #include "trace.h"
 
+static GList *mem_entries;
+
+/* We need this, because call_rcu1 won't operate on empty lists (NULL) */
+typedef struct {
+    struct rcu_head rcu;
+    GList *list;
+} FreeMemEntriesJob;
+
+static inline void free_mem_entries(struct rcu_head *rh)
+{
+    FreeMemEntriesJob *job = container_of(rh, FreeMemEntriesJob, rcu);
+    g_list_free(job->list);
+    g_free(job);
+}
+
+static void add_mem_entry(MshvMemoryEntry *entry)
+{
+    GList *old = qatomic_rcu_read(&mem_entries);
+    GList *new = g_list_copy(old);
+    new = g_list_prepend(new, entry);
+
+    qatomic_rcu_set(&mem_entries, new);
+
+    /* defer freeing of an obsolete snapshot */
+    FreeMemEntriesJob *job = g_new(FreeMemEntriesJob, 1);
+    job->list = old;
+    call_rcu1(&job->rcu, free_mem_entries);
+}
+
+static void remove_mem_entry(MshvMemoryEntry *entry)
+{
+    GList *old = qatomic_rcu_read(&mem_entries);
+    GList *new = g_list_copy(old);
+    new = g_list_remove(new, entry);
+
+    qatomic_rcu_set(&mem_entries, new);
+
+    /* Defer freeing of an obsolete snapshot */
+    FreeMemEntriesJob *job = g_new(FreeMemEntriesJob, 1);
+    job->list = old;
+    call_rcu1(&job->rcu, free_mem_entries);
+}
+
+/* Find _currently mapped_ memory entry, that is overlapping in userspace */
+static MshvMemoryEntry *find_overlap_mem_entry(const MshvMemoryEntry *entry_1)
+{
+    uint64_t start_1 = entry_1->mr.userspace_addr, start_2;
+    size_t len_1 = entry_1->mr.memory_size, len_2;
+
+    WITH_RCU_READ_LOCK_GUARD() {
+        GList *entries = qatomic_rcu_read(&mem_entries);
+        bool overlaps;
+        MshvMemoryEntry *entry_2;
+
+        for (GList *l = entries; l != NULL; l = l->next) {
+            entry_2 = l->data;
+            assert(entry_2);
+
+            /* skip the entry we are probing for overlaps */
+            if (entry_2 == entry_1) {
+                continue;
+            }
+
+            start_2 = entry_2->mr.userspace_addr;
+            len_2 = entry_2->mr.memory_size;
+
+            overlaps = ranges_overlap(start_1, len_1, start_2, len_2);
+            if (entry_2->mapped && overlaps) {
+                return entry_2;
+            }
+        }
+    }
+
+    return NULL;
+}
+
+void mshv_init_mem_manager(void)
+{
+    mem_entries = NULL;
+}
+
 static int set_guest_memory(int vm_fd, const mshv_user_mem_region *region)
 {
     int ret;
+    MshvMemoryEntry *overlap_entry, entry = { .mr = { 0 }, .mapped = false };
 
     ret = ioctl(vm_fd, MSHV_SET_GUEST_MEMORY, region);
     if (ret < 0) {
+        entry.mr.userspace_addr = region->userspace_addr;
+        entry.mr.memory_size = region->size;
+
+        overlap_entry = find_overlap_mem_entry(&entry);
+        if (overlap_entry != NULL) {
+            return -MSHV_USERSPACE_ADDR_REMAP_ERROR;
+        }
+
         error_report("failed to set guest memory");
         return -errno;
     }
@@ -54,6 +145,142 @@ static int map_or_unmap(int vm_fd, const MshvMemoryRegion 
*mr, bool add)
     return set_guest_memory(vm_fd, &region);
 }
 
+static MshvMemoryEntry *find_mem_entry_by_region(const MshvMemoryRegion *mr)
+{
+    WITH_RCU_READ_LOCK_GUARD() {
+        GList *entries = qatomic_rcu_read(&mem_entries);
+        MshvMemoryEntry *entry;
+
+        for (GList *l = entries; l != NULL; l = l->next) {
+            entry = l->data;
+            assert(entry);
+            if (memcmp(mr, &entry->mr, sizeof(MshvMemoryRegion)) == 0) {
+                return entry;
+            }
+        }
+    }
+
+    return NULL;
+}
+
+static inline int tracked_map_or_unmap(int vm_fd, const MshvMemoryRegion *mr, 
bool add)
+{
+    MshvMemoryEntry *entry;
+    int ret;
+
+    entry = find_mem_entry_by_region(mr);
+
+    if (!entry) {
+        /* delete */
+        if (!add) {
+            error_report("mem entry selected for removal does not exist");
+            return -1;
+        }
+
+        /* add */
+        ret = map_or_unmap(vm_fd, mr, true);
+        entry = g_new0(MshvMemoryEntry, 1);
+        entry->mr = *mr;
+        /* set depending on success */
+        entry->mapped = (ret == 0);
+        add_mem_entry(entry);
+
+        if (ret == -MSHV_USERSPACE_ADDR_REMAP_ERROR) {
+            warn_report(
+                "ignoring failed remapping userspace_addr=0x%016lx "
+                "gpa=0x%08lx size=0x%lx", mr->userspace_addr,
+                mr->guest_phys_addr, mr->memory_size);
+            ret = 0;
+        }
+
+        return ret;
+    }
+
+    /* entry exists */
+
+    /* delete */
+    if (!add) {
+        ret = 0;
+        if (entry->mapped) {
+            ret = map_or_unmap(vm_fd, mr, false);
+        }
+        remove_mem_entry(entry);
+        g_free(entry);
+        return ret;
+    }
+
+    /* add */
+    ret = map_or_unmap(vm_fd, mr, true);
+
+    /* set depending on success */
+    entry->mapped = (ret == 0);
+    return ret;
+}
+
+static MshvMemoryEntry *find_mem_entry_by_gpa(uint64_t gpa)
+{
+    WITH_RCU_READ_LOCK_GUARD() {
+        GList *entries = qatomic_rcu_read(&mem_entries);
+        MshvMemoryEntry *entry;
+        uint64_t gpa_offset;
+
+        for (GList *l = entries; l != NULL; l = l->next) {
+            entry = l->data;
+            assert(entry);
+            /* unsigned wrap of gpa_offset is benign; guarded by <= below */
+            gpa_offset = gpa - entry->mr.guest_phys_addr;
+            if (entry->mr.guest_phys_addr <= gpa
+                && gpa_offset < entry->mr.memory_size) {
+                return entry;
+            }
+        }
+    }
+
+    return NULL;
+}
+
+MshvRemapResult mshv_remap_overlapped_region(int vm_fd, uint64_t gpa)
+{
+    MshvMemoryEntry *gpa_entry, *overlap_entry;
+    int ret;
+
+    /* return early if no entry is found */
+    gpa_entry = find_mem_entry_by_gpa(gpa);
+    if (gpa_entry == NULL) {
+        return MshvRemapNoMapping;
+    }
+
+    overlap_entry = find_overlap_mem_entry(gpa_entry);
+    if (overlap_entry == NULL) {
+        return MshvRemapNoOverlap;
+    }
+
+    /* unmap overlapping region */
+    ret = map_or_unmap(vm_fd, &overlap_entry->mr, false);
+    if (ret < 0) {
+        error_report("failed to unmap overlap region");
+        abort();
+    }
+    overlap_entry->mapped = false;
+    warn_report("mapped out userspace_addr=0x%016lx gpa=0x%010lx size=0x%lx",
+                overlap_entry->mr.userspace_addr,
+                overlap_entry->mr.guest_phys_addr,
+                overlap_entry->mr.memory_size);
+
+    /* map region for gpa */
+    ret = map_or_unmap(vm_fd, &gpa_entry->mr, true);
+    if (ret < 0) {
+        error_report("failed to map new region");
+        abort();
+    }
+    gpa_entry->mapped = true;
+    warn_report("mapped in  userspace_addr=0x%016lx gpa=0x%010lx size=0x%lx",
+                gpa_entry->mr.userspace_addr,
+                gpa_entry->mr.guest_phys_addr,
+                gpa_entry->mr.memory_size);
+
+    return MshvRemapOk;
+}
+
 static inline MemTxAttrs get_mem_attrs(bool is_secure_mode)
 {
     MemTxAttrs memattr = {0};
@@ -139,7 +366,7 @@ static int set_memory(const MshvMemoryRegion *mshv_mr, bool 
add)
                           mshv_mr->memory_size,
                           mshv_mr->userspace_addr, mshv_mr->readonly,
                           ret);
-    return map_or_unmap(mshv_state->vm, mshv_mr, add);
+    return tracked_map_or_unmap(mshv_state->vm, mshv_mr, add);
 }
 
 /*
diff --git a/accel/mshv/mshv-all.c b/accel/mshv/mshv-all.c
index 97212c54f1..bf30c968ce 100644
--- a/accel/mshv/mshv-all.c
+++ b/accel/mshv/mshv-all.c
@@ -439,6 +439,8 @@ static int mshv_init(MachineState *ms)
 
     mshv_init_msicontrol();
 
+    mshv_init_mem_manager();
+
     do {
         int vm_fd = create_vm(mshv_fd);
         s->vm = vm_fd;
diff --git a/include/system/mshv.h b/include/system/mshv.h
index 622b3db540..c4072b980f 100644
--- a/include/system/mshv.h
+++ b/include/system/mshv.h
@@ -147,6 +147,12 @@ typedef enum MshvVmExit {
     MshvVmExitSpecial  = 2,
 } MshvVmExit;
 
+typedef enum MshvRemapResult {
+    MshvRemapOk = 0,
+    MshvRemapNoMapping = 1,
+    MshvRemapNoOverlap = 2,
+} MshvRemapResult;
+
 void mshv_init_cpu_logic(void);
 int mshv_create_vcpu(int vm_fd, uint8_t vp_index, int *cpu_fd);
 void mshv_remove_vcpu(int vm_fd, int cpu_fd);
@@ -199,8 +205,15 @@ typedef struct MshvMemoryRegion {
     bool readonly;
 } MshvMemoryRegion;
 
+typedef struct MshvMemoryEntry {
+    MshvMemoryRegion mr;
+    bool mapped;
+} MshvMemoryEntry;
+
+void mshv_init_mem_manager(void);
 int mshv_add_mem(int vm_fd, const MshvMemoryRegion *mr);
 int mshv_remove_mem(int vm_fd, const MshvMemoryRegion *mr);
+MshvRemapResult mshv_remap_overlapped_region(int vm_fd, uint64_t gpa);
 int mshv_guest_mem_read(uint64_t gpa, uint8_t *data, uintptr_t size,
                         bool is_secure_mode, bool instruction_fetch);
 int mshv_guest_mem_write(uint64_t gpa, const uint8_t *data, uintptr_t size,
diff --git a/target/i386/mshv/mshv-cpu.c b/target/i386/mshv/mshv-cpu.c
index 27c6cd6138..4c74081968 100644
--- a/target/i386/mshv/mshv-cpu.c
+++ b/target/i386/mshv/mshv-cpu.c
@@ -1159,7 +1159,9 @@ static int handle_unmapped_mem(int vm_fd, CPUState *cpu,
                                MshvVmExit *exit_reason)
 {
     struct hv_x64_memory_intercept_message info = { 0 };
+    uint64_t gpa;
     int ret;
+    enum MshvRemapResult remap_result;
 
     ret = set_memory_info(msg, &info);
     if (ret < 0) {
@@ -1167,7 +1169,26 @@ static int handle_unmapped_mem(int vm_fd, CPUState *cpu,
         return -1;
     }
 
-    return handle_mmio(cpu, msg, exit_reason);
+    gpa = info.guest_physical_address;
+
+    /* attempt to remap the region, in case of overlapping userspace mappings 
*/
+    remap_result = mshv_remap_overlapped_region(vm_fd, gpa);
+    *exit_reason = MshvVmExitIgnore;
+
+    switch (remap_result) {
+    case MshvRemapNoMapping:
+        /* if we didn't find a mapping, it is probably mmio */
+        return handle_mmio(cpu, msg, exit_reason);
+    case MshvRemapOk:
+        break;
+    case MshvRemapNoOverlap:
+        /* This should not happen, but we tolerate it */
+        warn_report("found no overlap for unmapped region");
+        *exit_reason = MshvVmExitSpecial;
+        break;
+    }
+
+    return 0;
 }
 
 static int set_ioport_info(const struct hyperv_message *msg,
-- 
2.34.1


Reply via email to