QEMU maps regions of userspace memory into the guest multiple times. The MSHV kernel driver detects these overlapping regions and rejects the mappings.
Logic is introduced to track all mappings and replace a region on the fly
if an unmapped gpa is encountered. If there is a region in the list that
would qualify and is currently unmapped, the current region is unmapped and
the requested region is mapped in.

Signed-off-by: Magnus Kulke <magnusku...@linux.microsoft.com>
---
 accel/mshv/mem.c            | 229 +++++++++++++++++++++++++++++++++++-
 accel/mshv/mshv-all.c       |   2 +
 include/system/mshv.h       |  13 ++
 target/i386/mshv/mshv-cpu.c |  23 +++-
 4 files changed, 265 insertions(+), 2 deletions(-)

diff --git a/accel/mshv/mem.c b/accel/mshv/mem.c
index ee627e7bd6..53e43873dc 100644
--- a/accel/mshv/mem.c
+++ b/accel/mshv/mem.c
@@ -12,7 +12,9 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/lockable.h"
 #include "qemu/error-report.h"
+#include "qemu/rcu.h"
 #include "hw/hyperv/linux-mshv.h"
 #include "system/address-spaces.h"
 #include "system/mshv.h"
@@ -20,12 +22,101 @@
 #include <sys/ioctl.h>
 #include "trace.h"
 
+static GList *mem_entries;
+
+/* call_rcu1 needs an rcu_head, which an empty (NULL) GList cannot provide */
+typedef struct {
+    struct rcu_head rcu;
+    GList *list;
+} FreeMemEntriesJob;
+
+static inline void free_mem_entries(struct rcu_head *rh)
+{
+    FreeMemEntriesJob *job = container_of(rh, FreeMemEntriesJob, rcu);
+    g_list_free(job->list);
+    g_free(job);
+}
+
+static void add_mem_entry(MshvMemoryEntry *entry)
+{
+    GList *old = qatomic_rcu_read(&mem_entries);
+    GList *new = g_list_copy(old);
+    new = g_list_prepend(new, entry);
+
+    qatomic_rcu_set(&mem_entries, new);
+
+    /* defer freeing of an obsolete snapshot */
+    FreeMemEntriesJob *job = g_new(FreeMemEntriesJob, 1);
+    job->list = old;
+    call_rcu1(&job->rcu, free_mem_entries);
+}
+
+static void remove_mem_entry(MshvMemoryEntry *entry)
+{
+    GList *old = qatomic_rcu_read(&mem_entries);
+    GList *new = g_list_copy(old);
+    new = g_list_remove(new, entry);
+
+    qatomic_rcu_set(&mem_entries, new);
+
+    /* Defer freeing of an obsolete snapshot; hand RCU the job, not the list */
+    FreeMemEntriesJob *job = g_new(FreeMemEntriesJob, 1);
+    job->list = old;
+    call_rcu1(&job->rcu, free_mem_entries);
+}
+
+/* Find _currently mapped_ memory entry, that is overlapping in userspace */
+static MshvMemoryEntry *find_overlap_mem_entry(const MshvMemoryEntry *entry_1)
+{
+    uint64_t start_1 = entry_1->mr.userspace_addr, start_2;
+    size_t len_1 = entry_1->mr.memory_size, len_2;
+
+    WITH_RCU_READ_LOCK_GUARD() {
+        GList *entries = qatomic_rcu_read(&mem_entries);
+        bool overlaps;
+        MshvMemoryEntry *entry_2;
+
+        for (GList *l = entries; l != NULL; l = l->next) {
+            entry_2 = l->data;
+            assert(entry_2);
+
+            if (entry_2 == entry_1) {
+                continue;
+            }
+
+            start_2 = entry_2->mr.userspace_addr;
+            len_2 = entry_2->mr.memory_size;
+
+            overlaps = ranges_overlap(start_1, len_1, start_2, len_2);
+            if (entry_2 != entry_1 && entry_2->mapped && overlaps) {
+                return entry_2;
+            }
+        }
+    }
+
+    return NULL;
+}
+
+void mshv_init_mem_manager(void)
+{
+    mem_entries = NULL;
+}
+
 static int set_guest_memory(int vm_fd, const mshv_user_mem_region *region)
 {
     int ret;
+    MshvMemoryEntry *overlap_entry, entry = { .mr = { 0 }, .mapped = false };
 
     ret = ioctl(vm_fd, MSHV_SET_GUEST_MEMORY, region);
     if (ret < 0) {
+        entry.mr.userspace_addr = region->userspace_addr;
+        entry.mr.memory_size = region->size;
+
+        overlap_entry = find_overlap_mem_entry(&entry);
+        if (overlap_entry != NULL) {
+            return -MSHV_USERSPACE_ADDR_REMAP_ERROR;
+        }
+
         error_report("failed to set guest memory");
         return -errno;
     }
@@ -54,6 +145,142 @@ static int map_or_unmap(int vm_fd, const MshvMemoryRegion *mr, bool add)
     return set_guest_memory(vm_fd, &region);
 }
 
+static MshvMemoryEntry *find_mem_entry_by_region(const MshvMemoryRegion *mr)
+{
+    WITH_RCU_READ_LOCK_GUARD() {
+        GList *entries = qatomic_rcu_read(&mem_entries);
+        MshvMemoryEntry *entry;
+
+        for (GList *l = entries; l != NULL; l = l->next) {
+            entry = l->data;
+            assert(entry);
+            if (memcmp(mr, &entry->mr, sizeof(MshvMemoryRegion)) == 0) {
+                return entry;
+            }
+        }
+    }
+
+    return NULL;
+}
+
+static inline int tracked_map_or_unmap(int vm_fd, const MshvMemoryRegion *mr, bool add)
+{
+    MshvMemoryEntry *entry;
+    int ret;
+
+    entry = find_mem_entry_by_region(mr);
+
+    if (!entry) {
+        /* delete: no tracked entry matches this region */
+        if (!add) {
+            error_report("mem entry selected for removal does not exist");
+            return -1;
+        }
+
+        /* add: try to map, then track the region either way */
+        ret = map_or_unmap(vm_fd, mr, true);
+        entry = g_new0(MshvMemoryEntry, 1);
+        entry->mr = *mr;
+        /* mapped is true only if the map call succeeded */
+        entry->mapped = (ret == 0);
+        add_mem_entry(entry);
+
+        if (ret == -MSHV_USERSPACE_ADDR_REMAP_ERROR) {
+            warn_report(
+                "ignoring failed remapping userspace_addr=0x%016lx "
+                "gpa=0x%08lx size=0x%lx", mr->userspace_addr,
+                mr->guest_phys_addr, mr->memory_size);
+            ret = 0;
+        }
+
+        return ret;
+    }
+
+    /* entry exists */
+
+    /* delete: unmap only if currently mapped, then untrack and free */
+    if (!add) {
+        ret = 0;
+        if (entry->mapped) {
+            ret = map_or_unmap(vm_fd, mr, false);
+        }
+        remove_mem_entry(entry);
+        g_free(entry);
+        return ret;
+    }
+
+    /* add: map a region that is already tracked */
+    ret = map_or_unmap(vm_fd, mr, true);
+
+    /* mapped is true only if the map call succeeded */
+    entry->mapped = (ret == 0);
+    return ret;
+}
+
+static MshvMemoryEntry* find_mem_entry_by_gpa(uint64_t gpa)
+{
+    WITH_RCU_READ_LOCK_GUARD() {
+        GList *entries = qatomic_rcu_read(&mem_entries);
+        MshvMemoryEntry *entry;
+        uint64_t gpa_offset;
+
+        for (GList *l = entries; l != NULL; l = l->next) {
+            entry = l->data;
+            assert(entry);
+            gpa_offset = gpa - entry->mr.guest_phys_addr;
+            if (entry->mr.guest_phys_addr <= gpa
+                && gpa_offset < entry->mr.memory_size) {
+                return entry;
+            }
+        }
+    }
+
+    return NULL;
+}
+
+MshvRemapResult mshv_remap_overlapped_region(int vm_fd, uint64_t gpa)
+{
+    MshvMemoryEntry *gpa_entry, *overlap_entry;
+    int ret;
+
+    /* return early if no tracked entry contains the gpa */
+    gpa_entry = find_mem_entry_by_gpa(gpa);
+    if (gpa_entry == NULL) {
+        return MshvRemapNoMapping;
+    }
+
+    overlap_entry = find_overlap_mem_entry(gpa_entry);
+    if (overlap_entry == NULL) {
+        return MshvRemapNoOverlap;
+    }
+
+    /* unmap the mapped region that overlaps in userspace */
+    ret = map_or_unmap(vm_fd, &overlap_entry->mr, false);
+    if (ret < 0) {
+        error_report("failed to unmap overlap region");
+        abort();
+    }
+    overlap_entry->mapped = false;
+    warn_report("mapped out userspace_addr=0x%016lx gpa=0x%010lx size=0x%lx",
+                overlap_entry->mr.userspace_addr,
+                overlap_entry->mr.guest_phys_addr,
+                overlap_entry->mr.memory_size);
+
+    /* map in the region that contains the gpa */
+    ret = map_or_unmap(vm_fd, &gpa_entry->mr, true);
+    if (ret < 0) {
+        error_report("failed to map new region");
+        abort();
+    }
+    gpa_entry->mapped = true;
+    warn_report("mapped in userspace_addr=0x%016lx gpa=0x%010lx size=0x%lx",
+                gpa_entry->mr.userspace_addr,
+                gpa_entry->mr.guest_phys_addr,
+                gpa_entry->mr.memory_size);
+
+    return MshvRemapOk;
+}
+
 static inline MemTxAttrs get_mem_attrs(bool is_secure_mode)
 {
     MemTxAttrs memattr = {0};
@@ -139,7 +366,7 @@ static int set_memory(const MshvMemoryRegion *mshv_mr, bool add)
                           mshv_mr->memory_size,
                           mshv_mr->userspace_addr, mshv_mr->readonly,
                           ret);
-    return map_or_unmap(mshv_state->vm, mshv_mr, add);
+    return tracked_map_or_unmap(mshv_state->vm, mshv_mr, add);
 }
 
 /*
diff --git a/accel/mshv/mshv-all.c b/accel/mshv/mshv-all.c
index 97212c54f1..bf30c968ce 100644
--- a/accel/mshv/mshv-all.c
+++ b/accel/mshv/mshv-all.c
@@ -439,6 +439,8 @@ static int mshv_init(MachineState *ms)
 
     mshv_init_msicontrol();
 
+    mshv_init_mem_manager();
+
     do {
         int vm_fd = create_vm(mshv_fd);
         s->vm = vm_fd;
diff --git a/include/system/mshv.h b/include/system/mshv.h
index 622b3db540..c4072b980f 100644
--- a/include/system/mshv.h
+++ b/include/system/mshv.h
@@ -147,6 +147,12 @@ typedef enum MshvVmExit {
     MshvVmExitSpecial = 2,
 } MshvVmExit;
 
+typedef enum MshvRemapResult {
+    MshvRemapOk = 0,
+    MshvRemapNoMapping = 1,
+    MshvRemapNoOverlap = 2,
+} MshvRemapResult;
+
 void mshv_init_cpu_logic(void);
 int mshv_create_vcpu(int vm_fd, uint8_t vp_index, int *cpu_fd);
 void mshv_remove_vcpu(int vm_fd, int cpu_fd);
@@ -199,8 +205,15 @@ typedef struct MshvMemoryRegion {
     bool readonly;
 } MshvMemoryRegion;
 
+typedef struct MshvMemoryEntry {
+    MshvMemoryRegion mr;
+    bool mapped;
+} MshvMemoryEntry;
+
+void mshv_init_mem_manager(void);
 int mshv_add_mem(int vm_fd, const MshvMemoryRegion *mr);
 int mshv_remove_mem(int vm_fd, const MshvMemoryRegion *mr);
+MshvRemapResult mshv_remap_overlapped_region(int vm_fd, uint64_t gpa);
 int mshv_guest_mem_read(uint64_t gpa, uint8_t *data, uintptr_t size,
                         bool is_secure_mode, bool instruction_fetch);
 int mshv_guest_mem_write(uint64_t gpa, const uint8_t *data, uintptr_t size,
diff --git a/target/i386/mshv/mshv-cpu.c b/target/i386/mshv/mshv-cpu.c
index 27c6cd6138..4c74081968 100644
--- a/target/i386/mshv/mshv-cpu.c
+++ b/target/i386/mshv/mshv-cpu.c
@@ -1159,7 +1159,9 @@ static int handle_unmapped_mem(int vm_fd, CPUState *cpu,
                                MshvVmExit *exit_reason)
 {
     struct hv_x64_memory_intercept_message info = { 0 };
+    uint64_t gpa;
     int ret;
+    enum MshvRemapResult remap_result;
 
     ret = set_memory_info(msg, &info);
     if (ret < 0) {
@@ -1167,7 +1169,26 @@ static int handle_unmapped_mem(int vm_fd, CPUState *cpu,
         return -1;
     }
 
-    return handle_mmio(cpu, msg, exit_reason);
+    gpa = info.guest_physical_address;
+
+    /* attempt to remap the region, in case of overlapping userspace mappings */
+    remap_result = mshv_remap_overlapped_region(vm_fd, gpa);
+    *exit_reason = MshvVmExitIgnore;
+
+    switch (remap_result) {
+    case MshvRemapNoMapping:
+        /* if we didn't find a mapping, it is probably mmio */
+        return handle_mmio(cpu, msg, exit_reason);
+    case MshvRemapOk:
+        break;
+    case MshvRemapNoOverlap:
+        /* This should not happen, but we are forgiving it */
+        warn_report("found no overlap for unmapped region");
+        *exit_reason = MshvVmExitSpecial;
+        break;
+    }
+
+    return 0;
 }
 
 static int set_ioport_info(const struct hyperv_message *msg,
-- 
2.34.1