From: Jagannathan Raman <jag.ra...@oracle.com> Add a memory-listener object that keeps the view of RAM in sync between QEMU and the remote process. A MemoryListener is registered for the system-memory AddressSpace. When the listener commits changes to memory, it sends a SYNC_SYSMEM message to the remote process; the remote process receives the message and processes it in its handler for the SYNC_SYSMEM message.
Signed-off-by: Jagannathan Raman <jag.ra...@oracle.com> Signed-off-by: John G Johnson <john.g.john...@oracle.com> Signed-off-by: Elena Ufimtseva <elena.ufimts...@oracle.com> --- MAINTAINERS | 2 + hw/i386/remote-msg.c | 4 + hw/pci/Makefile.objs | 1 + hw/pci/memory-sync.c | 214 +++++++++++++++++++++++++++++++++++ hw/pci/proxy.c | 4 + include/hw/pci/memory-sync.h | 30 +++++ include/hw/pci/proxy.h | 3 + 7 files changed, 258 insertions(+) create mode 100644 hw/pci/memory-sync.c create mode 100644 include/hw/pci/memory-sync.h diff --git a/MAINTAINERS b/MAINTAINERS index b48c3114c1..38d605445e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2954,6 +2954,8 @@ F: include/hw/i386/remote-memory.h F: hw/i386/remote-memory.c F: hw/pci/proxy.c F: include/hw/pci/proxy.h +F: hw/pci/memory-sync.c +F: include/hw/pci/memory-sync.h Build and test automation ------------------------- diff --git a/hw/i386/remote-msg.c b/hw/i386/remote-msg.c index ffb4143736..48b153eaae 100644 --- a/hw/i386/remote-msg.c +++ b/hw/i386/remote-msg.c @@ -9,6 +9,7 @@ #include "io/channel-util.h" #include "hw/pci/pci.h" #include "exec/memattrs.h" +#include "hw/i386/remote-memory.h" static void process_connect_dev_msg(MPQemuMsg *msg, QIOChannel *com, Error **errp); @@ -63,6 +64,9 @@ gboolean mpqemu_process_msg(QIOChannel *ioc, GIOCondition cond, case BAR_READ: process_bar_read(ioc, &msg, &local_err); break; + case SYNC_SYSMEM: + remote_sysmem_reconfig(&msg, &local_err); + break; default: error_setg(&local_err, "Unknown command (%d) received from proxy \ in remote process pid=%d", msg.cmd, getpid()); diff --git a/hw/pci/Makefile.objs b/hw/pci/Makefile.objs index 515dda506c..c90acd5a6e 100644 --- a/hw/pci/Makefile.objs +++ b/hw/pci/Makefile.objs @@ -13,3 +13,4 @@ common-obj-$(CONFIG_PCI_EXPRESS) += pcie_port.o pcie_host.o common-obj-$(call lnot,$(CONFIG_PCI)) += pci-stub.o common-obj-$(CONFIG_ALL) += pci-stub.o obj-$(CONFIG_MPQEMU) += proxy.o +obj-$(CONFIG_MPQEMU) += memory-sync.o diff --git a/hw/pci/memory-sync.c 
b/hw/pci/memory-sync.c new file mode 100644 index 0000000000..5f867974c4 --- /dev/null +++ b/hw/pci/memory-sync.c @@ -0,0 +1,214 @@ +/* + * Copyright © 2018, 2020 Oracle and/or its affiliates. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include "qemu/osdep.h" +#include "qemu-common.h" + +#include "qemu/compiler.h" +#include "qemu/int128.h" +#include "qemu/range.h" +#include "exec/memory.h" +#include "exec/cpu-common.h" +#include "cpu.h" +#include "exec/ram_addr.h" +#include "exec/address-spaces.h" +#include "io/mpqemu-link.h" +#include "hw/pci/memory-sync.h" + +static void proxy_ml_begin(MemoryListener *listener) +{ + RemoteMemSync *sync = container_of(listener, RemoteMemSync, listener); + int mrs; + + for (mrs = 0; mrs < sync->n_mr_sections; mrs++) { + memory_region_unref(sync->mr_sections[mrs].mr); + } + + g_free(sync->mr_sections); + sync->mr_sections = NULL; + sync->n_mr_sections = 0; +} + +static int get_fd_from_hostaddr(uint64_t host, ram_addr_t *offset) +{ + MemoryRegion *mr; + ram_addr_t off; + + /** + * Assumes that the host address is a valid address as it's + * coming from the MemoryListener system. In the case host + * address is not valid, the following call would return + * the default subregion of "system_memory" region, and + * not NULL. So it's not possible to check for NULL here. 
+ */ + mr = memory_region_from_host((void *)(uintptr_t)host, &off); + + if (offset) { + *offset = off; + } + + return memory_region_get_fd(mr); +} + +static bool proxy_mrs_can_merge(uint64_t host, uint64_t prev_host, size_t size) +{ + bool merge; + int fd1, fd2; + + fd1 = get_fd_from_hostaddr(host, NULL); + + fd2 = get_fd_from_hostaddr(prev_host, NULL); + + merge = (fd1 == fd2); + + merge &= ((prev_host + size) == host); + + return merge; +} + +static bool try_merge(RemoteMemSync *sync, MemoryRegionSection *section) +{ + uint64_t mrs_size, mrs_gpa, mrs_page; + MemoryRegionSection *prev_sec; + bool merged = false; + uintptr_t mrs_host; + RAMBlock *mrs_rb; + + if (!sync->n_mr_sections) { + return false; + } + + mrs_rb = section->mr->ram_block; + mrs_page = (uint64_t)qemu_ram_pagesize(mrs_rb); + mrs_size = int128_get64(section->size); + mrs_gpa = section->offset_within_address_space; + mrs_host = (uintptr_t)memory_region_get_ram_ptr(section->mr) + + section->offset_within_region; + + if (get_fd_from_hostaddr(mrs_host, NULL) < 0) { + return true; + } + + mrs_host = mrs_host & ~(mrs_page - 1); + mrs_gpa = mrs_gpa & ~(mrs_page - 1); + mrs_size = ROUND_UP(mrs_size, mrs_page); + + if (sync->n_mr_sections) { + prev_sec = sync->mr_sections + (sync->n_mr_sections - 1); + uint64_t prev_gpa_start = prev_sec->offset_within_address_space; + uint64_t prev_size = int128_get64(prev_sec->size); + uint64_t prev_gpa_end = range_get_last(prev_gpa_start, prev_size); + uint64_t prev_host_start = + (uintptr_t)memory_region_get_ram_ptr(prev_sec->mr) + + prev_sec->offset_within_region; + uint64_t prev_host_end = range_get_last(prev_host_start, prev_size); + + if (mrs_gpa <= (prev_gpa_end + 1)) { + g_assert(mrs_gpa > prev_gpa_start); + + if ((section->mr == prev_sec->mr) && + proxy_mrs_can_merge(mrs_host, prev_host_start, + (mrs_gpa - prev_gpa_start))) { + uint64_t max_end = MAX(prev_host_end, mrs_host + mrs_size); + merged = true; + prev_sec->offset_within_address_space = + 
MIN(prev_gpa_start, mrs_gpa); + prev_sec->offset_within_region = + MIN(prev_host_start, mrs_host) - + (uintptr_t)memory_region_get_ram_ptr(prev_sec->mr); + prev_sec->size = int128_make64(max_end - MIN(prev_host_start, + mrs_host)); + } + } + } + + return merged; +} + +static void proxy_ml_region_addnop(MemoryListener *listener, + MemoryRegionSection *section) +{ + RemoteMemSync *sync = container_of(listener, RemoteMemSync, listener); + + if (!(memory_region_is_ram(section->mr) && + !memory_region_is_rom(section->mr))) { + return; + } + + if (try_merge(sync, section)) { + return; + } + + ++sync->n_mr_sections; + sync->mr_sections = g_renew(MemoryRegionSection, sync->mr_sections, + sync->n_mr_sections); + sync->mr_sections[sync->n_mr_sections - 1] = *section; + sync->mr_sections[sync->n_mr_sections - 1].fv = NULL; + memory_region_ref(section->mr); +} + +static void proxy_ml_commit(MemoryListener *listener) +{ + RemoteMemSync *sync = container_of(listener, RemoteMemSync, listener); + MPQemuMsg msg; + MemoryRegionSection section; + ram_addr_t offset; + uintptr_t host_addr; + int region; + + memset(&msg, 0, sizeof(MPQemuMsg)); + + msg.cmd = SYNC_SYSMEM; + msg.bytestream = 0; + msg.num_fds = sync->n_mr_sections; + msg.size = sizeof(msg.data1); + assert(msg.num_fds <= REMOTE_MAX_FDS); + + for (region = 0; region < sync->n_mr_sections; region++) { + section = sync->mr_sections[region]; + msg.data1.sync_sysmem.gpas[region] = + section.offset_within_address_space; + msg.data1.sync_sysmem.sizes[region] = int128_get64(section.size); + host_addr = (uintptr_t)memory_region_get_ram_ptr(section.mr) + + section.offset_within_region; + msg.fds[region] = get_fd_from_hostaddr(host_addr, &offset); + msg.data1.sync_sysmem.offsets[region] = offset; + } + mpqemu_msg_send(&msg, sync->ioc); +} + +void deconfigure_memory_sync(RemoteMemSync *sync) +{ + memory_listener_unregister(&sync->listener); +} + +/* + * TODO: Memory Sync need not be instantianted once per every proxy device. 
+ * All remote devices are going to get the exact same updates at the + * same time. It therefore makes sense to have a broadcast model. + * + * Broadcast model would involve running the MemorySync object in a + * thread. MemorySync would contain a list of mpqemu-link objects + * that need notification. proxy_ml_commit() could send the same + * message to all the links at the same time. + */ +void configure_memory_sync(RemoteMemSync *sync, QIOChannel *ioc) +{ + sync->n_mr_sections = 0; + sync->mr_sections = NULL; + + sync->ioc = ioc; + + sync->listener.begin = proxy_ml_begin; + sync->listener.commit = proxy_ml_commit; + sync->listener.region_add = proxy_ml_region_addnop; + sync->listener.region_nop = proxy_ml_region_addnop; + sync->listener.priority = 10; + + memory_listener_register(&sync->listener, &address_space_memory); +} diff --git a/hw/pci/proxy.c b/hw/pci/proxy.c index fff021a06a..5ecbdd2dcf 100644 --- a/hw/pci/proxy.c +++ b/hw/pci/proxy.c @@ -17,6 +17,8 @@ #include "monitor/monitor.h" #include "io/mpqemu-link.h" #include "qemu/error-report.h" +#include "hw/pci/memory-sync.h" +#include "qom/object.h" static void proxy_set_socket(PCIProxyDev *pdev, int fd, Error **errp) { @@ -68,6 +70,8 @@ static void pci_proxy_dev_realize(PCIDevice *device, Error **errp) } proxy_set_socket(dev, proxyfd, errp); } + + configure_memory_sync(&dev->sync, dev->com); } static int config_op_send(PCIProxyDev *pdev, uint32_t addr, uint32_t *val, diff --git a/include/hw/pci/memory-sync.h b/include/hw/pci/memory-sync.h new file mode 100644 index 0000000000..3c9007f318 --- /dev/null +++ b/include/hw/pci/memory-sync.h @@ -0,0 +1,30 @@ +/* + * Copyright © 2018, 2020 Oracle and/or its affiliates. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ * + */ + +#ifndef MEMORY_SYNC_H +#define MEMORY_SYNC_H + +#include "qemu/osdep.h" +#include "qemu-common.h" + +#include "exec/memory.h" +#include "io/channel.h" + +typedef struct RemoteMemSync { + MemoryListener listener; + + int n_mr_sections; + MemoryRegionSection *mr_sections; + + QIOChannel *ioc; +} RemoteMemSync; + +void configure_memory_sync(RemoteMemSync *sync, QIOChannel *ioc); +void deconfigure_memory_sync(RemoteMemSync *sync); + +#endif diff --git a/include/hw/pci/proxy.h b/include/hw/pci/proxy.h index 4f9f9c4e15..a41a6aeaa5 100644 --- a/include/hw/pci/proxy.h +++ b/include/hw/pci/proxy.h @@ -14,6 +14,7 @@ #include "hw/pci/pci.h" #include "io/channel.h" +#include "hw/pci/memory-sync.h" #define TYPE_PCI_PROXY_DEV "pci-proxy-dev" @@ -42,6 +43,8 @@ struct PCIProxyDev { QIOChannel *com; QIOChannel *dev; + RemoteMemSync sync; + ProxyMemoryRegion region[PCI_NUM_REGIONS]; }; -- 2.25.GIT