Hi ----- Original Message ----- > comments below > > On 06/29/17 15:23, Marc-André Lureau wrote: > > The VM coreinfo (vmcoreinfo) device is an emulated device which > > exposes a 4k memory range to the guest to store various informations > > useful to debug the guest OS. (it is greatly inspired by the VMGENID > > device implementation) > > > > This is an early-boot alternative to the qemu-ga VMDUMP_INFO event > > proposed in "[PATCH 00/21] WIP: dump: add kaslr support". > > > > A proof-of-concept kernel module: > > https://github.com/elmarco/vmgenid-test/blob/master/qemuvmci-test.c > > > > Signed-off-by: Marc-André Lureau <marcandre.lur...@redhat.com> > > --- > > include/hw/acpi/aml-build.h | 1 + > > include/hw/acpi/vmcoreinfo.h | 36 +++++++ > > hw/acpi/aml-build.c | 2 + > > hw/acpi/vmcoreinfo.c | 198 > > +++++++++++++++++++++++++++++++++++++ > > hw/i386/acpi-build.c | 14 +++ > > default-configs/arm-softmmu.mak | 1 + > > default-configs/i386-softmmu.mak | 1 + > > default-configs/x86_64-softmmu.mak | 1 + > > docs/specs/vmcoreinfo.txt | 138 ++++++++++++++++++++++++++ > > hw/acpi/Makefile.objs | 1 + > > 10 files changed, 393 insertions(+) > > create mode 100644 include/hw/acpi/vmcoreinfo.h > > create mode 100644 hw/acpi/vmcoreinfo.c > > create mode 100644 docs/specs/vmcoreinfo.txt > > > > diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h > > index 88d0738d76..cf781bcd34 100644 > > --- a/include/hw/acpi/aml-build.h > > +++ b/include/hw/acpi/aml-build.h > > @@ -211,6 +211,7 @@ struct AcpiBuildTables { > > GArray *rsdp; > > GArray *tcpalog; > > GArray *vmgenid; > > + GArray *vmcoreinfo; > > BIOSLinker *linker; > > } AcpiBuildTables; > > > > diff --git a/include/hw/acpi/vmcoreinfo.h b/include/hw/acpi/vmcoreinfo.h > > new file mode 100644 > > index 0000000000..40fe99c3ed > > --- /dev/null > > +++ b/include/hw/acpi/vmcoreinfo.h > > @@ -0,0 +1,36 @@ > > +#ifndef ACPI_VMCOREINFO_H > > +#define ACPI_VMCOREINFO_H > > + > > +#include 
"hw/acpi/bios-linker-loader.h" > > +#include "hw/qdev.h" > > + > > +#define VMCOREINFO_DEVICE "vmcoreinfo" > > +#define VMCOREINFO_FW_CFG_FILE "etc/vmcoreinfo" > > +#define VMCOREINFO_ADDR_FW_CFG_FILE "etc/vmcoreinfo-addr" > > + > > +#define VMCOREINFO_FW_CFG_SIZE 4096 /* Occupy a page of memory */ > > +#define VMCOREINFO_OFFSET 40 /* allow space for > > + * OVMF SDT Header Probe > > Supressor > > + */ > > + > > +#define VMCOREINFO(obj) OBJECT_CHECK(VmcoreinfoState, (obj), > > VMCOREINFO_DEVICE) > > + > > +typedef struct VmcoreinfoState { > > I think this should be spelled with a bit more camel-casing, like > VMCoreInfoState or some such. >
ok > > + DeviceClass parent_obj; > > + uint8_t vmcoreinfo_addr_le[8]; /* Address of memory region */ > > + bool write_pointer_available; > > +} VmcoreinfoState; > > + > > +/* returns NULL unless there is exactly one device */ > > +static inline Object *find_vmcoreinfo_dev(void) > > +{ > > + return object_resolve_path_type("", VMCOREINFO_DEVICE, NULL); > > +} > > + > > +void vmcoreinfo_build_acpi(VmcoreinfoState *vis, GArray *table_data, > > + GArray *vmci, BIOSLinker *linker); > > +void vmcoreinfo_add_fw_cfg(VmcoreinfoState *vis, FWCfgState *s, GArray > > *vmci); > > +bool vmcoreinfo_get(VmcoreinfoState *vis, uint64_t *paddr, uint64_t *size, > > + Error **errp); > > + > > +#endif > > diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c > > index 36a6cc450e..47043ade4a 100644 > > --- a/hw/acpi/aml-build.c > > +++ b/hw/acpi/aml-build.c > > @@ -1561,6 +1561,7 @@ void acpi_build_tables_init(AcpiBuildTables *tables) > > tables->table_data = g_array_new(false, true /* clear */, 1); > > tables->tcpalog = g_array_new(false, true /* clear */, 1); > > tables->vmgenid = g_array_new(false, true /* clear */, 1); > > + tables->vmcoreinfo = g_array_new(false, true /* clear */, 1); > > tables->linker = bios_linker_loader_init(); > > } > > > > @@ -1571,6 +1572,7 @@ void acpi_build_tables_cleanup(AcpiBuildTables > > *tables, bool mfre) > > g_array_free(tables->table_data, true); > > g_array_free(tables->tcpalog, mfre); > > g_array_free(tables->vmgenid, mfre); > > + g_array_free(tables->vmcoreinfo, mfre); > > } > > > > /* Build rsdt table */ > > diff --git a/hw/acpi/vmcoreinfo.c b/hw/acpi/vmcoreinfo.c > > new file mode 100644 > > index 0000000000..216e0bb83a > > --- /dev/null > > +++ b/hw/acpi/vmcoreinfo.c > > @@ -0,0 +1,198 @@ > > +/* > > + * Virtual Machine coreinfo device > > + * (based on Virtual Machine Generation ID Device) > > + * > > + * Copyright (C) 2017 Red Hat, Inc. > > + * Copyright (C) 2017 Skyport Systems. 
> > + * > > + * Authors: Marc-André Lureau <marcandre.lur...@redhat.com> > > + * Ben Warren <b...@skyportsystems.com> > > + * > > + * This work is licensed under the terms of the GNU GPL, version 2 or > > later. > > + * See the COPYING file in the top-level directory. > > + * > > + */ > > +#include "qemu/osdep.h" > > +#include "hw/acpi/acpi.h" > > +#include "hw/acpi/aml-build.h" > > +#include "hw/acpi/vmcoreinfo.h" > > +#include "hw/nvram/fw_cfg.h" > > +#include "sysemu/sysemu.h" > > +#include "qapi/error.h" > > + > > +void vmcoreinfo_build_acpi(VmcoreinfoState *vis, GArray *table_data, > > + GArray *vmci, BIOSLinker *linker) > > +{ > > + Aml *ssdt, *dev, *scope, *method, *addr, *if_ctx; > > + uint32_t vgia_offset; > > This should be called "vcia_offset". ok > > > + > > + g_array_set_size(vmci, VMCOREINFO_FW_CFG_SIZE); > > + > > + /* Put this in a separate SSDT table */ > > + ssdt = init_aml_allocator(); > > + > > + /* Reserve space for header */ > > + acpi_data_push(ssdt->buf, sizeof(AcpiTableHeader)); > > + > > + /* Storage address */ > > + vgia_offset = table_data->len + > > + build_append_named_dword(ssdt->buf, "VCIA"); > > + scope = aml_scope("\\_SB"); > > + dev = aml_device("VMCI"); > > + aml_append(dev, aml_name_decl("_HID", aml_string("QEMUVMCI"))); > > + > > + /* Simple status method to check that address is linked and non-zero > > */ > > + method = aml_method("_STA", 0, AML_NOTSERIALIZED); > > + addr = aml_local(0); > > + aml_append(method, aml_store(aml_int(0xf), addr)); > > + if_ctx = aml_if(aml_equal(aml_name("VCIA"), aml_int(0))); > > + aml_append(if_ctx, aml_store(aml_int(0), addr)); > > + aml_append(method, if_ctx); > > + aml_append(method, aml_return(addr)); > > + aml_append(dev, method); > > + > > + /* the ADDR method returns two 32-bit words representing the lower and > > + * upper halves of the physical address of the vmcoreinfo area > > + */ > > + method = aml_method("ADDR", 0, AML_NOTSERIALIZED); > > + > > + addr = aml_local(0); > > + 
aml_append(method, aml_store(aml_package(2), addr)); > > + > > + aml_append(method, aml_store(aml_add(aml_name("VCIA"), > > + aml_int(VMCOREINFO_OFFSET), > > NULL), > > + aml_index(addr, aml_int(0)))); > > + aml_append(method, aml_store(aml_int(0), aml_index(addr, > > aml_int(1)))); > > + aml_append(method, aml_return(addr)); > > + > > + aml_append(dev, method); > > + aml_append(scope, dev); > > + aml_append(ssdt, scope); > > + > > + g_array_append_vals(table_data, ssdt->buf->data, ssdt->buf->len); > > + > > + /* Allocate guest memory */ > > + bios_linker_loader_alloc(linker, VMCOREINFO_FW_CFG_FILE, vmci, 4096, > > + false /* page boundary, high memory */); > > + > > + /* Patch address of vmcoreinfo fw_cfg blob into the ADDR fw_cfg > > + * blob so QEMU can read the info from there. The address is > > + * expected to be < 4GB, but write 64 bits anyway. > > + * The address that is patched in is offset in order to implement > > + * the "OVMF SDT Header probe suppressor" > > + * see docs/specs/vmcoreinfo.txt for more details. > > + */ > > + bios_linker_loader_write_pointer(linker, > > + VMCOREINFO_ADDR_FW_CFG_FILE, 0, sizeof(uint64_t), > > + VMCOREINFO_FW_CFG_FILE, VMCOREINFO_OFFSET); > > + > > + /* Patch address of vmcoreinfo into the AML so OSPM can retrieve > > + * and read it. Note that while we provide storage for 64 bits, only > > + * the least-significant 32 get patched into AML. > > + */ > > + bios_linker_loader_add_pointer(linker, > > + ACPI_BUILD_TABLE_FILE, vgia_offset, sizeof(uint32_t), > > + VMCOREINFO_FW_CFG_FILE, 0); > > + > > + build_header(linker, table_data, > > + (void *)(table_data->data + table_data->len - ssdt->buf->len), > > + "SSDT", ssdt->buf->len, 1, NULL, "VMCOREIN"); > > + free_aml_allocator(); > > +} > > + > > +void vmcoreinfo_add_fw_cfg(VmcoreinfoState *vis, FWCfgState *s, GArray > > *vmci) > > +{ > > + /* Create a read-only fw_cfg file for vmcoreinfo allocation */ > > + /* XXX: linker could learn to allocate without backing fw_cfg? 
*/ > > Yes, and a number of other things, as I'm sure Igor and MST will point > out upon reading the patch :) > > > + fw_cfg_add_file(s, VMCOREINFO_FW_CFG_FILE, vmci->data, > > + VMCOREINFO_FW_CFG_SIZE); > > + /* Create a read-write fw_cfg file for Address */ > > + fw_cfg_add_file_callback(s, VMCOREINFO_ADDR_FW_CFG_FILE, NULL, NULL, > > + vis->vmcoreinfo_addr_le, > > + ARRAY_SIZE(vis->vmcoreinfo_addr_le), false); > > +} > > + > > +bool vmcoreinfo_get(VmcoreinfoState *vis, > > + uint64_t *paddr, uint64_t *size, > > + Error **errp) > > +{ > > + uint32_t vmcoreinfo_addr; > > + uint32_t version; > > + > > + assert(vis); > > + assert(paddr); > > + assert(size); > > + > > + memcpy(&vmcoreinfo_addr, vis->vmcoreinfo_addr_le, > > sizeof(vmcoreinfo_addr)); > > + vmcoreinfo_addr = le32_to_cpu(vmcoreinfo_addr); > > + if (!vmcoreinfo_addr) { > > + error_setg(errp, "BIOS has not yet written the address of %s", > > + VMCOREINFO_DEVICE); > > + return false; > > + } > > + > > + cpu_physical_memory_read(vmcoreinfo_addr, &version, sizeof(version)); > > + if (version != 0) { > > + error_setg(errp, "Unknown %s memory version", VMCOREINFO_DEVICE); > > + return false; > > + } > > + > > + cpu_physical_memory_read(vmcoreinfo_addr + 4, paddr, sizeof(paddr)); > > This is wrong, it should be sizeof(*paddr). right > > > + *paddr = le64_to_cpu(*paddr); > > + cpu_physical_memory_read(vmcoreinfo_addr + 12, size, sizeof(size)); > > This is wrong for two reasons: > - first, it should be sizeof(*size), ok > - second, sizeof(*size) is 8, however, according to the design, the size > field is 4 bytes wide. I think the function prototype should be > updated to fix this. 
> yes > > + *size = le32_to_cpu(*size); > > + > > + return true; > > +} > > + > > +static const VMStateDescription vmstate_vmcoreinfo = { > > + .name = "vmcoreinfo", > > + .version_id = 1, > > + .minimum_version_id = 1, > > + .fields = (VMStateField[]) { > > + VMSTATE_UINT8_ARRAY(vmcoreinfo_addr_le, VmcoreinfoState, > > sizeof(uint64_t)), > > + VMSTATE_END_OF_LIST() > > + }, > > +}; > > + > > +static void vmcoreinfo_realize(DeviceState *dev, Error **errp) > > +{ > > + if (!bios_linker_loader_can_write_pointer()) { > > + error_setg(errp, "%s requires DMA write support in fw_cfg, " > > + "which this machine type does not provide", > > + VMCOREINFO_DEVICE); > > + return; > > + } > > + > > + /* Given that this function is executing, there is at least one > > VMCOREINFO > > + * device. Check if there are several. > > + */ > > + if (!find_vmcoreinfo_dev()) { > > + error_setg(errp, "at most one %s device is permitted", > > + VMCOREINFO_DEVICE); > > + return; > > + } > > +} > > You didn't copy the reset logic from vmgenid, but that's wrong -- all > device models that produce WRITE_POINTER linker/loader commands must > forget about guest-returned GPAs upon reset. Think of it this way: when > the guest executes WRITE_POINTER, it creates a reference to guest-phys > memory in QEMU; and when the guest initiates a reboot, guest-phys memory > is fully "invalidated", so all such references must be dropped in QEMU. > > (S3 resume is an exception, because guest memory is preserved, but both > SeaBIOS and OVMF handle that specially -- they stash the WRITE_POINTER > commands in a condensed format during normal boot, and then replay them > during S3 resume. Thus, they restore the QEMU references discussed > above at S3 resume.) 
> > In more practical terms, assume the guest boots Linux, the address is > passed back fine, then the Linux guest reboots, and then some other > guest OS is launched from the guest firmware and/or bootloader (or we > just stay in the firmware / bootloader indefinitely). If a dump is > requested at this point, QEMU shouldn't go looking for the VMCOREINFO > ELF note at the previously communicated address. > Good point, I didn't realize it. > The rest looks OK to me. Thanks