On Fri, Jul 14, 2017 at 08:20:05PM +0200, Marc-André Lureau wrote: > The VM coreinfo (vmcoreinfo) device is an emulated device which > exposes a 4k memory range to the guest to store various information > useful to debug the guest OS. (it is greatly inspired by the VMGENID > device implementation) > > This is an early-boot alternative to the qemu-ga VMDUMP_INFO event > proposed in "[PATCH 00/21] WIP: dump: add kaslr support". > > A proof-of-concept kernel module: > https://github.com/elmarco/vmgenid-test/blob/master/qemuvmci-test.c > > Signed-off-by: Marc-André Lureau <marcandre.lur...@redhat.com> > Reviewed-by: Laszlo Ersek <ler...@redhat.com>
It worries me that the format of this seems completely undefined except in the patchset cover letter. I don't think we should merge this before the format is defined. > --- > include/hw/acpi/aml-build.h | 1 + > include/hw/acpi/vmcoreinfo.h | 37 +++++++ > hw/acpi/aml-build.c | 2 + > hw/acpi/vmcoreinfo.c | 208 > +++++++++++++++++++++++++++++++++++++ > hw/i386/acpi-build.c | 14 +++ > default-configs/arm-softmmu.mak | 1 + > default-configs/i386-softmmu.mak | 1 + > default-configs/x86_64-softmmu.mak | 1 + > docs/specs/vmcoreinfo.txt | 138 ++++++++++++++++++++++++ > hw/acpi/Makefile.objs | 1 + > 10 files changed, 404 insertions(+) > create mode 100644 include/hw/acpi/vmcoreinfo.h > create mode 100644 hw/acpi/vmcoreinfo.c > create mode 100644 docs/specs/vmcoreinfo.txt > > diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h > index 88d0738d76..cf781bcd34 100644 > --- a/include/hw/acpi/aml-build.h > +++ b/include/hw/acpi/aml-build.h > @@ -211,6 +211,7 @@ struct AcpiBuildTables { > GArray *rsdp; > GArray *tcpalog; > GArray *vmgenid; > + GArray *vmcoreinfo; > BIOSLinker *linker; > } AcpiBuildTables; > > diff --git a/include/hw/acpi/vmcoreinfo.h b/include/hw/acpi/vmcoreinfo.h > new file mode 100644 > index 0000000000..6a73bcd1b2 > --- /dev/null > +++ b/include/hw/acpi/vmcoreinfo.h > @@ -0,0 +1,37 @@ > +#ifndef ACPI_VMCOREINFO_H > +#define ACPI_VMCOREINFO_H > + > +#include "hw/acpi/bios-linker-loader.h" > +#include "hw/qdev.h" > + > +#define VMCOREINFO_DEVICE "vmcoreinfo" > +#define VMCOREINFO_FW_CFG_FILE "etc/vmcoreinfo" > +#define VMCOREINFO_ADDR_FW_CFG_FILE "etc/vmcoreinfo-addr" > + > +/* Occupy a page of memory */ > +#define VMCOREINFO_FW_CFG_SIZE 4096 > + > +/* allow space for OVMF SDT Header Probe Supressor */ > +#define VMCOREINFO_OFFSET sizeof(AcpiTableHeader) > + > +#define VMCOREINFO(obj) OBJECT_CHECK(VMCoreInfoState, (obj), > VMCOREINFO_DEVICE) > + > +typedef struct VMCoreInfoState { > + DeviceClass parent_obj; > + uint8_t vmcoreinfo_addr_le[8]; /* Address of 
memory region */ > + bool write_pointer_available; > +} VMCoreInfoState; > + > +/* returns NULL unless there is exactly one device */ > +static inline Object *find_vmcoreinfo_dev(void) > +{ > + return object_resolve_path_type("", VMCOREINFO_DEVICE, NULL); > +} > + > +void vmcoreinfo_build_acpi(VMCoreInfoState *vis, GArray *table_data, > + GArray *vmci, BIOSLinker *linker); > +void vmcoreinfo_add_fw_cfg(VMCoreInfoState *vis, FWCfgState *s, GArray > *vmci); > +bool vmcoreinfo_get(VMCoreInfoState *vis, uint64_t *paddr, uint32_t *size, > + Error **errp); > + > +#endif > diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c > index 36a6cc450e..47043ade4a 100644 > --- a/hw/acpi/aml-build.c > +++ b/hw/acpi/aml-build.c > @@ -1561,6 +1561,7 @@ void acpi_build_tables_init(AcpiBuildTables *tables) > tables->table_data = g_array_new(false, true /* clear */, 1); > tables->tcpalog = g_array_new(false, true /* clear */, 1); > tables->vmgenid = g_array_new(false, true /* clear */, 1); > + tables->vmcoreinfo = g_array_new(false, true /* clear */, 1); > tables->linker = bios_linker_loader_init(); > } > > @@ -1571,6 +1572,7 @@ void acpi_build_tables_cleanup(AcpiBuildTables *tables, > bool mfre) > g_array_free(tables->table_data, true); > g_array_free(tables->tcpalog, mfre); > g_array_free(tables->vmgenid, mfre); > + g_array_free(tables->vmcoreinfo, mfre); > } > > /* Build rsdt table */ > diff --git a/hw/acpi/vmcoreinfo.c b/hw/acpi/vmcoreinfo.c > new file mode 100644 > index 0000000000..0ea41de8d9 > --- /dev/null > +++ b/hw/acpi/vmcoreinfo.c > @@ -0,0 +1,208 @@ > +/* > + * Virtual Machine coreinfo device > + * (based on Virtual Machine Generation ID Device) > + * > + * Copyright (C) 2017 Red Hat, Inc. > + * Copyright (C) 2017 Skyport Systems. > + * > + * Authors: Marc-André Lureau <marcandre.lur...@redhat.com> > + * Ben Warren <b...@skyportsystems.com> > + * > + * This work is licensed under the terms of the GNU GPL, version 2 or later. 
> + * See the COPYING file in the top-level directory. > + * > + */ > +#include "qemu/osdep.h" > +#include "hw/acpi/acpi.h" > +#include "hw/acpi/aml-build.h" > +#include "hw/acpi/vmcoreinfo.h" > +#include "hw/nvram/fw_cfg.h" > +#include "sysemu/sysemu.h" > +#include "qapi/error.h" > + > +void vmcoreinfo_build_acpi(VMCoreInfoState *vis, GArray *table_data, > + GArray *vmci, BIOSLinker *linker) > +{ > + Aml *ssdt, *dev, *scope, *method, *addr, *if_ctx; > + uint32_t vcia_offset; > + > + g_array_set_size(vmci, VMCOREINFO_FW_CFG_SIZE); > + > + /* Put this in a separate SSDT table */ > + ssdt = init_aml_allocator(); > + > + /* Reserve space for header */ > + acpi_data_push(ssdt->buf, sizeof(AcpiTableHeader)); > + > + /* Storage address */ > + vcia_offset = table_data->len + > + build_append_named_dword(ssdt->buf, "VCIA"); > + scope = aml_scope("\\_SB"); > + dev = aml_device("VMCI"); > + aml_append(dev, aml_name_decl("_HID", aml_string("QEMUVMCI"))); > + > + /* Simple status method to check that address is linked and non-zero */ > + method = aml_method("_STA", 0, AML_NOTSERIALIZED); > + addr = aml_local(0); > + aml_append(method, aml_store(aml_int(0xf), addr)); > + if_ctx = aml_if(aml_equal(aml_name("VCIA"), aml_int(0))); > + aml_append(if_ctx, aml_store(aml_int(0), addr)); > + aml_append(method, if_ctx); > + aml_append(method, aml_return(addr)); > + aml_append(dev, method); > + > + /* the ADDR method returns two 32-bit words representing the lower and > + * upper halves of the physical address of the vmcoreinfo area > + */ > + method = aml_method("ADDR", 0, AML_NOTSERIALIZED); > + > + addr = aml_local(0); > + aml_append(method, aml_store(aml_package(2), addr)); > + > + aml_append(method, aml_store(aml_add(aml_name("VCIA"), > + aml_int(VMCOREINFO_OFFSET), NULL), > + aml_index(addr, aml_int(0)))); > + aml_append(method, aml_store(aml_int(0), aml_index(addr, aml_int(1)))); > + aml_append(method, aml_return(addr)); > + > + aml_append(dev, method); > + aml_append(scope, dev); 
> + aml_append(ssdt, scope); > + > + g_array_append_vals(table_data, ssdt->buf->data, ssdt->buf->len); > + > + /* Allocate guest memory */ > + bios_linker_loader_alloc(linker, VMCOREINFO_FW_CFG_FILE, vmci, 4096, > + false /* page boundary, high memory */); > + > + /* Patch address of vmcoreinfo fw_cfg blob into the ADDR fw_cfg > + * blob so QEMU can read the info from there. The address is > + * expected to be < 4GB, but write 64 bits anyway. > + * The address that is patched in is offset in order to implement > + * the "OVMF SDT Header probe suppressor" > + * see docs/specs/vmcoreinfo.txt for more details. > + */ > + bios_linker_loader_write_pointer(linker, > + VMCOREINFO_ADDR_FW_CFG_FILE, 0, sizeof(uint64_t), > + VMCOREINFO_FW_CFG_FILE, VMCOREINFO_OFFSET); > + > + /* Patch address of vmcoreinfo into the AML so OSPM can retrieve > + * and read it. Note that while we provide storage for 64 bits, only > + * the least-signficant 32 get patched into AML. > + */ > + bios_linker_loader_add_pointer(linker, > + ACPI_BUILD_TABLE_FILE, vcia_offset, sizeof(uint32_t), > + VMCOREINFO_FW_CFG_FILE, 0); > + > + build_header(linker, table_data, > + (void *)(table_data->data + table_data->len - ssdt->buf->len), > + "SSDT", ssdt->buf->len, 1, NULL, "VMCOREIN"); > + free_aml_allocator(); > +} > + > +void vmcoreinfo_add_fw_cfg(VMCoreInfoState *vis, FWCfgState *s, GArray *vmci) > +{ > + /* Create a read-only fw_cfg file for vmcoreinfo allocation */ > + /* XXX: linker could learn to allocate without backing fw_cfg? 
*/ > + fw_cfg_add_file(s, VMCOREINFO_FW_CFG_FILE, vmci->data, > + VMCOREINFO_FW_CFG_SIZE); > + /* Create a read-write fw_cfg file for Address */ > + fw_cfg_add_file_callback(s, VMCOREINFO_ADDR_FW_CFG_FILE, NULL, NULL, > + vis->vmcoreinfo_addr_le, > + ARRAY_SIZE(vis->vmcoreinfo_addr_le), false); > +} > + > +bool vmcoreinfo_get(VMCoreInfoState *vis, > + uint64_t *paddr, uint32_t *size, > + Error **errp) > +{ > + uint32_t vmcoreinfo_addr; > + uint32_t version; > + > + assert(vis); > + assert(paddr); > + assert(size); > + > + memcpy(&vmcoreinfo_addr, vis->vmcoreinfo_addr_le, > sizeof(vmcoreinfo_addr)); > + vmcoreinfo_addr = le32_to_cpu(vmcoreinfo_addr); > + if (!vmcoreinfo_addr) { > + error_setg(errp, "BIOS has not yet written the address of %s", > + VMCOREINFO_DEVICE); > + return false; > + } > + > + cpu_physical_memory_read(vmcoreinfo_addr, &version, sizeof(version)); > + if (version != 0) { > + error_setg(errp, "Unknown %s memory version", VMCOREINFO_DEVICE); > + return false; > + } > + > + cpu_physical_memory_read(vmcoreinfo_addr + 4, paddr, sizeof(*paddr)); > + *paddr = le64_to_cpu(*paddr); > + cpu_physical_memory_read(vmcoreinfo_addr + 12, size, sizeof(*size)); > + *size = le32_to_cpu(*size); > + > + return true; > +} > + > +static const VMStateDescription vmstate_vmcoreinfo = { > + .name = "vmcoreinfo", > + .version_id = 1, > + .minimum_version_id = 1, > + .fields = (VMStateField[]) { > + VMSTATE_UINT8_ARRAY(vmcoreinfo_addr_le, VMCoreInfoState, > sizeof(uint64_t)), > + VMSTATE_END_OF_LIST() > + }, > +}; > + > +static void vmcoreinfo_handle_reset(void *opaque) > +{ > + VMCoreInfoState *vis = VMCOREINFO(opaque); > + > + /* Clear the guest-allocated address when the VM resets */ > + memset(vis->vmcoreinfo_addr_le, 0, ARRAY_SIZE(vis->vmcoreinfo_addr_le)); > +} > + > +static void vmcoreinfo_realize(DeviceState *dev, Error **errp) > +{ > + if (!bios_linker_loader_can_write_pointer()) { > + error_setg(errp, "%s requires DMA write support in fw_cfg, " > + "which this 
machine type does not provide", > + VMCOREINFO_DEVICE); > + return; > + } > + > + /* Given that this function is executing, there is at least one > VMCOREINFO > + * device. Check if there are several. > + */ > + if (!find_vmcoreinfo_dev()) { > + error_setg(errp, "at most one %s device is permitted", > + VMCOREINFO_DEVICE); > + return; > + } > + > + qemu_register_reset(vmcoreinfo_handle_reset, dev); > +} > + > +static void vmcoreinfo_device_class_init(ObjectClass *klass, void *data) > +{ > + DeviceClass *dc = DEVICE_CLASS(klass); > + > + dc->vmsd = &vmstate_vmcoreinfo; > + dc->realize = vmcoreinfo_realize; > + dc->hotpluggable = false; > +} > + > +static const TypeInfo vmcoreinfo_device_info = { > + .name = VMCOREINFO_DEVICE, > + .parent = TYPE_DEVICE, > + .instance_size = sizeof(VMCoreInfoState), > + .class_init = vmcoreinfo_device_class_init, > +}; > + > +static void vmcoreinfo_register_types(void) > +{ > + type_register_static(&vmcoreinfo_device_info); > +} > + > +type_init(vmcoreinfo_register_types) > diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c > index 6b7bade183..7ac529680e 100644 > --- a/hw/i386/acpi-build.c > +++ b/hw/i386/acpi-build.c > @@ -43,6 +43,7 @@ > #include "sysemu/tpm.h" > #include "hw/acpi/tpm.h" > #include "hw/acpi/vmgenid.h" > +#include "hw/acpi/vmcoreinfo.h" > #include "sysemu/tpm_backend.h" > #include "hw/timer/mc146818rtc_regs.h" > #include "sysemu/numa.h" > @@ -2631,6 +2632,7 @@ void acpi_build(AcpiBuildTables *tables, MachineState > *machine) > GArray *tables_blob = tables->table_data; > AcpiSlicOem slic_oem = { .id = NULL, .table_id = NULL }; > Object *vmgenid_dev; > + Object *vmcoreinfo_dev; > > acpi_get_pm_info(&pm); > acpi_get_misc_info(&misc); > @@ -2680,6 +2682,12 @@ void acpi_build(AcpiBuildTables *tables, MachineState > *machine) > vmgenid_build_acpi(VMGENID(vmgenid_dev), tables_blob, > tables->vmgenid, tables->linker); > } > + vmcoreinfo_dev = find_vmcoreinfo_dev(); > + if (vmcoreinfo_dev) { > + 
acpi_add_table(table_offsets, tables_blob); > + vmcoreinfo_build_acpi(VMCOREINFO(vmcoreinfo_dev), tables_blob, > + tables->vmcoreinfo, tables->linker); > + } > > if (misc.has_hpet) { > acpi_add_table(table_offsets, tables_blob); > @@ -2856,6 +2864,7 @@ void acpi_setup(void) > AcpiBuildTables tables; > AcpiBuildState *build_state; > Object *vmgenid_dev; > + Object *vmcoreinfo_dev; > > if (!pcms->fw_cfg) { > ACPI_BUILD_DPRINTF("No fw cfg. Bailing out.\n"); > @@ -2897,6 +2906,11 @@ void acpi_setup(void) > vmgenid_add_fw_cfg(VMGENID(vmgenid_dev), pcms->fw_cfg, > tables.vmgenid); > } > + vmcoreinfo_dev = find_vmcoreinfo_dev(); > + if (vmcoreinfo_dev) { > + vmcoreinfo_add_fw_cfg(VMCOREINFO(vmcoreinfo_dev), pcms->fw_cfg, > + tables.vmcoreinfo); > + } > > if (!pcmc->rsdp_in_ram) { > /* > diff --git a/default-configs/arm-softmmu.mak b/default-configs/arm-softmmu.mak > index 93e995d318..320dd3680d 100644 > --- a/default-configs/arm-softmmu.mak > +++ b/default-configs/arm-softmmu.mak > @@ -120,6 +120,7 @@ CONFIG_XIO3130=y > CONFIG_IOH3420=y > CONFIG_I82801B11=y > CONFIG_ACPI=y > +CONFIG_ACPI_VMCOREINFO=y > CONFIG_SMBIOS=y > CONFIG_ASPEED_SOC=y > CONFIG_GPIO_KEY=y > diff --git a/default-configs/i386-softmmu.mak > b/default-configs/i386-softmmu.mak > index d2ab2f6655..df68628895 100644 > --- a/default-configs/i386-softmmu.mak > +++ b/default-configs/i386-softmmu.mak > @@ -59,3 +59,4 @@ CONFIG_SMBIOS=y > CONFIG_HYPERV_TESTDEV=$(CONFIG_KVM) > CONFIG_PXB=y > CONFIG_ACPI_VMGENID=y > +CONFIG_ACPI_VMCOREINFO=y > diff --git a/default-configs/x86_64-softmmu.mak > b/default-configs/x86_64-softmmu.mak > index 9bde2f1c4b..e39ad5a680 100644 > --- a/default-configs/x86_64-softmmu.mak > +++ b/default-configs/x86_64-softmmu.mak > @@ -59,3 +59,4 @@ CONFIG_SMBIOS=y > CONFIG_HYPERV_TESTDEV=$(CONFIG_KVM) > CONFIG_PXB=y > CONFIG_ACPI_VMGENID=y > +CONFIG_ACPI_VMCOREINFO=y > diff --git a/docs/specs/vmcoreinfo.txt b/docs/specs/vmcoreinfo.txt > new file mode 100644 > index 0000000000..36d5a39ab1 > --- 
/dev/null > +++ b/docs/specs/vmcoreinfo.txt > @@ -0,0 +1,138 @@ > +VIRTUAL MACHINE COREINFO DEVICE > +=============================== > + > +Copyright (C) 2017 Red Hat, Inc. > + > +This work is licensed under the terms of the GNU GPL, version 2 or later. > +See the COPYING file in the top-level directory. > + > +=== > + > +The VM coreinfo (vmcoreinfo) device is an emulated device which > +exposes a 4k memory range to the guest to store various informations > +useful to debug the guest OS. > + > +QEMU Implementation > +------------------- > + > +The vmcoreinfo device is put in its own ACPI descriptor table, in a > +Secondary System Description Table, or SSDT. > + > +The following is a dump of the contents from a running system: > + > +# iasl -p ./SSDT -d /sys/firmware/acpi/tables/SSDT > +/* > + * Intel ACPI Component Architecture > + * AML/ASL+ Disassembler version 20160831-64 > + * Copyright (c) 2000 - 2016 Intel Corporation > + * > + * Disassembling to symbolic ASL+ operators > + * > + * Disassembly of /sys/firmware/acpi/tables/SSDT, Mon Apr 24 15:59:53 2017 > + * > + * Original Table Header: > + * Signature "SSDT" > + * Length 0x00000086 (134) > + * Revision 0x01 > + * Checksum 0x5C > + * OEM ID "BOCHS " > + * OEM Table ID "VMCOREIN" > + * OEM Revision 0x00000001 (1) > + * Compiler ID "BXPC" > + * Compiler Version 0x00000001 (1) > + */ > +DefinitionBlock ("", "SSDT", 1, "BOCHS ", "VMCOREIN", 0x00000001) > +{ > + Name (VCIA, 0x3FFFF000) > + Scope (\_SB) > + { > + Device (VMCI) > + { > + Name (_HID, "QEMUVMCI") // _HID: Hardware ID > + Method (_STA, 0, NotSerialized) // _STA: Status > + { > + Local0 = 0x0F > + If (VCIA == Zero) > + { > + Local0 = Zero > + } > + > + Return (Local0) > + } > + > + Method (ADDR, 0, NotSerialized) > + { > + Local0 = Package (0x02) {} > + Local0 [Zero] = (VCIA + 0x24) > + Local0 [One] = Zero > + Return (Local0) > + } > + } > + } > +} > + > + > +Design Details: > +--------------- > + > +QEMU must be able to read the contents of the device 
memory, > +specifically when starting a memory dump. In order to do this, QEMU > +must know the address that has been allocated. > + > +The mechanism chosen for this memory sharing is writeable fw_cfg blobs. > +These are data object that are visible to both QEMU and guests, and are > +addressable as sequential files. > + > +More information about fw_cfg can be found in "docs/specs/fw_cfg.txt" > + > +Two fw_cfg blobs are used in this case: > + > +/etc/vmcoreinfo - used to allocate memory range, read-only to the guest > +/etc/vmcoreinfo-addr - contains the address of the allocated range > + - writeable by the guest > + > + > +QEMU sends the following commands to the guest at startup: > + > +1. Allocate memory for vmcoreinfo fw_cfg blob. > +2. Write the address of vmcoreinfo into the SSDT (VCIA ACPI variable as > + shown above in the iasl dump). Note that this change is not propagated > + back to QEMU. > +3. Write the address of vmcoreinfo back to QEMU's copy of vmcoreinfo-addr > + via the fw_cfg DMA interface. > + > +After step 3, QEMU is able to read the contents of vmcoreinfo. > + > +The value of VCIA is persisted via the VMState mechanism. > + > + > +Storage Format: > +--------------- > + > +The content is expected to use little-endian format. > + > +In order to implement an OVMF "SDT Header Probe Suppressor", the contents of > +the vmcoreinfo blob has 36 bytes of padding: > + > ++-----------------------------------+ > +| SSDT with OEM Table ID = VMCOREIN | > ++-----------------------------------+ > +| ... | TOP OF PAGE > +| VCIA dword object ----------------|-----> +---------------------------+ > +| ... | | fw-allocated array for | > +| _STA method referring to VCIA | | "etc/vmcoreinfo" | > +| ... | +---------------------------+ > +| ADDR method referring to VCIA | | 0: OVMF SDT Header probe | > +| ... | | suppressor | > ++-----------------------------------+ | 36: uint32 version field | > + | 40: info contents | > + | .... 
| > + +---------------------------+ > + END OF PAGE > + > +Version 0 content: > + > + uint64 paddr: > + Physical address of the Linux vmcoreinfo ELF note. > + uint32 size: > + Size of the vmcoreinfo ELF note. > diff --git a/hw/acpi/Makefile.objs b/hw/acpi/Makefile.objs > index 11c35bcb44..9623078f95 100644 > --- a/hw/acpi/Makefile.objs > +++ b/hw/acpi/Makefile.objs > @@ -6,6 +6,7 @@ common-obj-$(CONFIG_ACPI_MEMORY_HOTPLUG) += memory_hotplug.o > common-obj-$(CONFIG_ACPI_CPU_HOTPLUG) += cpu.o > common-obj-$(CONFIG_ACPI_NVDIMM) += nvdimm.o > common-obj-$(CONFIG_ACPI_VMGENID) += vmgenid.o > +common-obj-$(CONFIG_ACPI_VMCOREINFO) += vmcoreinfo.o > common-obj-$(call lnot,$(CONFIG_ACPI_X86)) += acpi-stub.o > > common-obj-y += acpi_interface.o > -- > 2.13.1.395.gf7b71de06