This adds the fw_cfg interface that QEMU uses to pass data to the BIOS. It implements both I/O port access and DMA access; SeaBIOS will use the latter if available. This should be useful for adding ACPI tables or SMBIOS data.
This requires the latest vmm-firmware (which I just committed) and the vmm(4) diff I just sent out to work correctly. Since fw_cfg requires DMA memory to be zeroed out, I extended write_mem to do this when a NULL pointer is passed for buf. I felt this is something that may be generally useful. -- :wq Claudio Index: Makefile =================================================================== RCS file: /cvs/src/usr.sbin/vmd/Makefile,v retrieving revision 1.20 diff -u -p -r1.20 Makefile --- Makefile 9 Sep 2018 04:09:32 -0000 1.20 +++ Makefile 8 Dec 2018 06:59:17 -0000 @@ -6,7 +6,7 @@ PROG= vmd SRCS= vmd.c control.c log.c priv.c proc.c config.c vmm.c SRCS+= vm.c loadfile_elf.c pci.c virtio.c i8259.c mc146818.c SRCS+= ns8250.c i8253.c vmboot.c ufs.c disklabel.c dhcp.c packet.c -SRCS+= parse.y atomicio.c vioscsi.c vioraw.c vioqcow2.c +SRCS+= parse.y atomicio.c vioscsi.c vioraw.c vioqcow2.c fw_cfg.c CFLAGS+= -Wall -I${.CURDIR} CFLAGS+= -Wstrict-prototypes -Wmissing-prototypes Index: fw_cfg.c =================================================================== RCS file: fw_cfg.c diff -N fw_cfg.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ fw_cfg.c 10 Dec 2018 16:39:55 -0000 @@ -0,0 +1,434 @@ +/* $OpenBSD$ */ +/* + * Copyright (c) 2018 Claudio Jeker <clau...@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include <sys/types.h> +#include <sys/uio.h> +#include <machine/vmmvar.h> + +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "atomicio.h" +#include "proc.h" +#include "vmd.h" +#include "vmm.h" +#include "fw_cfg.h" + +#define FW_CFG_SIGNATURE 0x0000 +#define FW_CFG_ID 0x0001 +#define FW_CFG_NOGRAPHIC 0x0004 +#define FW_CFG_FILE_DIR 0x0019 +#define FW_CFG_FILE_FIRST 0x0020 + +#define FW_CFG_DMA_SIGNATURE 0x51454d5520434647ULL /* QEMU CFG */ + +struct fw_cfg_dma_access { + uint32_t control; +#define FW_CFG_DMA_ERROR 0x0001 +#define FW_CFG_DMA_READ 0x0002 +#define FW_CFG_DMA_SKIP 0x0004 +#define FW_CFG_DMA_SELECT 0x0008 +#define FW_CFG_DMA_WRITE 0x0010 /* not implemented */ + uint32_t length; + uint64_t address; +}; + +struct fw_cfg_file { + uint32_t size; + uint16_t selector; + uint16_t reserved; + char name[56]; +}; + +extern char *__progname; + +static struct fw_cfg_state { + size_t offset; + size_t size; + uint8_t *data; +} fw_cfg_state; + +static uint64_t fw_cfg_dma_addr; + +static int fw_cfg_select_file(uint16_t); +static void fw_cfg_file_dir(void); + +void +fw_cfg_init(struct vmop_create_params *vmc) +{ + const char *bootorder = NULL; + unsigned int sd = 0; + + /* do not double print chars on serial port */ + fw_cfg_add_file("etc/screen-and-debug", &sd, sizeof(sd)); + + switch (vmc->vmc_bootdevice) { + case VMBOOTDEV_DISK: + bootorder = "/pci@i0cf8/*@2\nHALT"; + break; + case VMBOOTDEV_CDROM: + bootorder = "/pci@i0cf8/*@4/*@0/*@0,100\nHALT"; + break; + case VMBOOTDEV_NET: + /* XXX not yet */ + bootorder = "HALT"; + break; + } + if (bootorder) + fw_cfg_add_file("bootorder", bootorder, strlen(bootorder) 
+ 1); +} + +int +fw_cfg_dump(int fd) +{ + log_debug("%s: sending fw_cfg state", __func__); + if (atomicio(vwrite, fd, &fw_cfg_dma_addr, + sizeof(fw_cfg_dma_addr)) != sizeof(fw_cfg_dma_addr)) { + log_warnx("%s: error writing fw_cfg to fd", __func__); + return -1; + } + if (atomicio(vwrite, fd, &fw_cfg_state.offset, + sizeof(fw_cfg_state.offset)) != sizeof(fw_cfg_state.offset)) { + log_warnx("%s: error writing fw_cfg to fd", __func__); + return -1; + } + if (atomicio(vwrite, fd, &fw_cfg_state.size, + sizeof(fw_cfg_state.size)) != sizeof(fw_cfg_state.size)) { + log_warnx("%s: error writing fw_cfg to fd", __func__); + return -1; + } + if (fw_cfg_state.size != 0) + if (atomicio(vwrite, fd, fw_cfg_state.data, + fw_cfg_state.size) != fw_cfg_state.size) { + log_warnx("%s: error writing fw_cfg to fd", __func__); + return (-1); + } + return 0; +} + +int +fw_cfg_restore(int fd) +{ + log_debug("%s: receiving fw_cfg state", __func__); + if (atomicio(read, fd, &fw_cfg_dma_addr, + sizeof(fw_cfg_dma_addr)) != sizeof(fw_cfg_dma_addr)) { + log_warnx("%s: error reading fw_cfg from fd", __func__); + return -1; + } + if (atomicio(read, fd, &fw_cfg_state.offset, + sizeof(fw_cfg_state.offset)) != sizeof(fw_cfg_state.offset)) { + log_warnx("%s: error reading fw_cfg from fd", __func__); + return -1; + } + if (atomicio(read, fd, &fw_cfg_state.size, + sizeof(fw_cfg_state.size)) != sizeof(fw_cfg_state.size)) { + log_warnx("%s: error reading fw_cfg from fd", __func__); + return -1; + } + fw_cfg_state.data = NULL; + if (fw_cfg_state.size != 0) { + if ((fw_cfg_state.data = malloc(fw_cfg_state.size)) == NULL) + fatal("%s", __func__); + if (atomicio(read, fd, fw_cfg_state.data, + fw_cfg_state.size) != fw_cfg_state.size) { + log_warnx("%s: error reading fw_cfg from fd", __func__); + return -1; + } + } + return 0; +} + +static void +fw_cfg_reset_state(void) +{ + free(fw_cfg_state.data); + fw_cfg_state.offset = 0; + fw_cfg_state.size = 0; + fw_cfg_state.data = NULL; +} + +static void 
+fw_cfg_set_state(void *data, size_t len) +{ + if ((fw_cfg_state.data = malloc(len)) == NULL) { + log_warn("%s", __func__); + return; + } + memcpy(fw_cfg_state.data, data, len); + fw_cfg_state.size = len; + fw_cfg_state.offset = 0; +} + +static void +fw_cfg_select(uint16_t selector) +{ + uint16_t one = 1; + uint32_t id = htole32(0x3); + + fw_cfg_reset_state(); + switch (selector) { + case FW_CFG_SIGNATURE: + fw_cfg_set_state("QEMU", 4); + break; + case FW_CFG_ID: + fw_cfg_set_state(&id, sizeof(id)); + break; + case FW_CFG_NOGRAPHIC: + fw_cfg_set_state(&one, sizeof(one)); + break; + case FW_CFG_FILE_DIR: + fw_cfg_file_dir(); + break; + default: + if (!fw_cfg_select_file(selector)) + log_debug("%s: unhandled selector %x", + __func__, selector); + break; + } +} + +static void +fw_cfg_handle_dma(struct fw_cfg_dma_access *fw) +{ + uint32_t len = 0, control = fw->control; + + fw->control = 0; + if (control & FW_CFG_DMA_SELECT) { + uint16_t selector = control >> 16; + log_debug("%s: selector 0x%04x", __func__, selector); + fw_cfg_select(selector); + } + + /* calculate correct length of operation */ + if (fw_cfg_state.offset < fw_cfg_state.size) + len = fw_cfg_state.size - fw_cfg_state.offset; + if (len > fw->length) + len = fw->length; + + if (control & FW_CFG_DMA_WRITE) { + fw->control |= FW_CFG_DMA_ERROR; + } else if (control & FW_CFG_DMA_READ) { + if (write_mem(fw->address, + fw_cfg_state.data + fw_cfg_state.offset, len)) { + log_warnx("%s: write_mem error", __func__); + fw->control |= FW_CFG_DMA_ERROR; + } + /* clear rest of buffer */ + if (len < fw->length) + if (write_mem(fw->address + len, NULL, + fw->length - len)) { + log_warnx("%s: write_mem error", __func__); + fw->control |= FW_CFG_DMA_ERROR; + } + } + fw_cfg_state.offset += len; + + if (fw_cfg_state.offset == fw_cfg_state.size) + fw_cfg_reset_state(); +} + +uint8_t +vcpu_exit_fw_cfg(struct vm_run_params *vrp) +{ + uint32_t data = 0; + struct vm_exit *vei = vrp->vrp_exit; + + get_input_data(vei, &data); + + 
switch (vei->vei.vei_port) { + case FW_CFG_IO_SELECT: + if (vei->vei.vei_dir == VEI_DIR_IN) { + log_warnx("%s: fw_cfg: read from selector port " + "unsupported", __progname); + set_return_data(vei, 0); + break; + } + log_debug("%s: selector 0x%04x", __func__, data); + fw_cfg_select(data); + break; + case FW_CFG_IO_DATA: + if (vei->vei.vei_dir == VEI_DIR_OUT) { + log_debug("%s: fw_cfg: discarding data written to " + "data port", __progname); + break; + } + /* fw_cfg only defines 1-byte reads via IO port */ + if (fw_cfg_state.offset < fw_cfg_state.size) { + set_return_data(vei, + fw_cfg_state.data[fw_cfg_state.offset++]); + if (fw_cfg_state.offset == fw_cfg_state.size) + fw_cfg_reset_state(); + } else + set_return_data(vei, 0); + break; + } + + return 0xFF; +} + +uint8_t +vcpu_exit_fw_cfg_dma(struct vm_run_params *vrp) +{ + struct fw_cfg_dma_access fw_dma; + uint32_t data = 0; + struct vm_exit *vei = vrp->vrp_exit; + + if (vei->vei.vei_size != 4) { + log_debug("%s: fw_cfg_dma: discarding data written to " + "dma addr", __progname); + if (vei->vei.vei_dir == VEI_DIR_OUT) + fw_cfg_dma_addr = 0; + return 0xFF; + } + + if (vei->vei.vei_dir == VEI_DIR_OUT) { + get_input_data(vei, &data); + switch (vei->vei.vei_port) { + case FW_CFG_IO_DMA_ADDR_HIGH: + fw_cfg_dma_addr = (uint64_t)be32toh(data) << 32; + break; + case FW_CFG_IO_DMA_ADDR_LOW: + fw_cfg_dma_addr |= be32toh(data); + + /* writing least significant half triggers operation */ + if (read_mem(fw_cfg_dma_addr, &fw_dma, sizeof(fw_dma))) + break; + /* adjust byteorder */ + fw_dma.control = be32toh(fw_dma.control); + fw_dma.length = be32toh(fw_dma.length); + fw_dma.address = be64toh(fw_dma.address); + + fw_cfg_handle_dma(&fw_dma); + + /* just write control byte back */ + data = be32toh(fw_dma.control); + if (write_mem(fw_cfg_dma_addr, &data, sizeof(data))) + break; + + /* done, reset base address */ + fw_cfg_dma_addr = 0; + break; + } + } else { + uint64_t sig = htobe64(FW_CFG_DMA_SIGNATURE); + switch (vei->vei.vei_port) 
{ + case FW_CFG_IO_DMA_ADDR_HIGH: + set_return_data(vei, sig >> 32); + break; + case FW_CFG_IO_DMA_ADDR_LOW: + set_return_data(vei, sig & 0xffffffff); + break; + } + } + return 0xFF; +} + +static uint16_t file_id = FW_CFG_FILE_FIRST; + +struct fw_cfg_file_entry { + TAILQ_ENTRY(fw_cfg_file_entry) entry; + struct fw_cfg_file file; + void *data; +}; + +TAILQ_HEAD(, fw_cfg_file_entry) fw_cfg_files = + TAILQ_HEAD_INITIALIZER(fw_cfg_files); + +static struct fw_cfg_file_entry * +fw_cfg_lookup_file(const char *name) +{ + struct fw_cfg_file_entry *f; + + TAILQ_FOREACH(f, &fw_cfg_files, entry) { + if (strcmp(name, f->file.name) == 0) + return f; + } + return NULL; +} + +void +fw_cfg_add_file(const char *name, const void *data, size_t len) +{ + struct fw_cfg_file_entry *f; + + if (fw_cfg_lookup_file(name)) + fatalx("%s: fw_cfg: file %s exists", __progname, name); + + if ((f = calloc(sizeof(f), 1)) == NULL) + fatal("%s", __func__); + + if ((f->data = malloc(len)) == NULL) + fatal("%s", __func__); + + if (strlcpy(f->file.name, name, sizeof(f->file.name)) >= + sizeof(f->file.name)) + fatalx("%s: fw_cfg: file name too long", __progname); + + f->file.size = htobe32(len); + f->file.selector = htobe16(file_id++); + memcpy(f->data, data, len); + + TAILQ_INSERT_TAIL(&fw_cfg_files, f, entry); +} + +static int +fw_cfg_select_file(uint16_t id) +{ + struct fw_cfg_file_entry *f; + + id = htobe16(id); + TAILQ_FOREACH(f, &fw_cfg_files, entry) + if (f->file.selector == id) { + size_t size = be32toh(f->file.size); + fw_cfg_set_state(f->data, size); + log_debug("%s: accessing file %s", __func__, + f->file.name); + return 1; + } + return 0; +} + +static void +fw_cfg_file_dir(void) +{ + struct fw_cfg_file_entry *f; + struct fw_cfg_file *fp; + uint32_t count = 0; + uint32_t *data; + size_t size; + + TAILQ_FOREACH(f, &fw_cfg_files, entry) + count++; + + size = sizeof(count) + count * sizeof(struct fw_cfg_file); + if ((data = malloc(size)) == NULL) + fatal("%s", __func__); + *data = htobe32(count); 
+ fp = (struct fw_cfg_file *)(data + 1); + + log_debug("%s: file directory with %d files", __func__, count); + TAILQ_FOREACH(f, &fw_cfg_files, entry) { + log_debug(" %6dB %04x %s", be32toh(f->file.size), + be16toh(f->file.selector), f->file.name); + memcpy(fp, &f->file, sizeof(f->file)); + fp++; + } + + /* XXX should sort by name but SeaBIOS does not care */ + + fw_cfg_set_state(data, size); +} Index: fw_cfg.h =================================================================== RCS file: fw_cfg.h diff -N fw_cfg.h --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ fw_cfg.h 8 Dec 2018 07:26:33 -0000 @@ -0,0 +1,28 @@ +/* $OpenBSD$ */ +/* + * Copyright (c) 2018 Claudio Jeker <clau...@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#define FW_CFG_IO_SELECT 0x510 +#define FW_CFG_IO_DATA 0x511 +#define FW_CFG_IO_DMA_ADDR_HIGH 0x514 +#define FW_CFG_IO_DMA_ADDR_LOW 0x518 + +void fw_cfg_init(struct vmop_create_params *); +int fw_cfg_dump(int); +int fw_cfg_restore(int); +uint8_t vcpu_exit_fw_cfg(struct vm_run_params *); +uint8_t vcpu_exit_fw_cfg_dma(struct vm_run_params *); +void fw_cfg_add_file(const char *, const void *, size_t); Index: i8253.c =================================================================== RCS file: /cvs/src/usr.sbin/vmd/i8253.c,v retrieving revision 1.28 diff -u -p -r1.28 i8253.c --- i8253.c 12 Jul 2018 10:15:44 -0000 1.28 +++ i8253.c 8 Dec 2018 06:59:17 -0000 @@ -170,7 +170,7 @@ vcpu_exit_i8253_misc(struct vm_run_param } } } else { - log_debug("%s: discarding data written to PIT misc port\n", + log_debug("%s: discarding data written to PIT misc port", __func__); } Index: vm.c =================================================================== RCS file: /cvs/src/usr.sbin/vmd/vm.c,v retrieving revision 1.42 diff -u -p -r1.42 vm.c --- vm.c 6 Dec 2018 09:20:06 -0000 1.42 +++ vm.c 10 Dec 2018 16:36:23 -0000 @@ -61,6 +61,7 @@ #include "i8259.h" #include "ns8250.h" #include "mc146818.h" +#include "fw_cfg.h" #include "atomicio.h" io_fn_t ioports_map[MAX_PORTS]; @@ -562,6 +563,8 @@ send_vm(int fd, struct vm_create_params goto err; if ((ret = mc146818_dump(fd))) goto err; + if ((ret = fw_cfg_dump(fd))) + goto err; if ((ret = pci_dump(fd))) goto err; if ((ret = virtio_dump(fd))) @@ -950,6 +953,13 @@ init_emulated_hw(struct vmop_create_para for (i = COM1_DATA; i <= COM1_SCR; i++) ioports_map[i] = vcpu_exit_com; + /* Init QEMU fw_cfg interface */ + fw_cfg_init(vmc); + ioports_map[FW_CFG_IO_SELECT] = vcpu_exit_fw_cfg; + ioports_map[FW_CFG_IO_DATA] = vcpu_exit_fw_cfg; + ioports_map[FW_CFG_IO_DMA_ADDR_HIGH] = vcpu_exit_fw_cfg_dma; + ioports_map[FW_CFG_IO_DMA_ADDR_LOW] = vcpu_exit_fw_cfg_dma; + /* Initialize PCI */ for (i = VMM_PCI_IO_BAR_BASE; i <= VMM_PCI_IO_BAR_END; i++) 
ioports_map[i] = vcpu_exit_pci; @@ -1001,6 +1011,13 @@ restore_emulated_hw(struct vm_create_par ioports_map[IO_RTC] = vcpu_exit_mc146818; ioports_map[IO_RTC + 1] = vcpu_exit_mc146818; + /* Init QEMU fw_cfg interface */ + fw_cfg_restore(fd); + ioports_map[FW_CFG_IO_SELECT] = vcpu_exit_fw_cfg; + ioports_map[FW_CFG_IO_DATA] = vcpu_exit_fw_cfg; + ioports_map[FW_CFG_IO_DMA_ADDR_HIGH] = vcpu_exit_fw_cfg_dma; + ioports_map[FW_CFG_IO_DMA_ADDR_LOW] = vcpu_exit_fw_cfg_dma; + /* Initialize PCI */ for (i = VMM_PCI_IO_BAR_BASE; i <= VMM_PCI_IO_BAR_END; i++) ioports_map[i] = vcpu_exit_pci; @@ -1622,7 +1639,7 @@ vaddr_mem(paddr_t gpa, size_t len) * * Parameters: * dst: the destination paddr_t in the guest VM - * buf: data to copy + * buf: data to copy (or NULL to zero the data) * len: number of bytes to copy * * Return values: @@ -1653,9 +1670,12 @@ write_mem(paddr_t dst, const void *buf, n = len; to = (char *)vmr->vmr_va + off; - memcpy(to, from, n); - - from += n; + if (buf == NULL) + memset(to, 0, n); + else { + memcpy(to, from, n); + from += n; + } len -= n; off = 0; vmr++; Index: vmd.h =================================================================== RCS file: /cvs/src/usr.sbin/vmd/vmd.h,v retrieving revision 1.88 diff -u -p -r1.88 vmd.h --- vmd.h 6 Dec 2018 09:20:06 -0000 1.88 +++ vmd.h 10 Dec 2018 16:41:02 -0000 @@ -214,7 +214,7 @@ struct vm_dump_header { #define VM_DUMP_SIGNATURE VMM_HV_SIGNATURE uint8_t vmh_pad[3]; uint8_t vmh_version; -#define VM_DUMP_VERSION 4 +#define VM_DUMP_VERSION 5 struct vm_dump_header_cpuid vmh_cpuids[VM_DUMP_HEADER_CPUID_COUNT]; } __packed;