A quick check by checkpatch.pl, pretty straightforward to fix. ERROR: return is not a function, parentheses are not required #211: FILE: hw/misc/ivshmem2.c:138: + return (ivs->features & (1 << feature));
ERROR: memory barrier without comment #255: FILE: hw/misc/ivshmem2.c:182: + smp_mb(); ERROR: braces {} are necessary for all arms of this statement #626: FILE: hw/misc/ivshmem2.c:553: + if (msg->vector == 0) [...] Best, Liang On 1/7/20 9:36 AM, Jan Kiszka wrote: > From: Jan Kiszka <jan.kis...@siemens.com> > > This adds a reimplementation of ivshmem in its new revision 2 as > separate device. The goal of this is not to enable sharing with v1, > rather to allow explore the properties and potential limitation of the > new version prior to discussing its integration with the existing code. > > v2 always requires a server to interconnect two more more QEMU > instances because it provides signaling between peers unconditionally. > Therefore, only the interconnecting chardev, master mode, and the usage > of ioeventfd can be configured at device level. All other parameters are > defined by the server instance. > > A new server protocol is introduced along this. Its primary difference > is the introduction of a single welcome message that contains all peer > parameters, rather than a series of single-word messages pushing them > piece by piece. > > A complicating difference in interrupt handling, compare to v1, is the > auto-disable mode of v2: When this is active, interrupt delivery is > disabled by the device after each interrupt event. This prevents the > usage of irqfd on the receiving side, but it lowers the handling cost > for guests that implemented interrupt throttling this way (specifically > when exposing the device via UIO). > > No changes have been made to the ivshmem device regarding migration: > Only the master can live-migrate, slave peers have to hot-unplug the > device first. > > The details of the device model will be specified in a succeeding > commit. Drivers for this device can currently be found under > > http://git.kiszka.org/?p=linux.git;a=shortlog;h=refs/heads/queues/ivshmem2 > > To instantiate a ivshmem v2 device, just add > > ... -chardev socket,path=/tmp/ivshmem_socket,id=ivshmem \ > -device ivshmem,chardev=ivshmem > > provided the server opened its socket under the default path. > > Signed-off-by: Jan Kiszka <jan.kis...@siemens.com> > --- > hw/misc/Makefile.objs | 2 +- > hw/misc/ivshmem2.c | 1085 > ++++++++++++++++++++++++++++++++++++++++++++ > include/hw/misc/ivshmem2.h | 48 ++ > include/hw/pci/pci_ids.h | 2 + > 4 files changed, 1136 insertions(+), 1 deletion(-) > create mode 100644 hw/misc/ivshmem2.c > create mode 100644 include/hw/misc/ivshmem2.h > > diff --git a/hw/misc/Makefile.objs b/hw/misc/Makefile.objs > index ba898a5781..90a4a6608c 100644 > --- a/hw/misc/Makefile.objs > +++ b/hw/misc/Makefile.objs > @@ -26,7 +26,7 @@ common-obj-$(CONFIG_PUV3) += puv3_pm.o > > common-obj-$(CONFIG_MACIO) += macio/ > > -common-obj-$(CONFIG_IVSHMEM_DEVICE) += ivshmem.o > +common-obj-$(CONFIG_IVSHMEM_DEVICE) += ivshmem.o ivshmem2.o > > common-obj-$(CONFIG_REALVIEW) += arm_sysctl.o > common-obj-$(CONFIG_NSERIES) += cbus.o > diff --git a/hw/misc/ivshmem2.c b/hw/misc/ivshmem2.c > new file mode 100644 > index 0000000000..d5f88ed0e9 > --- /dev/null > +++ b/hw/misc/ivshmem2.c > @@ -0,0 +1,1085 @@ > +/* > + * Inter-VM Shared Memory PCI device, version 2. > + * > + * Copyright (c) Siemens AG, 2019 > + * > + * Authors: > + * Jan Kiszka <jan.kis...@siemens.com> > + * > + * Based on ivshmem.c by Cam Macdonell <c...@cs.ualberta.ca> > + * > + * This code is licensed under the GNU GPL v2. > + */ > + > +#include "qemu/osdep.h" > +#include "qemu/units.h" > +#include "qapi/error.h" > +#include "qemu/cutils.h" > +#include "hw/hw.h" > +#include "hw/pci/pci.h" > +#include "hw/pci/msi.h" > +#include "hw/pci/msix.h" > +#include "hw/qdev-properties.h" > +#include "sysemu/kvm.h" > +#include "migration/blocker.h" > +#include "migration/vmstate.h" > +#include "qemu/error-report.h" > +#include "qemu/event_notifier.h" > +#include "qemu/module.h" > +#include "qom/object_interfaces.h" > +#include "chardev/char-fe.h" > +#include "sysemu/qtest.h" > +#include "qapi/visitor.h" > + > +#include "hw/misc/ivshmem2.h" > + > +#define PCI_VENDOR_ID_IVSHMEM PCI_VENDOR_ID_SIEMENS > +#define PCI_DEVICE_ID_IVSHMEM 0x4106 > + > +#define IVSHMEM_MAX_PEERS UINT16_MAX > +#define IVSHMEM_IOEVENTFD 0 > +#define IVSHMEM_MSI 1 > + > +#define IVSHMEM_REG_BAR_SIZE 0x1000 > + > +#define IVSHMEM_REG_ID 0x00 > +#define IVSHMEM_REG_MAX_PEERS 0x04 > +#define IVSHMEM_REG_INT_CTRL 0x08 > +#define IVSHMEM_REG_DOORBELL 0x0c > +#define IVSHMEM_REG_STATE 0x10 > + > +#define IVSHMEM_INT_ENABLE 0x1 > + > +#define IVSHMEM_ONESHOT_MODE 0x1 > + > +#define IVSHMEM_DEBUG 0 > +#define IVSHMEM_DPRINTF(fmt, ...) \ > + do { \ > + if (IVSHMEM_DEBUG) { \ > + printf("IVSHMEM: " fmt, ## __VA_ARGS__); \ > + } \ > + } while (0) > + > +#define TYPE_IVSHMEM "ivshmem" > +#define IVSHMEM(obj) \ > + OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM) > + > +typedef struct Peer { > + int nb_eventfds; > + EventNotifier *eventfds; > +} Peer; > + > +typedef struct MSIVector { > + PCIDevice *pdev; > + int virq; > + bool unmasked; > +} MSIVector; > + > +typedef struct IVShmemVndrCap { > + uint8_t id; > + uint8_t next; > + uint8_t length; > + uint8_t priv_ctrl; > + uint32_t state_tab_sz; > + uint64_t rw_section_sz; > + uint64_t output_section_sz; > +} IVShmemVndrCap; > + > +typedef struct IVShmemState { > + /*< private >*/ > + PCIDevice parent_obj; > + /*< public >*/ > + > + uint32_t features; > + > + CharBackend server_chr; > + > + /* registers */ > + uint8_t *priv_ctrl; > + uint32_t vm_id; > + uint32_t intctrl; > + uint32_t state; > + > + /* BARs */ > + MemoryRegion ivshmem_mmio; /* BAR 0 (registers) */ > + MemoryRegion ivshmem_bar2; /* BAR 2 (shared memory) */ > + > + void *shmem; > + size_t shmem_sz; > + size_t output_section_sz; > + > + MemoryRegion state_tab; > + MemoryRegion rw_section; > + MemoryRegion input_sections; > + MemoryRegion output_section; > + > + /* interrupt support */ > + Peer *peers; > + int nb_peers; /* space in @peers[] */ > + uint32_t max_peers; > + uint32_t vectors; > + MSIVector *msi_vectors; > + > + uint8_t msg_buf[32]; /* buffer for receiving server messages */ > + int msg_buffered_bytes; /* #bytes in @msg_buf */ > + > + uint32_t protocol; > + > + /* migration stuff */ > + OnOffAuto master; > + Error *migration_blocker; > +} IVShmemState; > + > +static void ivshmem_enable_irqfd(IVShmemState *s); > +static void ivshmem_disable_irqfd(IVShmemState *s); > + > +static inline uint32_t ivshmem_has_feature(IVShmemState *ivs, > + unsigned int feature) { > + return (ivs->features & (1 << feature)); > +} > + > +static inline bool ivshmem_is_master(IVShmemState *s) > +{ > + assert(s->master != ON_OFF_AUTO_AUTO); > + return s->master == ON_OFF_AUTO_ON; > +} > + > +static bool ivshmem_irqfd_usable(IVShmemState *s) > +{ > + PCIDevice *pdev = PCI_DEVICE(s); > + > + return (s->intctrl & IVSHMEM_INT_ENABLE) && msix_enabled(pdev) && > + !(*s->priv_ctrl & IVSHMEM_ONESHOT_MODE); > +} > + > +static void ivshmem_update_irqfd(IVShmemState *s, bool was_usable) > +{ > + bool is_usable = ivshmem_irqfd_usable(s); > + > + if (kvm_msi_via_irqfd_enabled()) { > + if (!was_usable && is_usable) { > + ivshmem_enable_irqfd(s); > + } else if (was_usable && !is_usable) { > + ivshmem_disable_irqfd(s); > + } > + } > +} > + > +static void ivshmem_write_intctrl(IVShmemState *s, uint32_t new_state) > +{ > + bool was_usable = ivshmem_irqfd_usable(s); > + > + s->intctrl = new_state & IVSHMEM_INT_ENABLE; > + ivshmem_update_irqfd(s, was_usable); > +} > + > +static void ivshmem_write_state(IVShmemState *s, uint32_t new_state) > +{ > + uint32_t *state_table = s->shmem; > + int peer; > + > + state_table[s->vm_id] = new_state; > + smp_mb(); > + > + if (s->state != new_state) { > + s->state = new_state; > + for (peer = 0; peer < s->nb_peers; peer++) { > + if (peer != s->vm_id && s->peers[peer].nb_eventfds > 0) { > + event_notifier_set(&s->peers[peer].eventfds[0]); > + } > + } > + } > +} > + > +static void ivshmem_io_write(void *opaque, hwaddr addr, > + uint64_t val, unsigned size) > +{ > + IVShmemState *s = opaque; > + > + uint16_t dest = val >> 16; > + uint16_t vector = val & 0xff; > + > + addr &= 0xfc; > + > + IVSHMEM_DPRINTF("writing to addr " TARGET_FMT_plx "\n", addr); > + switch (addr) { > + case IVSHMEM_REG_INT_CTRL: > + ivshmem_write_intctrl(s, val); > + break; > + > + case IVSHMEM_REG_DOORBELL: > + /* check that dest VM ID is reasonable */ > + if (dest >= s->nb_peers) { > + IVSHMEM_DPRINTF("Invalid destination VM ID (%d)\n", dest); > + break; > + } > + > + /* check doorbell range */ > + if (vector < s->peers[dest].nb_eventfds) { > + IVSHMEM_DPRINTF("Notifying VM %d on vector %d\n", dest, vector); > + event_notifier_set(&s->peers[dest].eventfds[vector]); > + } else { > + IVSHMEM_DPRINTF("Invalid destination vector %d on VM %d\n", > + vector, dest); > + } > + break; > + > + case IVSHMEM_REG_STATE: > + ivshmem_write_state(s, val); > + break; > + > + default: > + IVSHMEM_DPRINTF("Unhandled write " TARGET_FMT_plx "\n", addr); > + } > +} > + > +static uint64_t ivshmem_io_read(void *opaque, hwaddr addr, > + unsigned size) > +{ > + IVShmemState *s = opaque; > + uint32_t ret; > + > + switch (addr) { > + case IVSHMEM_REG_ID: > + ret = s->vm_id; > + break; > + > + case IVSHMEM_REG_MAX_PEERS: > + ret = s->max_peers; > + break; > + > + case IVSHMEM_REG_INT_CTRL: > + ret = s->intctrl; > + break; > + > + case IVSHMEM_REG_STATE: > + ret = s->state; > + break; > + > + default: > + IVSHMEM_DPRINTF("why are we reading " TARGET_FMT_plx "\n", addr); > + ret = 0; > + } > + > + return ret; > +} > + > +static const MemoryRegionOps ivshmem_mmio_ops = { > + .read = ivshmem_io_read, > + .write = ivshmem_io_write, > + .endianness = DEVICE_NATIVE_ENDIAN, > + .impl = { > + .min_access_size = 4, > + .max_access_size = 4, > + }, > +}; > + > +static void ivshmem_vector_notify(void *opaque) > +{ > + MSIVector *entry = opaque; > + PCIDevice *pdev = entry->pdev; > + IVShmemState *s = IVSHMEM(pdev); > + int vector = entry - s->msi_vectors; > + EventNotifier *n = &s->peers[s->vm_id].eventfds[vector]; > + > + if (!event_notifier_test_and_clear(n) || > + !(s->intctrl & IVSHMEM_INT_ENABLE)) { > + return; > + } > + > + IVSHMEM_DPRINTF("interrupt on vector %p %d\n", pdev, vector); > + if (ivshmem_has_feature(s, IVSHMEM_MSI)) { > + if (msix_enabled(pdev)) { > + msix_notify(pdev, vector); > + } > + } else if (pdev->config[PCI_INTERRUPT_PIN]) { > + pci_set_irq(pdev, 1); > + pci_set_irq(pdev, 0); > + } > + if (*s->priv_ctrl & IVSHMEM_ONESHOT_MODE) { > + s->intctrl &= ~IVSHMEM_INT_ENABLE; > + } > +} > + > +static int ivshmem_irqfd_vector_unmask(PCIDevice *dev, unsigned vector, > + MSIMessage msg) > +{ > + IVShmemState *s = IVSHMEM(dev); > + EventNotifier *n = &s->peers[s->vm_id].eventfds[vector]; > + MSIVector *v = &s->msi_vectors[vector]; > + int ret; > + > + IVSHMEM_DPRINTF("vector unmask %p %d\n", dev, vector); > + if (!v->pdev) { > + error_report("ivshmem: vector %d route does not exist", vector); > + return -EINVAL; > + } > + assert(!v->unmasked); > + > + ret = kvm_irqchip_add_msi_route(kvm_state, vector, dev); > + if (ret < 0) { > + error_report("kvm_irqchip_add_msi_route failed"); > + return ret; > + } > + v->virq = ret; > + kvm_irqchip_commit_routes(kvm_state); > + > + ret = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, v->virq); > + if (ret < 0) { > + error_report("kvm_irqchip_add_irqfd_notifier_gsi failed"); > + return ret; > + } > + v->unmasked = true; > + > + return 0; > +} > + > +static void ivshmem_irqfd_vector_mask(PCIDevice *dev, unsigned vector) > +{ > + IVShmemState *s = IVSHMEM(dev); > + EventNotifier *n = &s->peers[s->vm_id].eventfds[vector]; > + MSIVector *v = &s->msi_vectors[vector]; > + int ret; > + > + IVSHMEM_DPRINTF("vector mask %p %d\n", dev, vector); > + if (!v->pdev) { > + error_report("ivshmem: vector %d route does not exist", vector); > + return; > + } > + assert(v->unmasked); > + > + ret = kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, n, v->virq); > + if (ret < 0) { > + error_report("remove_irqfd_notifier_gsi failed"); > + return; > + } > + kvm_irqchip_release_virq(kvm_state, v->virq); > + > + v->unmasked = false; > +} > + > +static void ivshmem_irqfd_vector_poll(PCIDevice *dev, > + unsigned int vector_start, > + unsigned int vector_end) > +{ > + IVShmemState *s = IVSHMEM(dev); > + unsigned int vector; > + > + IVSHMEM_DPRINTF("vector poll %p %d-%d\n", dev, vector_start, vector_end); > + > + vector_end = MIN(vector_end, s->vectors); > + > + for (vector = vector_start; vector < vector_end; vector++) { > + EventNotifier *notifier = &s->peers[s->vm_id].eventfds[vector]; > + > + if (!msix_is_masked(dev, vector)) { > + continue; > + } > + > + if (event_notifier_test_and_clear(notifier)) { > + msix_set_pending(dev, vector); > + } > + } > +} > + > +static void ivshmem_watch_vector_notifier(IVShmemState *s, int vector) > +{ > + EventNotifier *n = &s->peers[s->vm_id].eventfds[vector]; > + int eventfd = event_notifier_get_fd(n); > + > + assert(!s->msi_vectors[vector].pdev); > + s->msi_vectors[vector].pdev = PCI_DEVICE(s); > + > + qemu_set_fd_handler(eventfd, ivshmem_vector_notify, > + NULL, &s->msi_vectors[vector]); > +} > + > +static void ivshmem_unwatch_vector_notifier(IVShmemState *s, int vector) > +{ > + EventNotifier *n = &s->peers[s->vm_id].eventfds[vector]; > + int eventfd = event_notifier_get_fd(n); > + > + if (!s->msi_vectors[vector].pdev) { > + return; > + } > + > + qemu_set_fd_handler(eventfd, NULL, NULL, NULL); > + > + s->msi_vectors[vector].pdev = NULL; > +} > + > +static void ivshmem_add_eventfd(IVShmemState *s, int posn, int i) > +{ > + memory_region_add_eventfd(&s->ivshmem_mmio, > + IVSHMEM_REG_DOORBELL, > + 4, > + true, > + (posn << 16) | i, > + &s->peers[posn].eventfds[i]); > +} > + > +static void ivshmem_del_eventfd(IVShmemState *s, int posn, int i) > +{ > + memory_region_del_eventfd(&s->ivshmem_mmio, > + IVSHMEM_REG_DOORBELL, > + 4, > + true, > + (posn << 16) | i, > + &s->peers[posn].eventfds[i]); > +} > + > +static void close_peer_eventfds(IVShmemState *s, int posn) > +{ > + int i, n; > + > + assert(posn >= 0 && posn < s->nb_peers); > + n = s->peers[posn].nb_eventfds; > + > + if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) { > + memory_region_transaction_begin(); > + for (i = 0; i < n; i++) { > + ivshmem_del_eventfd(s, posn, i); > + } > + memory_region_transaction_commit(); > + } > + > + for (i = 0; i < n; i++) { > + event_notifier_cleanup(&s->peers[posn].eventfds[i]); > + } > + > + g_free(s->peers[posn].eventfds); > + s->peers[posn].nb_eventfds = 0; > +} > + > +static void resize_peers(IVShmemState *s, int nb_peers) > +{ > + int old_nb_peers = s->nb_peers; > + int i; > + > + assert(nb_peers > old_nb_peers); > + IVSHMEM_DPRINTF("bumping storage to %d peers\n", nb_peers); > + > + s->peers = g_realloc(s->peers, nb_peers * sizeof(Peer)); > + s->nb_peers = nb_peers; > + > + for (i = old_nb_peers; i < nb_peers; i++) { > + s->peers[i].eventfds = NULL; > + s->peers[i].nb_eventfds = 0; > + } > +} > + > +static void ivshmem_add_kvm_msi_virq(IVShmemState *s, int vector, Error > **errp) > +{ > + PCIDevice *pdev = PCI_DEVICE(s); > + > + IVSHMEM_DPRINTF("ivshmem_add_kvm_msi_virq vector:%d\n", vector); > + assert(!s->msi_vectors[vector].pdev); > + > + s->msi_vectors[vector].unmasked = false; > + s->msi_vectors[vector].pdev = pdev; > +} > + > +static void ivshmem_remove_kvm_msi_virq(IVShmemState *s, int vector) > +{ > + IVSHMEM_DPRINTF("ivshmem_remove_kvm_msi_virq vector:%d\n", vector); > + > + if (s->msi_vectors[vector].pdev == NULL) { > + return; > + } > + > + if (s->msi_vectors[vector].unmasked) { > + ivshmem_irqfd_vector_mask(s->msi_vectors[vector].pdev, vector); > + } > + > + s->msi_vectors[vector].pdev = NULL; > +} > + > +static void process_msg_disconnect(IVShmemState *s, IvshmemPeerGone *msg, > + Error **errp) > +{ > + if (msg->header.len < sizeof(*msg)) { > + error_setg(errp, "Invalid peer-gone message size"); > + return; > + } > + > + le32_to_cpus(&msg->id); > + > + IVSHMEM_DPRINTF("peer %d has gone away\n", msg->id); > + if (msg->id >= s->nb_peers || msg->id == s->vm_id) { > + error_setg(errp, "invalid peer %d", msg->id); > + return; > + } > + close_peer_eventfds(s, msg->id); > + event_notifier_set(&s->peers[s->vm_id].eventfds[0]); > +} > + > +static void process_msg_connect(IVShmemState *s, IvshmemEventFd *msg, int fd, > + Error **errp) > +{ > + Peer *peer; > + > + if (msg->header.len < sizeof(*msg)) { > + error_setg(errp, "Invalid eventfd message size"); > + close(fd); > + return; > + } > + > + le32_to_cpus(&msg->id); > + le32_to_cpus(&msg->vector); > + > + if (msg->id >= s->nb_peers) { > + resize_peers(s, msg->id + 1); > + } > + > + peer = &s->peers[msg->id]; > + > + /* > + * The N-th connect message for this peer comes with the file > + * descriptor for vector N-1. > + */ > + if (msg->vector != peer->nb_eventfds) { > + error_setg(errp, "Received vector %d out of order", msg->vector); > + close(fd); > + return; > + } > + if (peer->nb_eventfds >= s->vectors) { > + error_setg(errp, "Too many eventfd received, device has %d vectors", > + s->vectors); > + close(fd); > + return; > + } > + peer->nb_eventfds++; > + > + if (msg->vector == 0) > + peer->eventfds = g_new0(EventNotifier, s->vectors); > + > + IVSHMEM_DPRINTF("eventfds[%d][%d] = %d\n", msg->id, msg->vector, fd); > + event_notifier_init_fd(&peer->eventfds[msg->vector], fd); > + fcntl_setfl(fd, O_NONBLOCK); /* msix/irqfd poll non block */ > + > + if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) { > + ivshmem_add_eventfd(s, msg->id, msg->vector); > + } > + > + if (msg->id == s->vm_id) { > + ivshmem_watch_vector_notifier(s, peer->nb_eventfds - 1); > + } > +} > + > +static int ivshmem_can_receive(void *opaque) > +{ > + IVShmemState *s = opaque; > + > + assert(s->msg_buffered_bytes < sizeof(s->msg_buf)); > + return sizeof(s->msg_buf) - s->msg_buffered_bytes; > +} > + > +static void ivshmem_read(void *opaque, const uint8_t *buf, int size) > +{ > + IVShmemState *s = opaque; > + IvshmemMsgHeader *header = (IvshmemMsgHeader *)&s->msg_buf; > + Error *err = NULL; > + int fd; > + > + assert(size >= 0 && s->msg_buffered_bytes + size <= sizeof(s->msg_buf)); > + memcpy(s->msg_buf + s->msg_buffered_bytes, buf, size); > + s->msg_buffered_bytes += size; > + if (s->msg_buffered_bytes < sizeof(*header) || > + s->msg_buffered_bytes < le32_to_cpu(header->len)) { > + return; > + } > + > + fd = qemu_chr_fe_get_msgfd(&s->server_chr); > + > + le32_to_cpus(&header->type); > + le32_to_cpus(&header->len); > + > + switch (header->type) { > + case IVSHMEM_MSG_EVENT_FD: > + process_msg_connect(s, (IvshmemEventFd *)header, fd, &err); > + break; > + case IVSHMEM_MSG_PEER_GONE: > + process_msg_disconnect(s, (IvshmemPeerGone *)header, &err); > + break; > + default: > + error_setg(&err, "invalid message, type %d", header->type); > + break; > + } > + if (err) { > + error_report_err(err); > + } > + > + s->msg_buffered_bytes -= header->len; > + memmove(s->msg_buf, s->msg_buf + header->len, s->msg_buffered_bytes); > +} > + > +static void ivshmem_recv_setup(IVShmemState *s, Error **errp) > +{ > + IvshmemInitialInfo msg; > + struct stat buf; > + uint8_t dummy; > + int fd, n, ret; > + > + n = 0; > + do { > + ret = qemu_chr_fe_read_all(&s->server_chr, (uint8_t *)&msg + n, > + sizeof(msg) - n); > + if (ret < 0) { > + if (ret == -EINTR) { > + continue; > + } > + error_setg_errno(errp, -ret, "read from server failed"); > + return; > + } > + n += ret; > + } while (n < sizeof(msg)); > + > + fd = qemu_chr_fe_get_msgfd(&s->server_chr); > + > + le32_to_cpus(&msg.header.type); > + le32_to_cpus(&msg.header.len); > + if (msg.header.type != IVSHMEM_MSG_INIT || msg.header.len < sizeof(msg)) > { > + error_setg(errp, "server sent invalid initial info"); > + return; > + } > + > + /* consume additional bytes of message */ > + msg.header.len -= sizeof(msg); > + while (msg.header.len > 0) { > + ret = qemu_chr_fe_read_all(&s->server_chr, &dummy, 1); > + if (ret < 0) { > + if (ret == -EINTR) { > + continue; > + } > + error_setg_errno(errp, -ret, "read from server failed"); > + return; > + } > + msg.header.len -= ret; > + } > + > + le32_to_cpus(&msg.compatible_version); > + if (msg.compatible_version != IVSHMEM_PROTOCOL_VERSION) { > + error_setg(errp, "server sent compatible version %u, expecting %u", > + msg.compatible_version, IVSHMEM_PROTOCOL_VERSION); > + return; > + } > + > + le32_to_cpus(&msg.id); > + if (msg.id > IVSHMEM_MAX_PEERS) { > + error_setg(errp, "server sent invalid ID"); > + return; > + } > + s->vm_id = msg.id; > + > + if (fstat(fd, &buf) < 0) { > + error_setg_errno(errp, errno, > + "can't determine size of shared memory sent by server"); > + close(fd); > + return; > + } > + > + s->shmem_sz = buf.st_size; > + > + s->shmem = mmap(NULL, s->shmem_sz, PROT_READ | PROT_WRITE, MAP_SHARED, > + fd, 0); > + if (s->shmem == MAP_FAILED) { > + error_setg_errno(errp, errno, > + "can't map shared memory sent by server"); > + return; > + } > + > + le32_to_cpus(&msg.vectors); > + if (msg.vectors < 1 || msg.vectors > 1024) { > + error_setg(errp, "server sent invalid number of vectors message"); > + return; > + } > + s->vectors = msg.vectors; > + > + s->max_peers = le32_to_cpu(msg.max_peers); > + s->protocol = le32_to_cpu(msg.protocol); > + s->output_section_sz = le64_to_cpu(msg.output_section_size); > +} > + > +/* Select the MSI-X vectors used by device. > + * ivshmem maps events to vectors statically, so > + * we just enable all vectors on init and after reset. */ > +static void ivshmem_msix_vector_use(IVShmemState *s) > +{ > + PCIDevice *d = PCI_DEVICE(s); > + int i; > + > + for (i = 0; i < s->vectors; i++) { > + msix_vector_use(d, i); > + } > +} > + > +static void ivshmem_reset(DeviceState *d) > +{ > + IVShmemState *s = IVSHMEM(d); > + > + ivshmem_disable_irqfd(s); > + > + s->intctrl = 0; > + ivshmem_write_state(s, 0); > + if (ivshmem_has_feature(s, IVSHMEM_MSI)) { > + ivshmem_msix_vector_use(s); > + } > +} > + > +static int ivshmem_setup_interrupts(IVShmemState *s, Error **errp) > +{ > + /* allocate QEMU callback data for receiving interrupts */ > + s->msi_vectors = g_malloc0(s->vectors * sizeof(MSIVector)); > + > + if (ivshmem_has_feature(s, IVSHMEM_MSI)) { > + if (msix_init_exclusive_bar(PCI_DEVICE(s), s->vectors, 1, errp)) { > + IVSHMEM_DPRINTF("msix requested but not available - > disabling\n"); > + s->features &= ~(IVSHMEM_MSI | IVSHMEM_IOEVENTFD); > + } else { > + IVSHMEM_DPRINTF("msix initialized (%d vectors)\n", s->vectors); > + ivshmem_msix_vector_use(s); > + } > + } > + > + return 0; > +} > + > +static void ivshmem_enable_irqfd(IVShmemState *s) > +{ > + PCIDevice *pdev = PCI_DEVICE(s); > + int i; > + > + for (i = 0; i < s->peers[s->vm_id].nb_eventfds; i++) { > + Error *err = NULL; > + > + ivshmem_unwatch_vector_notifier(s, i); > + > + ivshmem_add_kvm_msi_virq(s, i, &err); > + if (err) { > + error_report_err(err); > + goto undo; > + } > + } > + > + if (msix_set_vector_notifiers(pdev, > + ivshmem_irqfd_vector_unmask, > + ivshmem_irqfd_vector_mask, > + ivshmem_irqfd_vector_poll)) { > + error_report("ivshmem: msix_set_vector_notifiers failed"); > + goto undo; > + } > + return; > + > +undo: > + while (--i >= 0) { > + ivshmem_remove_kvm_msi_virq(s, i); > + } > +} > + > +static void ivshmem_disable_irqfd(IVShmemState *s) > +{ > + PCIDevice *pdev = PCI_DEVICE(s); > + int i; > + > + if (!pdev->msix_vector_use_notifier) { > + return; > + } > + > + msix_unset_vector_notifiers(pdev); > + > + for (i = 0; i < s->peers[s->vm_id].nb_eventfds; i++) { > + ivshmem_remove_kvm_msi_virq(s, i); > + ivshmem_watch_vector_notifier(s, i); > + } > + > +} > + > +static void ivshmem_write_config(PCIDevice *pdev, uint32_t address, > + uint32_t val, int len) > +{ > + IVShmemState *s = IVSHMEM(pdev); > + bool was_usable = ivshmem_irqfd_usable(s); > + > + pci_default_write_config(pdev, address, val, len); > + ivshmem_update_irqfd(s, was_usable); > +} > + > +static void ivshmem_exit(PCIDevice *dev) > +{ > + IVShmemState *s = IVSHMEM(dev); > + int i; > + > + if (s->migration_blocker) { > + migrate_del_blocker(s->migration_blocker); > + error_free(s->migration_blocker); > + } > + > + if (memory_region_is_mapped(&s->rw_section)) { > + void *addr = memory_region_get_ram_ptr(&s->rw_section); > + int fd; > + > + if (munmap(addr, memory_region_size(&s->rw_section) == -1)) { > + error_report("Failed to munmap shared memory %s", > + strerror(errno)); > + } > + > + fd = memory_region_get_fd(&s->rw_section); > + close(fd); > + > + vmstate_unregister_ram(&s->state_tab, DEVICE(dev)); > + vmstate_unregister_ram(&s->rw_section, DEVICE(dev)); > + } > + > + if (s->peers) { > + for (i = 0; i < s->nb_peers; i++) { > + close_peer_eventfds(s, i); > + } > + g_free(s->peers); > + } > + > + if (ivshmem_has_feature(s, IVSHMEM_MSI)) { > + msix_uninit_exclusive_bar(dev); > + } > + > + g_free(s->msi_vectors); > +} > + > +static int ivshmem_pre_load(void *opaque) > +{ > + IVShmemState *s = opaque; > + > + if (!ivshmem_is_master(s)) { > + error_report("'peer' devices are not migratable"); > + return -EINVAL; > + } > + > + return 0; > +} > + > +static int ivshmem_post_load(void *opaque, int version_id) > +{ > + IVShmemState *s = opaque; > + > + if (ivshmem_has_feature(s, IVSHMEM_MSI)) { > + ivshmem_msix_vector_use(s); > + } > + return 0; > +} > + > +static const VMStateDescription ivshmem_vmsd = { > + .name = TYPE_IVSHMEM, > + .version_id = 0, > + .minimum_version_id = 0, > + .pre_load = ivshmem_pre_load, > + .post_load = ivshmem_post_load, > + .fields = (VMStateField[]) { > + VMSTATE_PCI_DEVICE(parent_obj, IVShmemState), > + VMSTATE_MSIX(parent_obj, IVShmemState), > + VMSTATE_UINT32(state, IVShmemState), > + VMSTATE_UINT32(intctrl, IVShmemState), > + VMSTATE_END_OF_LIST() > + }, > +}; > + > +static Property ivshmem_properties[] = { > + DEFINE_PROP_CHR("chardev", IVShmemState, server_chr), > + DEFINE_PROP_BIT("ioeventfd", IVShmemState, features, IVSHMEM_IOEVENTFD, > + true), > + DEFINE_PROP_ON_OFF_AUTO("master", IVShmemState, master, ON_OFF_AUTO_OFF), > + DEFINE_PROP_END_OF_LIST(), > +}; > + > +static void ivshmem_init(Object *obj) > +{ > + IVShmemState *s = IVSHMEM(obj); > + > + s->features |= (1 << IVSHMEM_MSI); > +} > + > +static void ivshmem_realize(PCIDevice *dev, Error **errp) > +{ > + IVShmemState *s = IVSHMEM(dev); > + Chardev *chr = qemu_chr_fe_get_driver(&s->server_chr); > + size_t rw_section_sz, input_sections_sz; > + IVShmemVndrCap *vndr_cap; > + Error *err = NULL; > + uint8_t *pci_conf; > + int offset, priv_ctrl_pos; > + off_t shmem_pos; > + > + if (!qemu_chr_fe_backend_connected(&s->server_chr)) { > + error_setg(errp, "You must specify a 'chardev'"); > + return; > + } > + > + /* IRQFD requires MSI */ > + if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD) && > + !ivshmem_has_feature(s, IVSHMEM_MSI)) { > + error_setg(errp, "ioeventfd/irqfd requires MSI"); > + return; > + } > + > + pci_conf = dev->config; > + pci_conf[PCI_COMMAND] = PCI_COMMAND_IO | PCI_COMMAND_MEMORY; > + > + memory_region_init_io(&s->ivshmem_mmio, OBJECT(s), &ivshmem_mmio_ops, s, > + "ivshmem.mmio", IVSHMEM_REG_BAR_SIZE); > + > + /* region for registers*/ > + pci_register_bar(dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, > + &s->ivshmem_mmio); > + > + assert(chr); > + IVSHMEM_DPRINTF("using shared memory server (socket = %s)\n", > + chr->filename); > + > + /* > + * Receive setup messages from server synchronously. > + * Older versions did it asynchronously, but that creates a > + * number of entertaining race conditions. > + */ > + ivshmem_recv_setup(s, &err); > + if (err) { > + error_propagate(errp, err); > + return; > + } > + > + /* we allocate enough space for 16 peers and grow as needed */ > + resize_peers(s, 16); > + > + if (s->master == ON_OFF_AUTO_ON && s->vm_id != 0) { > + error_setg(errp, > + "Master must connect to the server before any peers"); > + return; > + } > + > + qemu_chr_fe_set_handlers(&s->server_chr, ivshmem_can_receive, > + ivshmem_read, NULL, NULL, s, NULL, true); > + > + if (ivshmem_setup_interrupts(s, errp) < 0) { > + error_prepend(errp, "Failed to initialize interrupts: "); > + return; > + } > + > + memory_region_init(&s->ivshmem_bar2, OBJECT(s), "ivshmem.bar2", > + s->shmem_sz); > + > + input_sections_sz = s->output_section_sz * s->max_peers; > + if (input_sections_sz + 4096 > s->shmem_sz) { > + error_setg(errp, > + "Invalid output section size, shared memory too small"); > + return; > + } > + rw_section_sz = s->shmem_sz - input_sections_sz - 4096; > + > + shmem_pos = 0; > + memory_region_init_ram_ptr(&s->state_tab, OBJECT(s), "ivshmem.state", > + 4096, s->shmem + shmem_pos); > + memory_region_set_readonly(&s->state_tab, true); > + memory_region_add_subregion(&s->ivshmem_bar2, shmem_pos, &s->state_tab); > + > + vmstate_register_ram(&s->state_tab, DEVICE(s)); > + > + if (rw_section_sz > 0) { > + shmem_pos += 4096; > + memory_region_init_ram_ptr(&s->rw_section, OBJECT(s), > + "ivshmem.rw-section", > + rw_section_sz, s->shmem + shmem_pos); > + memory_region_add_subregion(&s->ivshmem_bar2, shmem_pos, > + &s->rw_section); > + > + vmstate_register_ram(&s->rw_section, DEVICE(s)); > + } > + > + if (s->output_section_sz > 0) { > + shmem_pos += rw_section_sz; > + memory_region_init_ram_ptr(&s->input_sections, OBJECT(s), > + "ivshmem.input-sections", > input_sections_sz, > + s->shmem + shmem_pos); > + memory_region_set_readonly(&s->input_sections, true); > + memory_region_add_subregion(&s->ivshmem_bar2, shmem_pos, > + &s->input_sections); > + > + shmem_pos += s->vm_id * s->output_section_sz; > + memory_region_init_ram_ptr(&s->output_section, OBJECT(s), > + "ivshmem.output-section", > + s->output_section_sz, s->shmem + > shmem_pos); > + memory_region_add_subregion_overlap(&s->ivshmem_bar2, shmem_pos, > + &s->output_section, 1); > + > + vmstate_register_ram(&s->input_sections, DEVICE(s)); > + } > + > + pci_config_set_class(dev->config, 0xff00 | (s->protocol >> 8)); > + pci_config_set_prog_interface(dev->config, (uint8_t)s->protocol); > + > + offset = pci_add_capability(dev, PCI_CAP_ID_VNDR, 0, 0x18, > + &error_abort); > + vndr_cap = (IVShmemVndrCap *)(pci_conf + offset); > + vndr_cap->length = 0x18; > + vndr_cap->state_tab_sz = cpu_to_le32(4096); > + vndr_cap->rw_section_sz = cpu_to_le64(rw_section_sz); > + vndr_cap->output_section_sz = s->output_section_sz; > + > + priv_ctrl_pos = offset + offsetof(IVShmemVndrCap, priv_ctrl); > + s->priv_ctrl = &dev->config[priv_ctrl_pos]; > + dev->wmask[priv_ctrl_pos] |= IVSHMEM_ONESHOT_MODE; > + > + if (s->master == ON_OFF_AUTO_AUTO) { > + s->master = s->vm_id == 0 ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF; > + } > + > + if (!ivshmem_is_master(s)) { > + error_setg(&s->migration_blocker, > + "Migration is disabled when using feature 'peer mode' in > device 'ivshmem'"); > + migrate_add_blocker(s->migration_blocker, &err); > + if (err) { > + error_propagate(errp, err); > + error_free(s->migration_blocker); > + return; > + } > + } > + > + pci_register_bar(PCI_DEVICE(s), 2, > + PCI_BASE_ADDRESS_SPACE_MEMORY | > + PCI_BASE_ADDRESS_MEM_PREFETCH | > + PCI_BASE_ADDRESS_MEM_TYPE_64, > + &s->ivshmem_bar2); > +} > + > +static void ivshmem_class_init(ObjectClass *klass, void *data) > +{ > + DeviceClass *dc = DEVICE_CLASS(klass); > + PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); > + > + k->realize = ivshmem_realize; > + k->exit = ivshmem_exit; > + k->config_write = ivshmem_write_config; > + k->vendor_id = PCI_VENDOR_ID_IVSHMEM; > + k->device_id = PCI_DEVICE_ID_IVSHMEM; > + dc->reset = ivshmem_reset; > + set_bit(DEVICE_CATEGORY_MISC, dc->categories); > + dc->desc = "Inter-VM shared memory v2"; > + > + dc->props = ivshmem_properties; > + dc->vmsd = &ivshmem_vmsd; > +} > + > +static const TypeInfo ivshmem_info = { > + .name = TYPE_IVSHMEM, > + .parent = TYPE_PCI_DEVICE, > + .instance_size = sizeof(IVShmemState), > + .instance_init = ivshmem_init, > + .class_init = ivshmem_class_init, > + .interfaces = (InterfaceInfo[]) { > + { INTERFACE_CONVENTIONAL_PCI_DEVICE }, > + { }, > + }, > +}; > + > +static void ivshmem_register_types(void) > +{ > + type_register_static(&ivshmem_info); > +} > + > +type_init(ivshmem_register_types) > diff --git a/include/hw/misc/ivshmem2.h b/include/hw/misc/ivshmem2.h > new file mode 100644 > index 0000000000..ee994699e8 > --- /dev/null > +++ b/include/hw/misc/ivshmem2.h > @@ -0,0 +1,48 @@ > +/* > + * Inter-VM Shared Memory PCI device, Version 2. > + * > + * Copyright (c) Siemens AG, 2019 > + * > + * Authors: > + * Jan Kiszka <jan.kis...@siemens.com> > + * > + * This work is licensed under the terms of the GNU GPL, version 2 or > + * (at your option) any later version. > + */ > +#ifndef IVSHMEM2_H > +#define IVSHMEM2_H > + > +#define IVSHMEM_PROTOCOL_VERSION 2 > + > +#define IVSHMEM_MSG_INIT 0 > +#define IVSHMEM_MSG_EVENT_FD 1 > +#define IVSHMEM_MSG_PEER_GONE 2 > + > +typedef struct IvshmemMsgHeader { > + uint32_t type; > + uint32_t len; > +} IvshmemMsgHeader; > + > +typedef struct IvshmemInitialInfo { > + IvshmemMsgHeader header; > + uint32_t version; > + uint32_t compatible_version; > + uint32_t id; > + uint32_t max_peers; > + uint32_t vectors; > + uint32_t protocol; > + uint64_t output_section_size; > +} IvshmemInitialInfo; > + > +typedef struct IvshmemEventFd { > + IvshmemMsgHeader header; > + uint32_t id; > + uint32_t vector; > +} IvshmemEventFd; > + > +typedef struct IvshmemPeerGone { > + IvshmemMsgHeader header; > + uint32_t id; > +} IvshmemPeerGone; > + > +#endif /* IVSHMEM2_H */ > diff --git a/include/hw/pci/pci_ids.h b/include/hw/pci/pci_ids.h > index 11f8ab7149..f0c5d5ed2b 100644 > --- a/include/hw/pci/pci_ids.h > +++ b/include/hw/pci/pci_ids.h > @@ -264,6 +264,8 @@ > #define PCI_VENDOR_ID_NEC 0x1033 > #define PCI_DEVICE_ID_NEC_UPD720200 0x0194 > > +#define PCI_VENDOR_ID_SIEMENS 0x110a > + > #define PCI_VENDOR_ID_TEWS 0x1498 > #define PCI_DEVICE_ID_TEWS_TPCI200 0x30C8 > >