Forward remote device's interrupts to the guest Signed-off-by: Elena Ufimtseva <elena.ufimts...@oracle.com> Signed-off-by: John G Johnson <john.g.john...@oracle.com> Signed-off-by: Jagannathan Raman <jag.ra...@oracle.com> --- include/hw/pci/pci.h | 6 ++++ include/hw/remote/iohub.h | 1 + hw/pci/msi.c | 13 ++++++- hw/pci/msix.c | 12 ++++++- hw/remote/iohub.c | 7 ++++ hw/remote/vfio-user-obj.c | 74 +++++++++++++++++++++++++++++++++++++++ hw/remote/trace-events | 1 + 7 files changed, 112 insertions(+), 2 deletions(-)
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h index f2fc2d5375..ffc030d9ca 100644 --- a/include/hw/pci/pci.h +++ b/include/hw/pci/pci.h @@ -128,6 +128,8 @@ typedef uint32_t PCIConfigReadFunc(PCIDevice *pci_dev, typedef void PCIMapIORegionFunc(PCIDevice *pci_dev, int region_num, pcibus_t addr, pcibus_t size, int type); typedef void PCIUnregisterFunc(PCIDevice *pci_dev); +typedef void PCIMSINotify(PCIDevice *pci_dev, unsigned vector); +typedef void PCIMSIxNotify(PCIDevice *pci_dev, unsigned vector); typedef struct PCIIORegion { pcibus_t addr; /* current PCI mapping address. -1 means not mapped */ @@ -321,6 +323,10 @@ struct PCIDevice { /* Space to store MSIX table & pending bit array */ uint8_t *msix_table; uint8_t *msix_pba; + + PCIMSINotify *msi_notify; + PCIMSIxNotify *msix_notify; + /* MemoryRegion container for msix exclusive BAR setup */ MemoryRegion msix_exclusive_bar; /* Memory Regions for MSIX table and pending bit entries. */ diff --git a/include/hw/remote/iohub.h b/include/hw/remote/iohub.h index 0bf98e0d78..70d98b38d0 100644 --- a/include/hw/remote/iohub.h +++ b/include/hw/remote/iohub.h @@ -30,6 +30,7 @@ typedef struct RemoteIOHubState { unsigned int irq_level[REMOTE_IOHUB_NB_PIRQS]; ResampleToken token[REMOTE_IOHUB_NB_PIRQS]; QemuMutex irq_level_lock[REMOTE_IOHUB_NB_PIRQS]; + void (*intx_notify)(int pirq, unsigned vector); } RemoteIOHubState; int remote_iohub_map_irq(PCIDevice *pci_dev, int intx); diff --git a/hw/pci/msi.c b/hw/pci/msi.c index 47d2b0f33c..93f5e400cc 100644 --- a/hw/pci/msi.c +++ b/hw/pci/msi.c @@ -51,6 +51,8 @@ */ bool msi_nonbroken; +static void pci_msi_notify(PCIDevice *dev, unsigned int vector); + /* If we get rid of cap allocator, we won't need this. */ static inline uint8_t msi_cap_sizeof(uint16_t flags) { @@ -225,6 +227,8 @@ int msi_init(struct PCIDevice *dev, uint8_t offset, dev->msi_cap = config_offset; dev->cap_present |= QEMU_PCI_CAP_MSI; + dev->msi_notify = pci_msi_notify; + pci_set_word(dev->config + msi_flags_off(dev), flags); pci_set_word(dev->wmask + msi_flags_off(dev), PCI_MSI_FLAGS_QSIZE | PCI_MSI_FLAGS_ENABLE); @@ -307,7 +311,7 @@ bool msi_is_masked(const PCIDevice *dev, unsigned int vector) return mask & (1U << vector); } -void msi_notify(PCIDevice *dev, unsigned int vector) +static void pci_msi_notify(PCIDevice *dev, unsigned int vector) { uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev)); bool msi64bit = flags & PCI_MSI_FLAGS_64BIT; @@ -332,6 +336,13 @@ void msi_notify(PCIDevice *dev, unsigned int vector) msi_send_message(dev, msg); } +void msi_notify(PCIDevice *dev, unsigned int vector) +{ + if (dev->msi_notify) { + dev->msi_notify(dev, vector); + } +} + void msi_send_message(PCIDevice *dev, MSIMessage msg) { MemTxAttrs attrs = {}; diff --git a/hw/pci/msix.c b/hw/pci/msix.c index ae9331cd0b..1c71e67f53 100644 --- a/hw/pci/msix.c +++ b/hw/pci/msix.c @@ -31,6 +31,8 @@ #define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE >> 8) #define MSIX_MASKALL_MASK (PCI_MSIX_FLAGS_MASKALL >> 8) +static void pci_msix_notify(PCIDevice *dev, unsigned vector); + MSIMessage msix_get_message(PCIDevice *dev, unsigned vector) { uint8_t *table_entry = dev->msix_table + vector * PCI_MSIX_ENTRY_SIZE; @@ -334,6 +336,7 @@ int msix_init(struct PCIDevice *dev, unsigned short nentries, dev->msix_table = g_malloc0(table_size); dev->msix_pba = g_malloc0(pba_size); dev->msix_entry_used = g_malloc0(nentries * sizeof *dev->msix_entry_used); + dev->msix_notify = pci_msix_notify; msix_mask_all(dev, nentries); @@ -485,7 +488,7 @@ int msix_enabled(PCIDevice *dev) } /* Send an MSI-X message */ -void msix_notify(PCIDevice *dev, unsigned vector) +static void pci_msix_notify(PCIDevice *dev, unsigned vector) { MSIMessage msg; @@ -503,6 +506,13 @@ void msix_notify(PCIDevice *dev, unsigned vector) msi_send_message(dev, msg); } +void msix_notify(PCIDevice *dev, unsigned vector) +{ + if (dev->msix_notify) { + dev->msix_notify(dev, vector); + } +} + void msix_reset(PCIDevice *dev) { if (!msix_present(dev)) { diff --git a/hw/remote/iohub.c b/hw/remote/iohub.c index 547d597f0f..d28d9f3ce2 100644 --- a/hw/remote/iohub.c +++ b/hw/remote/iohub.c @@ -17,7 +17,9 @@ #include "qemu/thread.h" #include "hw/remote/machine.h" #include "hw/remote/iohub.h" +#include "hw/pci/msi.h" #include "qemu/main-loop.h" +#include "trace.h" void remote_iohub_init(RemoteIOHubState *iohub) { @@ -32,6 +34,8 @@ void remote_iohub_init(RemoteIOHubState *iohub) event_notifier_init_fd(&iohub->irqfds[pirq], -1); event_notifier_init_fd(&iohub->resamplefds[pirq], -1); } + + msi_nonbroken = true; } void remote_iohub_finalize(RemoteIOHubState *iohub) @@ -62,6 +66,9 @@ void remote_iohub_set_irq(void *opaque, int pirq, int level) QEMU_LOCK_GUARD(&iohub->irq_level_lock[pirq]); if (level) { + if (iohub->intx_notify) { + iohub->intx_notify(pirq, 0); + } if (++iohub->irq_level[pirq] == 1) { event_notifier_set(&iohub->irqfds[pirq]); } diff --git a/hw/remote/vfio-user-obj.c b/hw/remote/vfio-user-obj.c index ae375e69b9..2b28d465d5 100644 --- a/hw/remote/vfio-user-obj.c +++ b/hw/remote/vfio-user-obj.c @@ -50,6 +50,9 @@ #include "hw/pci/pci.h" #include "qemu/timer.h" #include "hw/remote/iommu.h" +#include "hw/pci/msi.h" +#include "hw/pci/msix.h" +#include "hw/remote/iohub.h" #define TYPE_VFU_OBJECT "x-vfio-user-server" OBJECT_DECLARE_TYPE(VfuObject, VfuObjectClass, VFU_OBJECT) @@ -81,6 +84,8 @@ struct VfuObject { int vfu_poll_fd; }; +static GHashTable *vfu_object_dev_table; + static void vfu_object_init_ctx(VfuObject *o, Error **errp); static void vfu_object_set_socket(Object *obj, Visitor *v, const char *name, @@ -347,6 +352,54 @@ static void vfu_object_register_bars(vfu_ctx_t *vfu_ctx, PCIDevice *pdev) } } +static void vfu_object_intx_notify(int pci_devfn, unsigned vector) +{ + vfu_ctx_t *vfu_ctx = g_hash_table_lookup(vfu_object_dev_table, + (void *)(uint64_t)pci_devfn); + + if (vfu_ctx) { + vfu_irq_trigger(vfu_ctx, vector); + } +} + +static void vfu_object_msi_notify(PCIDevice *pci_dev, unsigned vector) +{ + vfu_object_intx_notify(pci_dev->devfn, vector); +} + +static int vfu_object_setup_irqs(vfu_ctx_t *vfu_ctx, PCIDevice *pci_dev) +{ + RemoteMachineState *machine = REMOTE_MACHINE(current_machine); + RemoteIOHubState *iohub = &machine->iohub; + int ret; + + ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_INTX_IRQ, 1); + if (ret < 0) { + return ret; + } + + iohub->intx_notify = vfu_object_intx_notify; + + ret = 0; + if (msix_nr_vectors_allocated(pci_dev)) { + ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_MSIX_IRQ, + msix_nr_vectors_allocated(pci_dev)); + + pci_dev->msix_notify = vfu_object_msi_notify; + } else if (msi_nr_vectors_allocated(pci_dev)) { + ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_MSI_IRQ, + msi_nr_vectors_allocated(pci_dev)); + + pci_dev->msi_notify = vfu_object_msi_notify; + } + + if (ret < 0) { + return ret; + } + + return 0; +} + /* * TYPE_VFU_OBJECT depends on the availability of the 'socket' and 'device' * properties. It also depends on devices instantiated in QEMU. These @@ -437,6 +490,13 @@ static void vfu_object_init_ctx(VfuObject *o, Error **errp) vfu_object_register_bars(o->vfu_ctx, o->pci_dev); + ret = vfu_object_setup_irqs(o->vfu_ctx, o->pci_dev); + if (ret < 0) { + error_setg(errp, "vfu: Failed to setup interrupts for %s", + o->device); + goto fail; + } + ret = vfu_realize_ctx(o->vfu_ctx); if (ret < 0) { error_setg(errp, "vfu: Failed to realize device %s- %s", @@ -450,6 +510,9 @@ static void vfu_object_init_ctx(VfuObject *o, Error **errp) goto fail; } + g_hash_table_insert(vfu_object_dev_table, + (void *)(uint64_t)o->pci_dev->devfn, o->vfu_ctx); + qemu_set_fd_handler(o->vfu_poll_fd, vfu_object_attach_ctx, NULL, o); return; @@ -504,9 +567,18 @@ static void vfu_object_finalize(Object *obj) remote_iommu_free(o->pci_dev); } + if (o->pci_dev && + g_hash_table_lookup(vfu_object_dev_table, + (void *)(uint64_t)o->pci_dev->devfn)) { + g_hash_table_remove(vfu_object_dev_table, + (void *)(uint64_t)o->pci_dev->devfn); + } + o->pci_dev = NULL; if (!k->nr_devs && !k->daemon) { + g_hash_table_destroy(vfu_object_dev_table); + vfu_object_dev_table = NULL; qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); } @@ -525,6 +597,8 @@ static void vfu_object_class_init(ObjectClass *klass, void *data) /* Later determine how to detect a daemon */ k->daemon = false; + vfu_object_dev_table = g_hash_table_new_full(NULL, NULL, NULL, NULL); + object_class_property_add(klass, "socket", "SocketAddress", NULL, vfu_object_set_socket, NULL, NULL); object_class_property_set_description(klass, "socket", diff --git a/hw/remote/trace-events b/hw/remote/trace-events index 847d50d88f..c167b3c7a5 100644 --- a/hw/remote/trace-events +++ b/hw/remote/trace-events @@ -12,3 +12,4 @@ vfu_dma_unregister(uint64_t gpa) "vfu: unregistering GPA 0x%"PRIx64"" vfu_bar_register(int i, uint64_t addr, uint64_t size) "vfu: BAR %d: addr 0x%"PRIx64" size 0x%"PRIx64"" vfu_bar_rw_enter(const char *op, uint64_t addr) "vfu: %s request for BAR address 0x%"PRIx64"" vfu_bar_rw_exit(const char *op, uint64_t addr) "vfu: Finished %s of BAR address 0x%"PRIx64"" +vfu_interrupt(int pirq) "vfu: sending interrupt to device - PIRQ %d" -- 2.20.1