From: Jagannathan Raman <jag.ra...@oracle.com> PCI BARs read from remote device PCI config reads/writes sent to remote server
Originally-by: John Johnson <john.g.john...@oracle.com> Signed-off-by: Elena Ufimtseva <elena.ufimts...@oracle.com> Signed-off-by: Jagannathan Raman <jag.ra...@oracle.com> Signed-off-by: John Levon <john.le...@nutanix.com> --- hw/vfio/pci.c | 243 ++++++++++++++++++++++++++------------------- hw/vfio/pci.h | 10 ++ hw/vfio/user-pci.c | 42 ++++++++ 3 files changed, 191 insertions(+), 104 deletions(-) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 90cf29325f..cd7bff2b4c 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -1728,7 +1728,7 @@ static bool vfio_msix_setup(VFIOPCIDevice *vdev, int pos, Error **errp) return true; } -static void vfio_teardown_msi(VFIOPCIDevice *vdev) +void vfio_teardown_msi(VFIOPCIDevice *vdev) { msi_uninit(&vdev->pdev); @@ -1829,7 +1829,7 @@ static void vfio_bars_register(VFIOPCIDevice *vdev) } } -static void vfio_bars_exit(VFIOPCIDevice *vdev) +void vfio_bars_exit(VFIOPCIDevice *vdev) { int i; @@ -1849,7 +1849,7 @@ static void vfio_bars_exit(VFIOPCIDevice *vdev) } } -static void vfio_bars_finalize(VFIOPCIDevice *vdev) +void vfio_bars_finalize(VFIOPCIDevice *vdev) { int i; @@ -2417,7 +2417,7 @@ static void vfio_add_ext_cap(VFIOPCIDevice *vdev) return; } -static bool vfio_add_capabilities(VFIOPCIDevice *vdev, Error **errp) +bool vfio_add_capabilities(VFIOPCIDevice *vdev, Error **errp) { PCIDevice *pdev = &vdev->pdev; @@ -2766,7 +2766,7 @@ bool vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp) return true; } -static bool vfio_populate_device(VFIOPCIDevice *vdev, Error **errp) +bool vfio_populate_device(VFIOPCIDevice *vdev, Error **errp) { VFIODevice *vbasedev = &vdev->vbasedev; struct vfio_region_info *reg_info = NULL; @@ -2884,7 +2884,7 @@ static void vfio_err_notifier_handler(void *opaque) * and continue after disabling error recovery support for the * device. */ -static void vfio_register_err_notifier(VFIOPCIDevice *vdev) +void vfio_register_err_notifier(VFIOPCIDevice *vdev) { Error *err = NULL; int32_t fd; @@ -2943,7 +2943,7 @@ static void vfio_req_notifier_handler(void *opaque) } } -static void vfio_register_req_notifier(VFIOPCIDevice *vdev) +void vfio_register_req_notifier(VFIOPCIDevice *vdev) { struct vfio_irq_info irq_info = { .argsz = sizeof(irq_info), .index = VFIO_PCI_REQ_IRQ_INDEX }; @@ -2998,79 +2998,10 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev) vdev->req_enabled = false; } -static void vfio_realize(PCIDevice *pdev, Error **errp) +bool vfio_pci_config_setup(VFIOPCIDevice *vdev, Error **errp) { - ERRP_GUARD(); - VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev); + PCIDevice *pdev = &vdev->pdev; VFIODevice *vbasedev = &vdev->vbasedev; - int i, ret; - char uuid[UUID_STR_LEN]; - g_autofree char *name = NULL; - - if (vbasedev->fd < 0 && !vbasedev->sysfsdev) { - if (!(~vdev->host.domain || ~vdev->host.bus || - ~vdev->host.slot || ~vdev->host.function)) { - error_setg(errp, "No provided host device"); - error_append_hint(errp, "Use -device vfio-pci,host=DDDD:BB:DD.F " -#ifdef CONFIG_IOMMUFD - "or -device vfio-pci,fd=DEVICE_FD " -#endif - "or -device vfio-pci,sysfsdev=PATH_TO_DEVICE\n"); - return; - } - vbasedev->sysfsdev = - g_strdup_printf("/sys/bus/pci/devices/%04x:%02x:%02x.%01x", - vdev->host.domain, vdev->host.bus, - vdev->host.slot, vdev->host.function); - } - - if (!vfio_device_get_name(vbasedev, errp)) { - return; - } - - /* - * Mediated devices *might* operate compatibly with discarding of RAM, but - * we cannot know for certain, it depends on whether the mdev vendor driver - * stays in sync with the active working set of the guest driver. Prevent - * the x-balloon-allowed option unless this is minimally an mdev device. - */ - vbasedev->mdev = vfio_device_is_mdev(vbasedev); - - trace_vfio_mdev(vbasedev->name, vbasedev->mdev); - - if (vbasedev->ram_block_discard_allowed && !vbasedev->mdev) { - error_setg(errp, "x-balloon-allowed only potentially compatible " - "with mdev devices"); - goto error; - } - - if (!qemu_uuid_is_null(&vdev->vf_token)) { - qemu_uuid_unparse(&vdev->vf_token, uuid); - name = g_strdup_printf("%s vf_token=%s", vbasedev->name, uuid); - } else { - name = g_strdup(vbasedev->name); - } - - vbasedev->use_regfds = false; - - if (!vfio_attach_device(name, vbasedev, - pci_device_iommu_address_space(pdev), errp)) { - goto error; - } - - if (!vfio_populate_device(vdev, errp)) { - goto error; - } - - /* Get a copy of config space */ - ret = pread(vbasedev->fd, vdev->pdev.config, - MIN(pci_config_size(&vdev->pdev), vdev->config_size), - vdev->config_offset); - if (ret < (int)MIN(pci_config_size(&vdev->pdev), vdev->config_size)) { - ret = ret < 0 ? -errno : -EFAULT; - error_setg_errno(errp, -ret, "failed to read device config space"); - goto error; - } /* vfio emulates a lot for us, but some bits need extra love */ vdev->emulated_config_bits = g_malloc0(vdev->config_size); @@ -3088,10 +3019,10 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) if (vdev->vendor_id != PCI_ANY_ID) { if (vdev->vendor_id >= 0xffff) { error_setg(errp, "invalid PCI vendor ID provided"); - goto error; + return false; } vfio_add_emulated_word(vdev, PCI_VENDOR_ID, vdev->vendor_id, ~0); - trace_vfio_pci_emulated_vendor_id(vbasedev->name, vdev->vendor_id); + trace_vfio_pci_emulated_vendor_id(vdev->vbasedev.name, vdev->vendor_id); } else { vdev->vendor_id = pci_get_word(pdev->config + PCI_VENDOR_ID); } @@ -3099,7 +3030,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) if (vdev->device_id != PCI_ANY_ID) { if (vdev->device_id > 0xffff) { error_setg(errp, "invalid PCI device ID provided"); - goto error; + return false; } vfio_add_emulated_word(vdev, PCI_DEVICE_ID, vdev->device_id, ~0); trace_vfio_pci_emulated_device_id(vbasedev->name, vdev->device_id); @@ -3110,7 +3041,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) if (vdev->sub_vendor_id != PCI_ANY_ID) { if (vdev->sub_vendor_id > 0xffff) { error_setg(errp, "invalid PCI subsystem vendor ID provided"); - goto error; + return false; } vfio_add_emulated_word(vdev, PCI_SUBSYSTEM_VENDOR_ID, vdev->sub_vendor_id, ~0); @@ -3121,7 +3052,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) if (vdev->sub_device_id != PCI_ANY_ID) { if (vdev->sub_device_id > 0xffff) { error_setg(errp, "invalid PCI subsystem device ID provided"); - goto error; + return false; } vfio_add_emulated_word(vdev, PCI_SUBSYSTEM_ID, vdev->sub_device_id, ~0); trace_vfio_pci_emulated_sub_device_id(vbasedev->name, @@ -3152,11 +3083,129 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) vfio_bars_prepare(vdev); if (!vfio_msix_early_setup(vdev, errp)) { - goto error; + return false; } vfio_bars_register(vdev); + return true; +} + +bool vfio_interrupt_setup(VFIOPCIDevice *vdev, Error **errp) +{ + PCIDevice *pdev = &vdev->pdev; + + /* QEMU emulates all of MSI & MSIX */ + if (pdev->cap_present & QEMU_PCI_CAP_MSIX) { + memset(vdev->emulated_config_bits + pdev->msix_cap, 0xff, + MSIX_CAP_LENGTH); + } + + if (pdev->cap_present & QEMU_PCI_CAP_MSI) { + memset(vdev->emulated_config_bits + pdev->msi_cap, 0xff, + vdev->msi_cap_size); + } + + if (vfio_pci_read_config(&vdev->pdev, PCI_INTERRUPT_PIN, 1)) { + vdev->intx.mmap_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, + vfio_intx_mmap_enable, vdev); + pci_device_set_intx_routing_notifier(&vdev->pdev, + vfio_intx_routing_notifier); + vdev->irqchip_change_notifier.notify = vfio_irqchip_change; + kvm_irqchip_add_change_notifier(&vdev->irqchip_change_notifier); + if (!vfio_intx_enable(vdev, errp)) { + pci_device_set_intx_routing_notifier(&vdev->pdev, NULL); + kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier); + return false; + } + } + return true; +} + +static void vfio_realize(PCIDevice *pdev, Error **errp) +{ + ERRP_GUARD(); + VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev); + VFIODevice *vbasedev = &vdev->vbasedev; + int i, ret; + char uuid[UUID_STR_LEN]; + g_autofree char *name = NULL; + + if (vbasedev->fd < 0 && !vbasedev->sysfsdev) { + if (!(~vdev->host.domain || ~vdev->host.bus || + ~vdev->host.slot || ~vdev->host.function)) { + error_setg(errp, "No provided host device"); + error_append_hint(errp, "Use -device vfio-pci,host=DDDD:BB:DD.F " +#ifdef CONFIG_IOMMUFD + "or -device vfio-pci,fd=DEVICE_FD " +#endif + "or -device vfio-pci,sysfsdev=PATH_TO_DEVICE\n"); + return; + } + vbasedev->sysfsdev = + g_strdup_printf("/sys/bus/pci/devices/%04x:%02x:%02x.%01x", + vdev->host.domain, vdev->host.bus, + vdev->host.slot, vdev->host.function); + } + + if (!vfio_device_get_name(vbasedev, errp)) { + return; + } + + /* + * Mediated devices *might* operate compatibly with discarding of RAM, but + * we cannot know for certain, it depends on whether the mdev vendor driver + * stays in sync with the active working set of the guest driver. Prevent + * the x-balloon-allowed option unless this is minimally an mdev device. + */ + vbasedev->mdev = vfio_device_is_mdev(vbasedev); + + trace_vfio_mdev(vbasedev->name, vbasedev->mdev); + + if (vbasedev->ram_block_discard_allowed && !vbasedev->mdev) { + error_setg(errp, "x-balloon-allowed only potentially compatible " + "with mdev devices"); + goto error; + } + + if (!qemu_uuid_is_null(&vdev->vf_token)) { + qemu_uuid_unparse(&vdev->vf_token, uuid); + name = g_strdup_printf("%s vf_token=%s", vbasedev->name, uuid); + } else { + name = g_strdup(vbasedev->name); + } + + vbasedev->use_regfds = false; + + if (!vfio_attach_device(name, vbasedev, + pci_device_iommu_address_space(pdev), errp)) { + goto error; + } + + if (!vfio_populate_device(vdev, errp)) { + goto error; + } + + /* Get a copy of config space */ + ret = pread(vbasedev->fd, vdev->pdev.config, + MIN(pci_config_size(&vdev->pdev), vdev->config_size), + vdev->config_offset); + if (ret < (int)MIN(pci_config_size(&vdev->pdev), vdev->config_size)) { + ret = ret < 0 ? -errno : -EFAULT; + error_setg_errno(errp, -ret, "failed to read device config space"); + goto error; + } + + if (!vfio_pci_config_setup(vdev, errp)) { + goto error; + } + + /* + * vfio_pci_config_setup will have registered the device's BARs + * and setup any MSIX BARs, so errors after it succeeds must + * use out_teardown + */ + if (!vbasedev->mdev && !pci_device_set_iommu_device(pdev, vbasedev->hiod, errp)) { error_prepend(errp, "Failed to set iommu_device: "); @@ -3200,28 +3249,14 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) } } - /* QEMU emulates all of MSI & MSIX */ - if (pdev->cap_present & QEMU_PCI_CAP_MSIX) { - memset(vdev->emulated_config_bits + pdev->msix_cap, 0xff, - MSIX_CAP_LENGTH); - } - - if (pdev->cap_present & QEMU_PCI_CAP_MSI) { - memset(vdev->emulated_config_bits + pdev->msi_cap, 0xff, - vdev->msi_cap_size); + if (!vfio_interrupt_setup(vdev, errp)) { + goto out_teardown; } - if (vfio_pci_read_config(&vdev->pdev, PCI_INTERRUPT_PIN, 1)) { - vdev->intx.mmap_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, - vfio_intx_mmap_enable, vdev); - pci_device_set_intx_routing_notifier(&vdev->pdev, - vfio_intx_routing_notifier); - vdev->irqchip_change_notifier.notify = vfio_irqchip_change; - kvm_irqchip_add_change_notifier(&vdev->irqchip_change_notifier); - if (!vfio_intx_enable(vdev, errp)) { - goto out_deregister; - } - } + /* + * vfio_interrupt_setup will have setup INTx's KVM routing + * so errors after it succeeds must use out_deregister + */ if (vdev->display != ON_OFF_AUTO_OFF) { if (!vfio_display_probe(vdev, errp)) { diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h index c0f030f4db..5fe6eb282c 100644 --- a/hw/vfio/pci.h +++ b/hw/vfio/pci.h @@ -218,6 +218,16 @@ Object *vfio_pci_get_object(VFIODevice *vbasedev); int vfio_pci_save_config(VFIODevice *vbasedev, QEMUFile *f, Error **errp); int vfio_pci_load_config(VFIODevice *vbasedev, QEMUFile *f); void vfio_pci_put_device(VFIOPCIDevice *vdev); +bool vfio_populate_device(VFIOPCIDevice *vdev, Error **errp); +void vfio_teardown_msi(VFIOPCIDevice *vdev); +void vfio_bars_exit(VFIOPCIDevice *vdev); +void vfio_bars_finalize(VFIOPCIDevice *vdev); +bool vfio_add_capabilities(VFIOPCIDevice *vdev, Error **errp); +void vfio_put_device(VFIOPCIDevice *vdev); +void vfio_register_err_notifier(VFIOPCIDevice *vdev); +void vfio_register_req_notifier(VFIOPCIDevice *vdev); +bool vfio_pci_config_setup(VFIOPCIDevice *vdev, Error **errp); +bool vfio_interrupt_setup(VFIOPCIDevice *vdev, Error **errp); void vfio_instance_init(Object *obj); uint64_t vfio_vga_read(void *opaque, hwaddr addr, unsigned size); diff --git a/hw/vfio/user-pci.c b/hw/vfio/user-pci.c index aa5146db0a..5758e1e234 100644 --- a/hw/vfio/user-pci.c +++ b/hw/vfio/user-pci.c @@ -78,6 +78,7 @@ static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp) AddressSpace *as; SocketAddress addr; VFIOUserProxy *proxy; + int ret; /* * TODO: make option parser understand SocketAddress @@ -130,8 +131,45 @@ static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp) goto error; } + if (!vfio_populate_device(vdev, errp)) { + goto error; + } + + /* Get a copy of config space */ + ret = vbasedev->io->region_read(vbasedev, VFIO_PCI_CONFIG_REGION_INDEX, 0, + MIN(pci_config_size(pdev), vdev->config_size), + pdev->config); + if (ret < (int)MIN(pci_config_size(&vdev->pdev), vdev->config_size)) { + error_setg_errno(errp, -ret, "failed to read device config space"); + goto error; + } + + if (!vfio_pci_config_setup(vdev, errp)) { + goto error; + } + + /* + * vfio_pci_config_setup will have registered the device's BARs + * and setup any MSIX BARs, so errors after it succeeds must + * use out_teardown + */ + + if (!vfio_add_capabilities(vdev, errp)) { + goto out_teardown; + } + + if (!vfio_interrupt_setup(vdev, errp)) { + goto out_teardown; + } + + vfio_register_err_notifier(vdev); + vfio_register_req_notifier(vdev); + return; +out_teardown: + vfio_teardown_msi(vdev); + vfio_bars_exit(vdev); error: error_prepend(errp, VFIO_MSG_PREFIX, vdev->vbasedev.name); } @@ -167,6 +205,10 @@ static void vfio_user_instance_finalize(Object *obj) VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj); VFIODevice *vbasedev = &vdev->vbasedev; + vfio_bars_finalize(vdev); + g_free(vdev->emulated_config_bits); + g_free(vdev->rom); + vfio_pci_put_device(vdev); if (vbasedev->proxy != NULL) { -- 2.34.1