From: Jagannathan Raman <jag.ra...@oracle.com> Introduce basic plumbing for vfio-user behind a new --enable-vfio-user-client option.
We introduce VFIOUserContainer in hw/vfio-user/container.c, which is a container type for the "IOMMU" type "vfio-iommu-user", and share some common container code from hw/vfio/container.c. Add hw/vfio-user/pci.c for instantiating VFIOUserPCIDevice objects, sharing some common code from hw/vfio/pci.c. Originally-by: John Johnson <john.g.john...@oracle.com> Signed-off-by: Elena Ufimtseva <elena.ufimts...@oracle.com> Signed-off-by: Jagannathan Raman <jag.ra...@oracle.com> Signed-off-by: John Levon <john.le...@nutanix.com> --- MAINTAINERS | 2 + hw/meson.build | 1 + hw/vfio-user/container.c | 222 ++++++++++++++++++++++++++ hw/vfio-user/container.h | 23 +++ hw/vfio-user/meson.build | 9 ++ hw/vfio-user/pci.c | 154 ++++++++++++++++++ hw/vfio/container.c | 2 +- hw/vfio/pci.c | 12 +- hw/vfio/pci.h | 7 + include/hw/vfio/vfio-common.h | 3 + include/hw/vfio/vfio-container-base.h | 1 + meson_options.txt | 2 + scripts/meson-buildoptions.sh | 4 + 13 files changed, 435 insertions(+), 7 deletions(-) create mode 100644 hw/vfio-user/container.c create mode 100644 hw/vfio-user/container.h create mode 100644 hw/vfio-user/meson.build create mode 100644 hw/vfio-user/pci.c diff --git a/MAINTAINERS b/MAINTAINERS index 3e7e6743cc..c403742c27 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4158,6 +4158,8 @@ M: John Levon <john.le...@nutanix.com> M: Thanos Makatos <thanos.maka...@nutanix.com> S: Supported F: docs/devel/vfio-user.rst +F: hw/vfio-user/* +F: include/hw/vfio-user/* F: subprojects/libvfio-user EBPF: diff --git a/hw/meson.build b/hw/meson.build index b827c82c5d..91e8d2bdc0 100644 --- a/hw/meson.build +++ b/hw/meson.build @@ -38,6 +38,7 @@ subdir('tpm') subdir('ufs') subdir('usb') subdir('vfio') +subdir('vfio-user') subdir('virtio') subdir('watchdog') subdir('xen') diff --git a/hw/vfio-user/container.c b/hw/vfio-user/container.c new file mode 100644 index 0000000000..7b1c202517 --- /dev/null +++ b/hw/vfio-user/container.c @@ -0,0 +1,222 @@ +/* + * Container for vfio-user IOMMU type: 
rather than communicating with the kernel
+ * vfio driver, we communicate over a socket to a server using the vfio-user
+ * protocol.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include <sys/ioctl.h>
+#include <linux/vfio.h>
+
+#include "exec/address-spaces.h"
+#include "exec/memory.h"
+#include "exec/ram_addr.h"
+#include "hw/hw.h"
+#include "hw/vfio/pci.h"
+#include "hw/vfio-user/container.h"
+#include "qemu/error-report.h"
+#include "qemu/range.h"
+#include "qapi/error.h"
+#include "trace.h"
+
+static int vfio_user_dma_unmap(const VFIOContainerBase *bcontainer,
+                               hwaddr iova, ram_addr_t size,
+                               IOMMUTLBEntry *iotlb, int flags)
+{
+    return -ENOTSUP; /* stub: always reports the operation as unsupported */
+}
+
+static int vfio_user_dma_map(const VFIOContainerBase *bcontainer, hwaddr iova,
+                             ram_addr_t size, void *vaddr, bool readonly,
+                             MemoryRegion *mrp)
+{
+    return -ENOTSUP; /* stub: always reports the operation as unsupported */
+}
+
+static int
+vfio_user_set_dirty_page_tracking(const VFIOContainerBase *bcontainer,
+                                  bool start, Error **errp)
+{
+    error_setg_errno(errp, ENOTSUP, "Not supported");
+    return -ENOTSUP; /* int-returning hook: negative errno plus errp */
+}
+
+static int vfio_user_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
+                                        VFIOBitmap *vbmap, hwaddr iova,
+                                        hwaddr size, Error **errp)
+{
+    error_setg_errno(errp, ENOTSUP, "Not supported");
+    return -ENOTSUP; /* int-returning hook: negative errno plus errp */
+}
+
+static bool vfio_user_setup(VFIOContainerBase *bcontainer, Error **errp)
+{
+    error_setg_errno(errp, ENOTSUP, "Not supported");
+    return false; /* bool hook: -ENOTSUP would read as true, i.e. success */
+}
+
+static VFIOUserContainer *vfio_create_user_container(Error **errp)
+{
+    VFIOUserContainer *container; /* errp is currently unused here */
+
+    container = VFIO_IOMMU_USER(object_new(TYPE_VFIO_IOMMU_USER));
+    return container;
+}
+
+/*
+ * Try to mirror vfio_connect_container() as much as possible.
+ */
+static VFIOUserContainer *
+vfio_connect_user_container(AddressSpace *as, Error **errp)
+{
+    VFIOContainerBase *bcontainer;
+    VFIOUserContainer *container;
+    VFIOAddressSpace *space;
+    VFIOIOMMUClass *vioc;
+
+    space = vfio_get_address_space(as);
+
+    container = vfio_create_user_container(errp);
+    if (!container) {
+        goto put_space_exit;
+    }
+
+    bcontainer = &container->bcontainer;
+
+    if (!vfio_cpr_register_container(bcontainer, errp)) {
+        goto free_container_exit;
+    }
+
+    vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
+    assert(vioc->setup);
+
+    if (!vioc->setup(bcontainer, errp)) {
+        goto unregister_container_exit;
+    }
+
+    vfio_address_space_insert(space, bcontainer);
+
+    bcontainer->listener = vfio_memory_listener;
+    memory_listener_register(&bcontainer->listener, bcontainer->space->as);
+
+    if (bcontainer->error) {
+        errno = EINVAL;
+        error_propagate_prepend(errp, bcontainer->error,
+                                "memory listener initialization failed: ");
+        goto listener_release_exit;
+    }
+
+    bcontainer->initialized = true;
+
+    return container;
+
+listener_release_exit:
+    memory_listener_unregister(&bcontainer->listener);
+    if (vioc->release) {
+        vioc->release(bcontainer);
+    }
+
+unregister_container_exit:
+    vfio_cpr_unregister_container(bcontainer);
+
+free_container_exit:
+    object_unref(container);
+
+put_space_exit:
+    vfio_put_address_space(space);
+
+    return NULL;
+}
+
+static void vfio_disconnect_user_container(VFIOUserContainer *container)
+{
+    VFIOContainerBase *bcontainer = &container->bcontainer;
+    VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
+    VFIOAddressSpace *space = bcontainer->space;
+
+    memory_listener_unregister(&bcontainer->listener);
+    if (vioc->release) {
+        vioc->release(bcontainer);
+    }
+
+    /* space was read up front: the unref below may free the container. */
+    vfio_cpr_unregister_container(bcontainer);
+    object_unref(container);
+
+    vfio_put_address_space(space);
+}
+
+static bool vfio_user_get_device(VFIOUserContainer *container,
+                                 VFIODevice *vbasedev, Error **errp)
+{
+    struct vfio_device_info info = { 0 };
+
+    vbasedev->fd = -1;
+
+    vfio_prepare_device(vbasedev, &container->bcontainer, NULL, &info);
+
+    return true;
+}
+
+/*
+ * vfio_user_attach_device: attach a device to a new container.
+ */
+static bool vfio_user_attach_device(const char *name, VFIODevice *vbasedev,
+                                    AddressSpace *as, Error **errp)
+{
+    VFIOUserContainer *container;
+
+    container = vfio_connect_user_container(as, errp);
+    if (container == NULL) {
+        error_prepend(errp, "failed to connect proxy: ");
+        return false;
+    }
+
+    return vfio_user_get_device(container, vbasedev, errp);
+}
+
+static void vfio_user_detach_device(VFIODevice *vbasedev)
+{
+    VFIOUserContainer *container = container_of(vbasedev->bcontainer,
+                                                VFIOUserContainer, bcontainer);
+
+    QLIST_REMOVE(vbasedev, global_next);
+    QLIST_REMOVE(vbasedev, container_next);
+    vbasedev->bcontainer = NULL;
+    vfio_put_base_device(vbasedev);
+    vfio_disconnect_user_container(container);
+}
+
+static int vfio_user_pci_hot_reset(VFIODevice *vbasedev, bool single)
+{
+    /* ->needs_reset is always false for vfio-user. */
+    return 0;
+}
+
+static void vfio_iommu_user_class_init(ObjectClass *klass, void *data)
+{
+    VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass);
+
+    vioc->setup = vfio_user_setup;
+    vioc->dma_map = vfio_user_dma_map;
+    vioc->dma_unmap = vfio_user_dma_unmap;
+    vioc->attach_device = vfio_user_attach_device;
+    vioc->detach_device = vfio_user_detach_device;
+    vioc->set_dirty_page_tracking = vfio_user_set_dirty_page_tracking;
+    vioc->query_dirty_bitmap = vfio_user_query_dirty_bitmap;
+    vioc->pci_hot_reset = vfio_user_pci_hot_reset;
+}
+
+static const TypeInfo types[] = {
+    {
+        .name = TYPE_VFIO_IOMMU_USER,
+        .parent = TYPE_VFIO_IOMMU,
+        .instance_size = sizeof(VFIOUserContainer),
+        .class_init = vfio_iommu_user_class_init,
+    },
+};
+
+DEFINE_TYPES(types)
diff --git a/hw/vfio-user/container.h b/hw/vfio-user/container.h
new file mode 100644
index 0000000000..24ce13bc2d
--- /dev/null
+++ b/hw/vfio-user/container.h
@@ -0,0 +1,23 @@
+/*
+ * vfio-user specific definitions.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#ifndef HW_VFIO_USER_CONTAINER_H
+#define HW_VFIO_USER_CONTAINER_H
+
+#include <inttypes.h>
+#include <stdbool.h>
+
+#include "hw/vfio/vfio-common.h"
+
+/* MMU container sub-class for vfio-user. 
*/
+typedef struct VFIOUserContainer {
+    VFIOContainerBase bcontainer;
+} VFIOUserContainer;
+
+OBJECT_DECLARE_SIMPLE_TYPE(VFIOUserContainer, VFIO_IOMMU_USER);
+
+#endif /* HW_VFIO_USER_CONTAINER_H */
diff --git a/hw/vfio-user/meson.build b/hw/vfio-user/meson.build
new file mode 100644
index 0000000000..f1fee70c85
--- /dev/null
+++ b/hw/vfio-user/meson.build
@@ -0,0 +1,9 @@
+vfio_user_ss = ss.source_set()
+vfio_user_ss.add(files(
+  'container.c',
+  'pci.c',
+))
+
+if get_option('vfio_user_client').enabled()
+  specific_ss.add_all(vfio_user_ss)
+endif
diff --git a/hw/vfio-user/pci.c b/hw/vfio-user/pci.c
new file mode 100644
index 0000000000..6c85c60179
--- /dev/null
+++ b/hw/vfio-user/pci.c
@@ -0,0 +1,154 @@
+/*
+ * vfio PCI device over a UNIX socket.
+ *
+ * Copyright © 2018, 2021 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include <linux/vfio.h>
+#include <sys/ioctl.h>
+
+#include "hw/hw.h"
+#include "hw/pci/msi.h"
+#include "hw/pci/msix.h"
+#include "hw/pci/pci_bridge.h"
+#include "hw/qdev-properties.h"
+#include "hw/qdev-properties-system.h"
+#include "hw/vfio/pci.h"
+#include "qapi/error.h"
+#include "qobject/qdict.h"
+#include "qemu/error-report.h"
+#include "qemu/module.h"
+#include "qemu/range.h"
+#include "qemu/units.h"
+#include "system/kvm.h"
+#include "trace.h"
+
+#define TYPE_VFIO_USER_PCI "vfio-user-pci"
+OBJECT_DECLARE_SIMPLE_TYPE(VFIOUserPCIDevice, VFIO_USER_PCI)
+
+struct VFIOUserPCIDevice {
+    VFIOPCIDevice device;
+    char *sock_name; /* backs the "socket" property; checked in realize */
+};
+
+/*
+ * Emulated devices don't use host hot reset
+ */
+static void vfio_user_compute_needs_reset(VFIODevice *vbasedev)
+{
+    vbasedev->needs_reset = false;
+}
+
+static VFIODeviceOps vfio_user_pci_ops = {
+    .vfio_compute_needs_reset = vfio_user_compute_needs_reset,
+    .vfio_eoi = vfio_intx_eoi,
+    .vfio_get_object = vfio_pci_get_object,
+    .vfio_save_config = vfio_pci_save_config,
+    .vfio_load_config = vfio_pci_load_config,
+};
+
+static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp)
+{
+    ERRP_GUARD();
+    VFIOUserPCIDevice *udev = VFIO_USER_PCI(pdev);
+    VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev);
+    VFIODevice *vbasedev = &vdev->vbasedev;
+    AddressSpace *as;
+
+    /*
+     * TODO: make option parser understand SocketAddress
+     * and use that instead of having scalar options
+     * for each socket type.
+     */
+    if (!udev->sock_name) {
+        error_setg(errp, "No socket specified");
+        error_append_hint(errp, "Use -device vfio-user-pci,socket=<name>\n");
+        return;
+    }
+
+    vbasedev->name = g_strdup_printf("VFIO user <%s>", udev->sock_name);
+    vbasedev->ops = &vfio_user_pci_ops;
+    vbasedev->type = VFIO_DEVICE_TYPE_PCI;
+    vbasedev->dev = DEVICE(vdev);
+
+    /*
+     * vfio-user devices are effectively mdevs (don't use a host iommu).
+     */
+    vbasedev->mdev = true;
+
+    as = pci_device_iommu_address_space(pdev);
+    if (!vfio_attach_device_by_iommu_type(TYPE_VFIO_IOMMU_USER,
+                                          vbasedev->name, vbasedev,
+                                          as, errp)) {
+        error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->name);
+        return;
+    }
+}
+
+static void vfio_user_instance_init(Object *obj)
+{
+    PCIDevice *pci_dev = PCI_DEVICE(obj);
+    VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj);
+    VFIODevice *vbasedev = &vdev->vbasedev;
+
+    device_add_bootindex_property(obj, &vdev->bootindex,
+                                  "bootindex", NULL,
+                                  &pci_dev->qdev);
+    vdev->host.domain = ~0U;
+    vdev->host.bus = ~0U;
+    vdev->host.slot = ~0U;
+    vdev->host.function = ~0U;
+
+    vfio_device_init(vbasedev, VFIO_DEVICE_TYPE_PCI, &vfio_user_pci_ops,
+                     &vfio_dev_io_ioctl, DEVICE(vdev), false);
+
+    vdev->nv_gpudirect_clique = 0xFF;
+
+    /*
+     * QEMU_PCI_CAP_EXPRESS initialization does not depend on QEMU command
+     * line, therefore, no need to wait to realize like other devices.
+     */
+    pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS;
+}
+
+static void vfio_user_instance_finalize(Object *obj)
+{
+    VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj);
+
+    vfio_pci_put_device(vdev);
+}
+
+static const Property vfio_user_pci_dev_properties[] = {
+    DEFINE_PROP_STRING("socket", VFIOUserPCIDevice, sock_name),
+};
+
+static void vfio_user_pci_dev_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    PCIDeviceClass *pdc = PCI_DEVICE_CLASS(klass);
+
+    device_class_set_props(dc, vfio_user_pci_dev_properties);
+    dc->desc = "VFIO over socket PCI device assignment";
+    pdc->realize = vfio_user_pci_realize;
+}
+
+static const TypeInfo vfio_user_pci_dev_info = {
+    .name = TYPE_VFIO_USER_PCI,
+    .parent = TYPE_VFIO_PCI_BASE,
+    .instance_size = sizeof(VFIOUserPCIDevice),
+    .class_init = vfio_user_pci_dev_class_init,
+    .instance_init = vfio_user_instance_init,
+    .instance_finalize = vfio_user_instance_finalize,
+};
+
+static void register_vfio_user_dev_type(void)
+{
+    type_register_static(&vfio_user_pci_dev_info);
+}
+
+type_init(register_vfio_user_dev_type)
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
index 36cd245c92..0e1af34ce4 100644
--- a/hw/vfio/container.c
+++ b/hw/vfio/container.c
@@ -884,7 +884,7 @@ static bool vfio_get_device(VFIOGroup *group, const char *name,
     return true;
 }
 
-static void vfio_put_base_device(VFIODevice *vbasedev)
+void vfio_put_base_device(VFIODevice *vbasedev)
 {
     if (vbasedev->regions != NULL) {
         int i;
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index a9cc9366fb..a7084a7690 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -109,7 +109,7 @@ static void vfio_intx_interrupt(void *opaque)
     }
 }
 
-static void vfio_intx_eoi(VFIODevice *vbasedev)
+void vfio_intx_eoi(VFIODevice *vbasedev)
 {
     VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
 
@@ -2587,7 +2587,7 @@ static void vfio_pci_compute_needs_reset(VFIODevice *vbasedev)
     }
 }
 
-static Object *vfio_pci_get_object(VFIODevice *vbasedev)
+Object 
*vfio_pci_get_object(VFIODevice *vbasedev) { VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev); @@ -2643,7 +2643,7 @@ static const VMStateDescription vmstate_vfio_pci_config = { } }; -static int vfio_pci_save_config(VFIODevice *vbasedev, QEMUFile *f, Error **errp) +int vfio_pci_save_config(VFIODevice *vbasedev, QEMUFile *f, Error **errp) { VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev); @@ -2651,7 +2651,7 @@ static int vfio_pci_save_config(VFIODevice *vbasedev, QEMUFile *f, Error **errp) errp); } -static int vfio_pci_load_config(VFIODevice *vbasedev, QEMUFile *f) +int vfio_pci_load_config(VFIODevice *vbasedev, QEMUFile *f) { VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev); PCIDevice *pdev = &vdev->pdev; @@ -2847,7 +2847,7 @@ static bool vfio_populate_device(VFIOPCIDevice *vdev, Error **errp) return true; } -static void vfio_pci_put_device(VFIOPCIDevice *vdev) +void vfio_pci_put_device(VFIOPCIDevice *vdev) { vfio_detach_device(&vdev->vbasedev); @@ -3394,7 +3394,7 @@ post_reset: vfio_pci_post_reset(vdev); } -static void vfio_instance_init(Object *obj) +void vfio_instance_init(Object *obj) { PCIDevice *pci_dev = PCI_DEVICE(obj); VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj); diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h index 8e79740ddb..c0f030f4db 100644 --- a/hw/vfio/pci.h +++ b/hw/vfio/pci.h @@ -213,6 +213,13 @@ uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len); void vfio_pci_write_config(PCIDevice *pdev, uint32_t addr, uint32_t val, int len); +void vfio_intx_eoi(VFIODevice *vbasedev); +Object *vfio_pci_get_object(VFIODevice *vbasedev); +int vfio_pci_save_config(VFIODevice *vbasedev, QEMUFile *f, Error **errp); +int vfio_pci_load_config(VFIODevice *vbasedev, QEMUFile *f); +void vfio_pci_put_device(VFIOPCIDevice *vdev); +void vfio_instance_init(Object *obj); + uint64_t vfio_vga_read(void *opaque, hwaddr addr, unsigned size); void vfio_vga_write(void *opaque, hwaddr addr, uint64_t data, 
unsigned size); diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h index 3512556590..ee8e7f7c0d 100644 --- a/include/hw/vfio/vfio-common.h +++ b/include/hw/vfio/vfio-common.h @@ -80,6 +80,7 @@ typedef struct VFIOMigration { struct VFIOGroup; +/* MMU container sub-class for legacy vfio implementation. */ typedef struct VFIOContainer { VFIOContainerBase bcontainer; int fd; /* /dev/vfio/vfio, empowered by the attached groups */ @@ -106,6 +107,7 @@ typedef struct VFIOIOASHwpt { QLIST_ENTRY(VFIOIOASHwpt) next; } VFIOIOASHwpt; +/* MMU container sub-class for vfio iommufd implementation. */ typedef struct VFIOIOMMUFDContainer { VFIOContainerBase bcontainer; IOMMUFDBackend *be; @@ -285,6 +287,7 @@ bool vfio_attach_device_by_iommu_type(const char *iommu_type, char *name, Error **errp); void vfio_detach_device(VFIODevice *vbasedev); VFIODevice *vfio_get_vfio_device(Object *obj); +void vfio_put_base_device(VFIODevice *vbasedev); int vfio_kvm_device_add_fd(int fd, Error **errp); int vfio_kvm_device_del_fd(int fd, Error **errp); diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h index 24e48e3a07..1ce93c5b9b 100644 --- a/include/hw/vfio/vfio-container-base.h +++ b/include/hw/vfio/vfio-container-base.h @@ -100,6 +100,7 @@ vfio_container_get_page_size_mask(const VFIOContainerBase *bcontainer) #define TYPE_VFIO_IOMMU_LEGACY TYPE_VFIO_IOMMU "-legacy" #define TYPE_VFIO_IOMMU_SPAPR TYPE_VFIO_IOMMU "-spapr" #define TYPE_VFIO_IOMMU_IOMMUFD TYPE_VFIO_IOMMU "-iommufd" +#define TYPE_VFIO_IOMMU_USER TYPE_VFIO_IOMMU "-user" OBJECT_DECLARE_TYPE(VFIOContainerBase, VFIOIOMMUClass, VFIO_IOMMU) diff --git a/meson_options.txt b/meson_options.txt index 5eeaf3eee5..ba9bc07fcf 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -109,6 +109,8 @@ option('multiprocess', type: 'feature', value: 'auto', description: 'Out of process device emulation support') option('relocatable', type : 'boolean', value : true, description: 'toggle 
relocatable install') +option('vfio_user_client', type: 'feature', value: 'disabled', + description: 'vfio-user client support') option('vfio_user_server', type: 'feature', value: 'disabled', description: 'vfio-user server support') option('dbus_display', type: 'feature', value: 'auto', diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh index a8066aab03..6ee381df8c 100644 --- a/scripts/meson-buildoptions.sh +++ b/scripts/meson-buildoptions.sh @@ -201,6 +201,8 @@ meson_options_help() { printf "%s\n" ' vdi vdi image format support' printf "%s\n" ' vduse-blk-export' printf "%s\n" ' VDUSE block export support' + printf "%s\n" ' vfio-user-client' + printf "%s\n" ' vfio-user client support' printf "%s\n" ' vfio-user-server' printf "%s\n" ' vfio-user server support' printf "%s\n" ' vhdx vhdx image format support' @@ -529,6 +531,8 @@ _meson_option_parse() { --disable-vdi) printf "%s" -Dvdi=disabled ;; --enable-vduse-blk-export) printf "%s" -Dvduse_blk_export=enabled ;; --disable-vduse-blk-export) printf "%s" -Dvduse_blk_export=disabled ;; + --enable-vfio-user-client) printf "%s" -Dvfio_user_client=enabled ;; + --disable-vfio-user-client) printf "%s" -Dvfio_user_client=disabled ;; --enable-vfio-user-server) printf "%s" -Dvfio_user_server=enabled ;; --disable-vfio-user-server) printf "%s" -Dvfio_user_server=disabled ;; --enable-vhdx) printf "%s" -Dvhdx=enabled ;; -- 2.34.1