Hi Zhenzhong,

On 9/19/23 19:23, Cédric Le Goater wrote:
> On 8/30/23 12:37, Zhenzhong Duan wrote:
>> From: Yi Liu <yi.l....@intel.com>
>>
>> Abstract the VFIOContainer to be a base object. It is supposed to be
>> embedded by legacy VFIO container and later on, into the new iommufd
>> based container.
>>
>> The base container implements generic code such as code related to
>> memory_listener and address space management. The VFIOContainerOps
>> implements callbacks that depend on the kernel user space being used.
>>
>> 'common.c' and vfio device code only manipulates the base container with
>> wrapper functions that calls the functions defined in
>> VFIOContainerOpsClass.
>> Existing 'container.c' code is converted to implement the legacy
>> container ops functions.
>>
>> Below is the base container. It's named as VFIOContainer, old
>> VFIOContainer is replaced with VFIOLegacyContainer.
>
> Usually, we introduce the new interface solely, port the current models
> on top of the new interface, wire the new models in the current
> implementation and remove the old implementation. Then, we can start
> adding extensions to support other implementations.
>
> spapr should be taken care of separately following the principle above.
> With my PPC hat, I would not even read such a massive change, too risky
> for the subsystem. This patch will need (much) further splitting to be
> understandable and acceptable.

We might split this patch as follows:

1) introduce VFIOLegacyContainer encapsulating the base VFIOContainer,
   without using the ops in the first place: common.c would call the
   vfio_container_* wrappers with a hardcoded legacy implementation,
   i.e. retrieve the legacy container with container_of;
2) introduce the BE interface without using it;
3) use the new BE interface.
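
Step 1) could look roughly like the sketch below (untested; it assumes the
existing vfio_dma_map()/vfio_dma_unmap() in container.c are simply retyped to
take a VFIOLegacyContainer * at that point, and that VFIOLegacyContainer
embeds the base VFIOContainer as 'bcontainer', as in this patch). common.c
would only see the base type, but the wrappers would still be hardwired to
the legacy backend, no ops class yet:

/* container-base.c, step 1): wrappers hardcoded to the legacy backend */
#include "qemu/osdep.h"
#include "hw/vfio/vfio-common.h"

int vfio_container_dma_map(VFIOContainer *bcontainer,
                           hwaddr iova, ram_addr_t size,
                           void *vaddr, bool readonly)
{
    /* recover the legacy container wrapping the base object */
    VFIOLegacyContainer *container = container_of(bcontainer,
                                                  VFIOLegacyContainer,
                                                  bcontainer);

    return vfio_dma_map(container, iova, size, vaddr, readonly);
}

int vfio_container_dma_unmap(VFIOContainer *bcontainer,
                             hwaddr iova, ram_addr_t size,
                             IOMMUTLBEntry *iotlb)
{
    VFIOLegacyContainer *container = container_of(bcontainer,
                                                  VFIOLegacyContainer,
                                                  bcontainer);

    return vfio_dma_unmap(container, iova, size, iotlb);
}

Step 2) would then only replace those container_of() calls with the ops
indirection, without touching common.c again.
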
Obviously this needs to be further tried out. If you wish I can try to split it that way ... Please let me know Eric > > Also, please include the .h file first, it helps in reading. Have you > considered using an InterfaceClass ? > > Thanks, > > C. > >> >> struct VFIOContainer { >> VFIOIOMMUBackendOpsClass *ops; >> VFIOAddressSpace *space; >> MemoryListener listener; >> Error *error; >> bool initialized; >> bool dirty_pages_supported; >> uint64_t dirty_pgsizes; >> uint64_t max_dirty_bitmap_size; >> unsigned long pgsizes; >> unsigned int dma_max_mappings; >> QLIST_HEAD(, VFIOGuestIOMMU) giommu_list; >> QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; >> QLIST_HEAD(, VFIORamDiscardListener) vrdl_list; >> QLIST_ENTRY(VFIOContainer) next; >> }; >> >> struct VFIOLegacyContainer { >> VFIOContainer bcontainer; >> int fd; /* /dev/vfio/vfio, empowered by the attached groups */ >> MemoryListener prereg_listener; >> unsigned iommu_type; >> QLIST_HEAD(, VFIOGroup) group_list; >> }; >> >> Co-authored-by: Eric Auger <eric.au...@redhat.com> >> Signed-off-by: Eric Auger <eric.au...@redhat.com> >> Signed-off-by: Yi Liu <yi.l....@intel.com> >> Signed-off-by: Yi Sun <yi.y....@linux.intel.com> >> Signed-off-by: Zhenzhong Duan <zhenzhong.d...@intel.com> >> --- >> hw/vfio/common.c | 72 +++++--- >> hw/vfio/container-base.c | 160 +++++++++++++++++ >> hw/vfio/container.c | 247 ++++++++++++++++---------- >> hw/vfio/meson.build | 1 + >> hw/vfio/spapr.c | 22 +-- >> hw/vfio/trace-events | 4 +- >> include/hw/vfio/vfio-common.h | 85 ++------- >> include/hw/vfio/vfio-container-base.h | 155 ++++++++++++++++ >> 8 files changed, 540 insertions(+), 206 deletions(-) >> create mode 100644 hw/vfio/container-base.c >> create mode 100644 include/hw/vfio/vfio-container-base.h >> >> diff --git a/hw/vfio/common.c b/hw/vfio/common.c >> index 044710fc1f..86b6af5740 100644 >> --- a/hw/vfio/common.c >> +++ b/hw/vfio/common.c >> @@ -379,19 +379,20 @@ static void vfio_iommu_map_notify(IOMMUNotifier >> *n, IOMMUTLBEntry *iotlb) >> * of vaddr will always be there, even if the memory object is >> * destroyed and its backing memory munmap-ed. >> */ >> - ret = vfio_dma_map(container, iova, >> - iotlb->addr_mask + 1, vaddr, >> - read_only); >> + ret = vfio_container_dma_map(container, iova, >> + iotlb->addr_mask + 1, vaddr, >> + read_only); >> if (ret) { >> - error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", " >> + error_report("vfio_container_dma_map(%p, >> 0x%"HWADDR_PRIx", " >> "0x%"HWADDR_PRIx", %p) = %d (%s)", >> container, iova, >> iotlb->addr_mask + 1, vaddr, ret, >> strerror(-ret)); >> } >> } else { >> - ret = vfio_dma_unmap(container, iova, iotlb->addr_mask + 1, >> iotlb); >> + ret = vfio_container_dma_unmap(container, iova, >> + iotlb->addr_mask + 1, iotlb); >> if (ret) { >> - error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", " >> + error_report("vfio_container_dma_unmap(%p, >> 0x%"HWADDR_PRIx", " >> "0x%"HWADDR_PRIx") = %d (%s)", >> container, iova, >> iotlb->addr_mask + 1, ret, strerror(-ret)); >> @@ -407,14 +408,15 @@ static void >> vfio_ram_discard_notify_discard(RamDiscardListener *rdl, >> { >> VFIORamDiscardListener *vrdl = container_of(rdl, >> VFIORamDiscardListener, >> listener); >> + VFIOContainer *container = vrdl->container; >> const hwaddr size = int128_get64(section->size); >> const hwaddr iova = section->offset_within_address_space; >> int ret; >> /* Unmap with a single call. 
*/ >> - ret = vfio_dma_unmap(vrdl->container, iova, size , NULL); >> + ret = vfio_container_dma_unmap(container, iova, size , NULL); >> if (ret) { >> - error_report("%s: vfio_dma_unmap() failed: %s", __func__, >> + error_report("%s: vfio_container_dma_unmap() failed: %s", >> __func__, >> strerror(-ret)); >> } >> } >> @@ -424,6 +426,7 @@ static int >> vfio_ram_discard_notify_populate(RamDiscardListener *rdl, >> { >> VFIORamDiscardListener *vrdl = container_of(rdl, >> VFIORamDiscardListener, >> listener); >> + VFIOContainer *container = vrdl->container; >> const hwaddr end = section->offset_within_region + >> int128_get64(section->size); >> hwaddr start, next, iova; >> @@ -442,8 +445,8 @@ static int >> vfio_ram_discard_notify_populate(RamDiscardListener *rdl, >> section->offset_within_address_space; >> vaddr = memory_region_get_ram_ptr(section->mr) + start; >> - ret = vfio_dma_map(vrdl->container, iova, next - start, >> - vaddr, section->readonly); >> + ret = vfio_container_dma_map(container, iova, next - start, >> + vaddr, section->readonly); >> if (ret) { >> /* Rollback */ >> vfio_ram_discard_notify_discard(rdl, section); >> @@ -756,10 +759,10 @@ static void >> vfio_listener_region_add(MemoryListener *listener, >> } >> } >> - ret = vfio_dma_map(container, iova, int128_get64(llsize), >> - vaddr, section->readonly); >> + ret = vfio_container_dma_map(container, iova, int128_get64(llsize), >> + vaddr, section->readonly); >> if (ret) { >> - error_setg(&err, "vfio_dma_map(%p, 0x%"HWADDR_PRIx", " >> + error_setg(&err, "vfio_container_dma_map(%p, >> 0x%"HWADDR_PRIx", " >> "0x%"HWADDR_PRIx", %p) = %d (%s)", >> container, iova, int128_get64(llsize), vaddr, ret, >> strerror(-ret)); >> @@ -775,7 +778,7 @@ static void >> vfio_listener_region_add(MemoryListener *listener, >> fail: >> if (memory_region_is_ram_device(section->mr)) { >> - error_report("failed to vfio_dma_map. pci p2p may not work"); >> + error_report("failed to vfio_container_dma_map. pci p2p may >> not work"); >> return; >> } >> /* >> @@ -860,18 +863,20 @@ static void >> vfio_listener_region_del(MemoryListener *listener, >> if (int128_eq(llsize, int128_2_64())) { >> /* The unmap ioctl doesn't accept a full 64-bit span. 
*/ >> llsize = int128_rshift(llsize, 1); >> - ret = vfio_dma_unmap(container, iova, >> int128_get64(llsize), NULL); >> + ret = vfio_container_dma_unmap(container, iova, >> + int128_get64(llsize), NULL); >> if (ret) { >> - error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", " >> + error_report("vfio_container_dma_unmap(%p, >> 0x%"HWADDR_PRIx", " >> "0x%"HWADDR_PRIx") = %d (%s)", >> container, iova, int128_get64(llsize), >> ret, >> strerror(-ret)); >> } >> iova += int128_get64(llsize); >> } >> - ret = vfio_dma_unmap(container, iova, int128_get64(llsize), >> NULL); >> + ret = vfio_container_dma_unmap(container, iova, >> + int128_get64(llsize), NULL); >> if (ret) { >> - error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", " >> + error_report("vfio_container_dma_unmap(%p, >> 0x%"HWADDR_PRIx", " >> "0x%"HWADDR_PRIx") = %d (%s)", >> container, iova, int128_get64(llsize), ret, >> strerror(-ret)); >> @@ -1103,7 +1108,7 @@ static void >> vfio_listener_log_global_start(MemoryListener *listener) >> if (vfio_devices_all_device_dirty_tracking(container)) { >> ret = vfio_devices_dma_logging_start(container); >> } else { >> - ret = vfio_set_dirty_page_tracking(container, true); >> + ret = vfio_container_set_dirty_page_tracking(container, true); >> } >> if (ret) { >> @@ -1121,7 +1126,7 @@ static void >> vfio_listener_log_global_stop(MemoryListener *listener) >> if (vfio_devices_all_device_dirty_tracking(container)) { >> vfio_devices_dma_logging_stop(container); >> } else { >> - ret = vfio_set_dirty_page_tracking(container, false); >> + ret = vfio_container_set_dirty_page_tracking(container, false); >> } >> if (ret) { >> @@ -1204,7 +1209,7 @@ int vfio_get_dirty_bitmap(VFIOContainer >> *container, uint64_t iova, >> if (all_device_dirty_tracking) { >> ret = vfio_devices_query_dirty_bitmap(container, &vbmap, >> iova, size); >> } else { >> - ret = vfio_query_dirty_bitmap(container, &vbmap, iova, size); >> + ret = vfio_container_query_dirty_bitmap(container, &vbmap, >> iova, size); >> } >> if (ret) { >> @@ -1214,8 +1219,7 @@ int vfio_get_dirty_bitmap(VFIOContainer >> *container, uint64_t iova, >> dirty_pages = >> cpu_physical_memory_set_dirty_lebitmap(vbmap.bitmap, ram_addr, >> vbmap.pages); >> - trace_vfio_get_dirty_bitmap(container->fd, iova, size, >> vbmap.size, >> - ram_addr, dirty_pages); >> + trace_vfio_get_dirty_bitmap(iova, size, vbmap.size, ram_addr, >> dirty_pages); >> out: >> g_free(vbmap.bitmap); >> @@ -1525,3 +1529,25 @@ retry: >> return info; >> } >> + >> +int vfio_attach_device(char *name, VFIODevice *vbasedev, >> + AddressSpace *as, Error **errp) >> +{ >> + const VFIOIOMMUBackendOpsClass *ops; >> + >> + ops = VFIO_IOMMU_BACKEND_OPS_CLASS( >> + >> object_class_by_name(TYPE_VFIO_IOMMU_BACKEND_LEGACY_OPS)); >> + if (!ops) { >> + error_setg(errp, "VFIO IOMMU Backend not found!"); >> + return -ENODEV; >> + } >> + return ops->attach_device(name, vbasedev, as, errp); >> +} >> + >> +void vfio_detach_device(VFIODevice *vbasedev) >> +{ >> + if (!vbasedev->container) { >> + return; >> + } >> + vbasedev->container->ops->detach_device(vbasedev); >> +} >> diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c >> new file mode 100644 >> index 0000000000..876e95c6dd >> --- /dev/null >> +++ b/hw/vfio/container-base.c >> @@ -0,0 +1,160 @@ >> +/* >> + * VFIO BASE CONTAINER >> + * >> + * Copyright (C) 2023 Intel Corporation. >> + * Copyright Red Hat, Inc. 
2023 >> + * >> + * Authors: Yi Liu <yi.l....@intel.com> >> + * Eric Auger <eric.au...@redhat.com> >> + * >> + * This program is free software; you can redistribute it and/or modify >> + * it under the terms of the GNU General Public License as published by >> + * the Free Software Foundation; either version 2 of the License, or >> + * (at your option) any later version. >> + >> + * This program is distributed in the hope that it will be useful, >> + * but WITHOUT ANY WARRANTY; without even the implied warranty of >> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the >> + * GNU General Public License for more details. >> + >> + * You should have received a copy of the GNU General Public License >> along >> + * with this program; if not, see <http://www.gnu.org/licenses/>. >> + */ >> + >> +#include "qemu/osdep.h" >> +#include "qapi/error.h" >> +#include "qemu/error-report.h" >> +#include "hw/vfio/vfio-container-base.h" >> + >> +VFIODevice *vfio_container_dev_iter_next(VFIOContainer *container, >> + VFIODevice *curr) >> +{ >> + if (!container->ops->dev_iter_next) { >> + return NULL; >> + } >> + >> + return container->ops->dev_iter_next(container, curr); >> +} >> + >> +int vfio_container_dma_map(VFIOContainer *container, >> + hwaddr iova, ram_addr_t size, >> + void *vaddr, bool readonly) >> +{ >> + if (!container->ops->dma_map) { >> + return -EINVAL; >> + } >> + >> + return container->ops->dma_map(container, iova, size, vaddr, >> readonly); >> +} >> + >> +int vfio_container_dma_unmap(VFIOContainer *container, >> + hwaddr iova, ram_addr_t size, >> + IOMMUTLBEntry *iotlb) >> +{ >> + if (!container->ops->dma_unmap) { >> + return -EINVAL; >> + } >> + >> + return container->ops->dma_unmap(container, iova, size, iotlb); >> +} >> + >> +int vfio_container_set_dirty_page_tracking(VFIOContainer *container, >> + bool start) >> +{ >> + /* Fallback to all pages dirty if dirty page sync isn't >> supported */ >> + if (!container->ops->set_dirty_page_tracking) { >> + return 0; >> + } >> + >> + return container->ops->set_dirty_page_tracking(container, start); >> +} >> + >> +int vfio_container_query_dirty_bitmap(VFIOContainer *container, >> + VFIOBitmap *vbmap, >> + hwaddr iova, hwaddr size) >> +{ >> + if (!container->ops->query_dirty_bitmap) { >> + return -EINVAL; >> + } >> + >> + return container->ops->query_dirty_bitmap(container, vbmap, >> iova, size); >> +} >> + >> +int vfio_container_add_section_window(VFIOContainer *container, >> + MemoryRegionSection *section, >> + Error **errp) >> +{ >> + if (!container->ops->add_window) { >> + return 0; >> + } >> + >> + return container->ops->add_window(container, section, errp); >> +} >> + >> +void vfio_container_del_section_window(VFIOContainer *container, >> + MemoryRegionSection *section) >> +{ >> + if (!container->ops->del_window) { >> + return; >> + } >> + >> + return container->ops->del_window(container, section); >> +} >> + >> +void vfio_container_init(VFIOContainer *container, >> + VFIOAddressSpace *space, >> + struct VFIOIOMMUBackendOpsClass *ops) >> +{ >> + container->ops = ops; >> + container->space = space; >> + container->error = NULL; >> + container->dirty_pages_supported = false; >> + container->dma_max_mappings = 0; >> + QLIST_INIT(&container->giommu_list); >> + QLIST_INIT(&container->hostwin_list); >> + QLIST_INIT(&container->vrdl_list); >> +} >> + >> +void vfio_container_destroy(VFIOContainer *container) >> +{ >> + VFIORamDiscardListener *vrdl, *vrdl_tmp; >> + VFIOGuestIOMMU *giommu, *tmp; >> + VFIOHostDMAWindow *hostwin, *next; >> + 
>> + QLIST_SAFE_REMOVE(container, next); >> + >> + QLIST_FOREACH_SAFE(vrdl, &container->vrdl_list, next, vrdl_tmp) { >> + RamDiscardManager *rdm; >> + >> + rdm = memory_region_get_ram_discard_manager(vrdl->mr); >> + ram_discard_manager_unregister_listener(rdm, &vrdl->listener); >> + QLIST_REMOVE(vrdl, next); >> + g_free(vrdl); >> + } >> + >> + QLIST_FOREACH_SAFE(giommu, &container->giommu_list, giommu_next, >> tmp) { >> + memory_region_unregister_iommu_notifier( >> + MEMORY_REGION(giommu->iommu_mr), &giommu->n); >> + QLIST_REMOVE(giommu, giommu_next); >> + g_free(giommu); >> + } >> + >> + QLIST_FOREACH_SAFE(hostwin, &container->hostwin_list, hostwin_next, >> + next) { >> + QLIST_REMOVE(hostwin, hostwin_next); >> + g_free(hostwin); >> + } >> +} >> + >> +static const TypeInfo vfio_iommu_backend_ops_type_info = { >> + .name = TYPE_VFIO_IOMMU_BACKEND_OPS, >> + .parent = TYPE_OBJECT, >> + .abstract = true, >> + .class_size = sizeof(VFIOIOMMUBackendOpsClass), >> +}; >> + >> +static void vfio_iommu_backend_ops_register_types(void) >> +{ >> + type_register_static(&vfio_iommu_backend_ops_type_info); >> +} >> +type_init(vfio_iommu_backend_ops_register_types); >> diff --git a/hw/vfio/container.c b/hw/vfio/container.c >> index c71fddc09a..bb29b3612d 100644 >> --- a/hw/vfio/container.c >> +++ b/hw/vfio/container.c >> @@ -42,7 +42,8 @@ >> VFIOGroupList vfio_group_list = >> QLIST_HEAD_INITIALIZER(vfio_group_list); >> -static int vfio_ram_block_discard_disable(VFIOContainer >> *container, bool state) >> +static int vfio_ram_block_discard_disable(VFIOLegacyContainer >> *container, >> + bool state) >> { >> switch (container->iommu_type) { >> case VFIO_TYPE1v2_IOMMU: >> @@ -65,11 +66,18 @@ static int >> vfio_ram_block_discard_disable(VFIOContainer *container, bool state) >> } >> } >> -VFIODevice *vfio_container_dev_iter_next(VFIOContainer *container, >> - VFIODevice *curr) >> +static VFIODevice *vfio_legacy_dev_iter_next(VFIOContainer *bcontainer, >> + VFIODevice *curr) >> { >> VFIOGroup *group; >> + assert(object_class_dynamic_cast(OBJECT_CLASS(bcontainer->ops), >> + >> TYPE_VFIO_IOMMU_BACKEND_LEGACY_OPS)); >> + >> + VFIOLegacyContainer *container = container_of(bcontainer, >> + VFIOLegacyContainer, >> + bcontainer); >> + >> if (!curr) { >> group = QLIST_FIRST(&container->group_list); >> } else { >> @@ -85,10 +93,11 @@ VFIODevice >> *vfio_container_dev_iter_next(VFIOContainer *container, >> return QLIST_FIRST(&group->device_list); >> } >> -static int vfio_dma_unmap_bitmap(VFIOContainer *container, >> +static int vfio_dma_unmap_bitmap(VFIOLegacyContainer *container, >> hwaddr iova, ram_addr_t size, >> IOMMUTLBEntry *iotlb) >> { >> + VFIOContainer *bcontainer = &container->bcontainer; >> struct vfio_iommu_type1_dma_unmap *unmap; >> struct vfio_bitmap *bitmap; >> VFIOBitmap vbmap; >> @@ -116,7 +125,7 @@ static int vfio_dma_unmap_bitmap(VFIOContainer >> *container, >> bitmap->size = vbmap.size; >> bitmap->data = (__u64 *)vbmap.bitmap; >> - if (vbmap.size > container->max_dirty_bitmap_size) { >> + if (vbmap.size > bcontainer->max_dirty_bitmap_size) { >> error_report("UNMAP: Size of bitmap too big 0x%"PRIx64, >> vbmap.size); >> ret = -E2BIG; >> goto unmap_exit; >> @@ -140,9 +149,13 @@ unmap_exit: >> /* >> * DMA - Mapping and unmapping for the "type1" IOMMU interface used >> on x86 >> */ >> -int vfio_dma_unmap(VFIOContainer *container, hwaddr iova, >> - ram_addr_t size, IOMMUTLBEntry *iotlb) >> +static int vfio_legacy_dma_unmap(VFIOContainer *bcontainer, hwaddr >> iova, >> + ram_addr_t size, IOMMUTLBEntry *iotlb) 
>> { >> + VFIOLegacyContainer *container = container_of(bcontainer, >> + VFIOLegacyContainer, >> + bcontainer); >> + >> struct vfio_iommu_type1_dma_unmap unmap = { >> .argsz = sizeof(unmap), >> .flags = 0, >> @@ -152,9 +165,9 @@ int vfio_dma_unmap(VFIOContainer *container, >> hwaddr iova, >> bool need_dirty_sync = false; >> int ret; >> - if (iotlb && >> vfio_devices_all_running_and_mig_active(container)) { >> - if (!vfio_devices_all_device_dirty_tracking(container) && >> - container->dirty_pages_supported) { >> + if (iotlb && vfio_devices_all_running_and_mig_active(bcontainer)) { >> + if (!vfio_devices_all_device_dirty_tracking(bcontainer) && >> + bcontainer->dirty_pages_supported) { >> return vfio_dma_unmap_bitmap(container, iova, size, >> iotlb); >> } >> @@ -176,8 +189,8 @@ int vfio_dma_unmap(VFIOContainer *container, >> hwaddr iova, >> */ >> if (errno == EINVAL && unmap.size && !(unmap.iova + >> unmap.size) && >> container->iommu_type == VFIO_TYPE1v2_IOMMU) { >> - trace_vfio_dma_unmap_overflow_workaround(); >> - unmap.size -= 1ULL << ctz64(container->pgsizes); >> + trace_vfio_legacy_dma_unmap_overflow_workaround(); >> + unmap.size -= 1ULL << ctz64(bcontainer->pgsizes); >> continue; >> } >> error_report("VFIO_UNMAP_DMA failed: %s", strerror(errno)); >> @@ -185,7 +198,7 @@ int vfio_dma_unmap(VFIOContainer *container, >> hwaddr iova, >> } >> if (need_dirty_sync) { >> - ret = vfio_get_dirty_bitmap(container, iova, size, >> + ret = vfio_get_dirty_bitmap(bcontainer, iova, size, >> iotlb->translated_addr); >> if (ret) { >> return ret; >> @@ -195,9 +208,13 @@ int vfio_dma_unmap(VFIOContainer *container, >> hwaddr iova, >> return 0; >> } >> -int vfio_dma_map(VFIOContainer *container, hwaddr iova, >> - ram_addr_t size, void *vaddr, bool readonly) >> +static int vfio_legacy_dma_map(VFIOContainer *bcontainer, hwaddr iova, >> + ram_addr_t size, void *vaddr, bool >> readonly) >> { >> + VFIOLegacyContainer *container = container_of(bcontainer, >> + VFIOLegacyContainer, >> + bcontainer); >> + >> struct vfio_iommu_type1_dma_map map = { >> .argsz = sizeof(map), >> .flags = VFIO_DMA_MAP_FLAG_READ, >> @@ -216,7 +233,8 @@ int vfio_dma_map(VFIOContainer *container, hwaddr >> iova, >> * the VGA ROM space. 
>> */ >> if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0 || >> - (errno == EBUSY && vfio_dma_unmap(container, iova, size, >> NULL) == 0 && >> + (errno == EBUSY && >> + vfio_legacy_dma_unmap(bcontainer, iova, size, NULL) == 0 && >> ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0)) { >> return 0; >> } >> @@ -225,14 +243,18 @@ int vfio_dma_map(VFIOContainer *container, >> hwaddr iova, >> return -errno; >> } >> -int vfio_set_dirty_page_tracking(VFIOContainer *container, bool >> start) >> +static int vfio_legacy_set_dirty_page_tracking(VFIOContainer >> *bcontainer, >> + bool start) >> { >> + VFIOLegacyContainer *container = container_of(bcontainer, >> + VFIOLegacyContainer, >> + bcontainer); >> int ret; >> struct vfio_iommu_type1_dirty_bitmap dirty = { >> .argsz = sizeof(dirty), >> }; >> - if (!container->dirty_pages_supported) { >> + if (!bcontainer->dirty_pages_supported) { >> return 0; >> } >> @@ -252,9 +274,13 @@ int vfio_set_dirty_page_tracking(VFIOContainer >> *container, bool start) >> return ret; >> } >> -int vfio_query_dirty_bitmap(VFIOContainer *container, VFIOBitmap >> *vbmap, >> - hwaddr iova, hwaddr size) >> +static int vfio_legacy_query_dirty_bitmap(VFIOContainer *bcontainer, >> + VFIOBitmap *vbmap, >> + hwaddr iova, hwaddr size) >> { >> + VFIOLegacyContainer *container = container_of(bcontainer, >> + VFIOLegacyContainer, >> + bcontainer); >> struct vfio_iommu_type1_dirty_bitmap *dbitmap; >> struct vfio_iommu_type1_dirty_bitmap_get *range; >> int ret; >> @@ -289,18 +315,24 @@ int vfio_query_dirty_bitmap(VFIOContainer >> *container, VFIOBitmap *vbmap, >> return ret; >> } >> -static void vfio_listener_release(VFIOContainer *container) >> +static void vfio_listener_release(VFIOLegacyContainer *container) >> { >> - memory_listener_unregister(&container->listener); >> + VFIOContainer *bcontainer = &container->bcontainer; >> + >> + memory_listener_unregister(&bcontainer->listener); >> if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) { >> memory_listener_unregister(&container->prereg_listener); >> } >> } >> -int vfio_container_add_section_window(VFIOContainer *container, >> - MemoryRegionSection *section, >> - Error **errp) >> +static int >> +vfio_legacy_add_section_window(VFIOContainer *bcontainer, >> + MemoryRegionSection *section, >> + Error **errp) >> { >> + VFIOLegacyContainer *container = container_of(bcontainer, >> + VFIOLegacyContainer, >> + bcontainer); >> VFIOHostDMAWindow *hostwin; >> hwaddr pgsize = 0; >> int ret; >> @@ -310,7 +342,7 @@ int >> vfio_container_add_section_window(VFIOContainer *container, >> } >> /* For now intersections are not allowed, we may relax this >> later */ >> - QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) { >> + QLIST_FOREACH(hostwin, &bcontainer->hostwin_list, hostwin_next) { >> if (ranges_overlap(hostwin->min_iova, >> hostwin->max_iova - hostwin->min_iova + 1, >> section->offset_within_address_space, >> @@ -332,7 +364,7 @@ int >> vfio_container_add_section_window(VFIOContainer *container, >> return ret; >> } >> - vfio_host_win_add(container, >> section->offset_within_address_space, >> + vfio_host_win_add(bcontainer, section->offset_within_address_space, >> section->offset_within_address_space + >> int128_get64(section->size) - 1, pgsize); >> #ifdef CONFIG_KVM >> @@ -365,16 +397,21 @@ int >> vfio_container_add_section_window(VFIOContainer *container, >> return 0; >> } >> -void vfio_container_del_section_window(VFIOContainer *container, >> - MemoryRegionSection *section) >> +static void >> 
+vfio_legacy_del_section_window(VFIOContainer *bcontainer, >> + MemoryRegionSection *section) >> { >> + VFIOLegacyContainer *container = container_of(bcontainer, >> + VFIOLegacyContainer, >> + bcontainer); >> + >> if (container->iommu_type != VFIO_SPAPR_TCE_v2_IOMMU) { >> return; >> } >> vfio_spapr_remove_window(container, >> section->offset_within_address_space); >> - if (vfio_host_win_del(container, >> + if (vfio_host_win_del(bcontainer, >> section->offset_within_address_space, >> section->offset_within_address_space + >> int128_get64(section->size) - 1) < 0) { >> @@ -427,7 +464,7 @@ static void vfio_kvm_device_del_group(VFIOGroup >> *group) >> /* >> * vfio_get_iommu_type - selects the richest iommu_type (v2 first) >> */ >> -static int vfio_get_iommu_type(VFIOContainer *container, >> +static int vfio_get_iommu_type(VFIOLegacyContainer *container, >> Error **errp) >> { >> int iommu_types[] = { VFIO_TYPE1v2_IOMMU, VFIO_TYPE1_IOMMU, >> @@ -443,7 +480,7 @@ static int vfio_get_iommu_type(VFIOContainer >> *container, >> return -EINVAL; >> } >> -static int vfio_init_container(VFIOContainer *container, int >> group_fd, >> +static int vfio_init_container(VFIOLegacyContainer *container, int >> group_fd, >> Error **errp) >> { >> int iommu_type, ret; >> @@ -478,7 +515,7 @@ static int vfio_init_container(VFIOContainer >> *container, int group_fd, >> return 0; >> } >> -static int vfio_get_iommu_info(VFIOContainer *container, >> +static int vfio_get_iommu_info(VFIOLegacyContainer *container, >> struct vfio_iommu_type1_info **info) >> { >> @@ -522,11 +559,12 @@ vfio_get_iommu_info_cap(struct >> vfio_iommu_type1_info *info, uint16_t id) >> return NULL; >> } >> -static void vfio_get_iommu_info_migration(VFIOContainer *container, >> - struct >> vfio_iommu_type1_info *info) >> +static void vfio_get_iommu_info_migration(VFIOLegacyContainer >> *container, >> + struct >> vfio_iommu_type1_info *info) >> { >> struct vfio_info_cap_header *hdr; >> struct vfio_iommu_type1_info_cap_migration *cap_mig; >> + VFIOContainer *bcontainer = &container->bcontainer; >> hdr = vfio_get_iommu_info_cap(info, >> VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION); >> if (!hdr) { >> @@ -541,16 +579,19 @@ static void >> vfio_get_iommu_info_migration(VFIOContainer *container, >> * qemu_real_host_page_size to mark those dirty. >> */ >> if (cap_mig->pgsize_bitmap & qemu_real_host_page_size()) { >> - container->dirty_pages_supported = true; >> - container->max_dirty_bitmap_size = >> cap_mig->max_dirty_bitmap_size; >> - container->dirty_pgsizes = cap_mig->pgsize_bitmap; >> + bcontainer->dirty_pages_supported = true; >> + bcontainer->max_dirty_bitmap_size = >> cap_mig->max_dirty_bitmap_size; >> + bcontainer->dirty_pgsizes = cap_mig->pgsize_bitmap; >> } >> } >> static int vfio_connect_container(VFIOGroup *group, AddressSpace >> *as, >> Error **errp) >> { >> - VFIOContainer *container; >> + VFIOIOMMUBackendOpsClass *ops = VFIO_IOMMU_BACKEND_OPS_CLASS( >> + object_class_by_name(TYPE_VFIO_IOMMU_BACKEND_LEGACY_OPS)); >> + VFIOContainer *bcontainer; >> + VFIOLegacyContainer *container; >> int ret, fd; >> VFIOAddressSpace *space; >> @@ -587,7 +628,8 @@ static int vfio_connect_container(VFIOGroup >> *group, AddressSpace *as, >> * details once we know which type of IOMMU we are using. 
>> */ >> - QLIST_FOREACH(container, &space->containers, next) { >> + QLIST_FOREACH(bcontainer, &space->containers, next) { >> + container = container_of(bcontainer, VFIOLegacyContainer, >> bcontainer); >> if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, >> &container->fd)) { >> ret = vfio_ram_block_discard_disable(container, true); >> if (ret) { >> @@ -623,14 +665,9 @@ static int vfio_connect_container(VFIOGroup >> *group, AddressSpace *as, >> } >> container = g_malloc0(sizeof(*container)); >> - container->space = space; >> container->fd = fd; >> - container->error = NULL; >> - container->dirty_pages_supported = false; >> - container->dma_max_mappings = 0; >> - QLIST_INIT(&container->giommu_list); >> - QLIST_INIT(&container->hostwin_list); >> - QLIST_INIT(&container->vrdl_list); >> + bcontainer = &container->bcontainer; >> + vfio_container_init(bcontainer, space, ops); >> ret = vfio_init_container(container, group->fd, errp); >> if (ret) { >> @@ -656,13 +693,13 @@ static int vfio_connect_container(VFIOGroup >> *group, AddressSpace *as, >> } >> if (info->flags & VFIO_IOMMU_INFO_PGSIZES) { >> - container->pgsizes = info->iova_pgsizes; >> + bcontainer->pgsizes = info->iova_pgsizes; >> } else { >> - container->pgsizes = qemu_real_host_page_size(); >> + bcontainer->pgsizes = qemu_real_host_page_size(); >> } >> - if (!vfio_get_info_dma_avail(info, >> &container->dma_max_mappings)) { >> - container->dma_max_mappings = 65535; >> + if (!vfio_get_info_dma_avail(info, >> &bcontainer->dma_max_mappings)) { >> + bcontainer->dma_max_mappings = 65535; >> } >> vfio_get_iommu_info_migration(container, info); >> g_free(info); >> @@ -672,7 +709,7 @@ static int vfio_connect_container(VFIOGroup >> *group, AddressSpace *as, >> * information to get the actual window extent rather than >> assume >> * a 64-bit IOVA address space. >> */ >> - vfio_host_win_add(container, 0, (hwaddr)-1, >> container->pgsizes); >> + vfio_host_win_add(bcontainer, 0, (hwaddr)-1, >> bcontainer->pgsizes); >> break; >> } >> @@ -699,10 +736,10 @@ static int vfio_connect_container(VFIOGroup >> *group, AddressSpace *as, >> memory_listener_register(&container->prereg_listener, >> &address_space_memory); >> - if (container->error) { >> + if (bcontainer->error) { >> >> memory_listener_unregister(&container->prereg_listener); >> ret = -1; >> - error_propagate_prepend(errp, container->error, >> + error_propagate_prepend(errp, bcontainer->error, >> "RAM memory listener initialization failed: "); >> goto enable_discards_exit; >> } >> @@ -721,7 +758,7 @@ static int vfio_connect_container(VFIOGroup >> *group, AddressSpace *as, >> } >> if (v2) { >> - container->pgsizes = info.ddw.pgsizes; >> + bcontainer->pgsizes = info.ddw.pgsizes; >> /* >> * There is a default window in just created container. 
>> * To make region_add/del simpler, we better remove this >> @@ -736,8 +773,8 @@ static int vfio_connect_container(VFIOGroup >> *group, AddressSpace *as, >> } >> } else { >> /* The default table uses 4K pages */ >> - container->pgsizes = 0x1000; >> - vfio_host_win_add(container, info.dma32_window_start, >> + bcontainer->pgsizes = 0x1000; >> + vfio_host_win_add(bcontainer, info.dma32_window_start, >> info.dma32_window_start + >> info.dma32_window_size - 1, >> 0x1000); >> @@ -748,28 +785,28 @@ static int vfio_connect_container(VFIOGroup >> *group, AddressSpace *as, >> vfio_kvm_device_add_group(group); >> QLIST_INIT(&container->group_list); >> - QLIST_INSERT_HEAD(&space->containers, container, next); >> + QLIST_INSERT_HEAD(&space->containers, bcontainer, next); >> group->container = container; >> QLIST_INSERT_HEAD(&container->group_list, group, container_next); >> - container->listener = vfio_memory_listener; >> + bcontainer->listener = vfio_memory_listener; >> - memory_listener_register(&container->listener, >> container->space->as); >> + memory_listener_register(&bcontainer->listener, >> bcontainer->space->as); >> - if (container->error) { >> + if (bcontainer->error) { >> ret = -1; >> - error_propagate_prepend(errp, container->error, >> + error_propagate_prepend(errp, bcontainer->error, >> "memory listener initialization failed: "); >> goto listener_release_exit; >> } >> - container->initialized = true; >> + bcontainer->initialized = true; >> return 0; >> listener_release_exit: >> QLIST_REMOVE(group, container_next); >> - QLIST_REMOVE(container, next); >> + QLIST_REMOVE(bcontainer, next); >> vfio_kvm_device_del_group(group); >> vfio_listener_release(container); >> @@ -790,7 +827,8 @@ put_space_exit: >> static void vfio_disconnect_container(VFIOGroup *group) >> { >> - VFIOContainer *container = group->container; >> + VFIOLegacyContainer *container = group->container; >> + VFIOContainer *bcontainer = &container->bcontainer; >> QLIST_REMOVE(group, container_next); >> group->container = NULL; >> @@ -810,25 +848,9 @@ static void vfio_disconnect_container(VFIOGroup >> *group) >> } >> if (QLIST_EMPTY(&container->group_list)) { >> - VFIOAddressSpace *space = container->space; >> - VFIOGuestIOMMU *giommu, *tmp; >> - VFIOHostDMAWindow *hostwin, *next; >> - >> - QLIST_REMOVE(container, next); >> - >> - QLIST_FOREACH_SAFE(giommu, &container->giommu_list, >> giommu_next, tmp) { >> - memory_region_unregister_iommu_notifier( >> - MEMORY_REGION(giommu->iommu_mr), &giommu->n); >> - QLIST_REMOVE(giommu, giommu_next); >> - g_free(giommu); >> - } >> - >> - QLIST_FOREACH_SAFE(hostwin, &container->hostwin_list, >> hostwin_next, >> - next) { >> - QLIST_REMOVE(hostwin, hostwin_next); >> - g_free(hostwin); >> - } >> + VFIOAddressSpace *space = bcontainer->space; >> + vfio_container_destroy(bcontainer); >> trace_vfio_disconnect_container(container->fd); >> close(container->fd); >> g_free(container); >> @@ -840,13 +862,15 @@ static void vfio_disconnect_container(VFIOGroup >> *group) >> static VFIOGroup *vfio_get_group(int groupid, AddressSpace *as, >> Error **errp) >> { >> VFIOGroup *group; >> + VFIOContainer *bcontainer; >> char path[32]; >> struct vfio_group_status status = { .argsz = sizeof(status) }; >> QLIST_FOREACH(group, &vfio_group_list, next) { >> if (group->groupid == groupid) { >> /* Found it. Now is it already in the right context? 
*/ >> - if (group->container->space->as == as) { >> + bcontainer = &group->container->bcontainer; >> + if (bcontainer->space->as == as) { >> return group; >> } else { >> error_setg(errp, "group %d used in multiple address >> spaces", >> @@ -990,7 +1014,7 @@ static void vfio_put_base_device(VFIODevice >> *vbasedev) >> /* >> * Interfaces for IBM EEH (Enhanced Error Handling) >> */ >> -static bool vfio_eeh_container_ok(VFIOContainer *container) >> +static bool vfio_eeh_container_ok(VFIOLegacyContainer *container) >> { >> /* >> * As of 2016-03-04 (linux-4.5) the host kernel EEH/VFIO >> @@ -1018,7 +1042,7 @@ static bool vfio_eeh_container_ok(VFIOContainer >> *container) >> return true; >> } >> -static int vfio_eeh_container_op(VFIOContainer *container, >> uint32_t op) >> +static int vfio_eeh_container_op(VFIOLegacyContainer *container, >> uint32_t op) >> { >> struct vfio_eeh_pe_op pe_op = { >> .argsz = sizeof(pe_op), >> @@ -1041,19 +1065,21 @@ static int >> vfio_eeh_container_op(VFIOContainer *container, uint32_t op) >> return ret; >> } >> -static VFIOContainer *vfio_eeh_as_container(AddressSpace *as) >> +static VFIOLegacyContainer *vfio_eeh_as_container(AddressSpace *as) >> { >> VFIOAddressSpace *space = vfio_get_address_space(as); >> - VFIOContainer *container = NULL; >> + VFIOLegacyContainer *container = NULL; >> + VFIOContainer *bcontainer = NULL; >> if (QLIST_EMPTY(&space->containers)) { >> /* No containers to act on */ >> goto out; >> } >> - container = QLIST_FIRST(&space->containers); >> + bcontainer = QLIST_FIRST(&space->containers); >> + container = container_of(bcontainer, VFIOLegacyContainer, >> bcontainer); >> - if (QLIST_NEXT(container, next)) { >> + if (QLIST_NEXT(bcontainer, next)) { >> /* >> * We don't yet have logic to synchronize EEH state across >> * multiple containers >> @@ -1069,14 +1095,14 @@ out: >> bool vfio_eeh_as_ok(AddressSpace *as) >> { >> - VFIOContainer *container = vfio_eeh_as_container(as); >> + VFIOLegacyContainer *container = vfio_eeh_as_container(as); >> return (container != NULL) && vfio_eeh_container_ok(container); >> } >> int vfio_eeh_as_op(AddressSpace *as, uint32_t op) >> { >> - VFIOContainer *container = vfio_eeh_as_container(as); >> + VFIOLegacyContainer *container = vfio_eeh_as_container(as); >> if (!container) { >> return -ENODEV; >> @@ -1110,8 +1136,8 @@ static int vfio_device_groupid(VFIODevice >> *vbasedev, Error **errp) >> return groupid; >> } >> -int vfio_attach_device(char *name, VFIODevice *vbasedev, >> - AddressSpace *as, Error **errp) >> +static int vfio_legacy_attach_device(char *name, VFIODevice *vbasedev, >> + AddressSpace *as, Error **errp) >> { >> int groupid = vfio_device_groupid(vbasedev, errp); >> VFIODevice *vbasedev_iter; >> @@ -1137,15 +1163,46 @@ int vfio_attach_device(char *name, VFIODevice >> *vbasedev, >> ret = vfio_get_device(group, name, vbasedev, errp); >> if (ret) { >> vfio_put_group(group); >> + return ret; >> } >> + vbasedev->container = &group->container->bcontainer; >> return ret; >> } >> -void vfio_detach_device(VFIODevice *vbasedev) >> +static void vfio_legacy_detach_device(VFIODevice *vbasedev) >> { >> VFIOGroup *group = vbasedev->group; >> vfio_put_base_device(vbasedev); >> vfio_put_group(group); >> + vbasedev->container = NULL; >> +} >> + >> +static void vfio_iommu_backend_legacy_ops_class_init(ObjectClass *oc, >> + void *data) { >> + VFIOIOMMUBackendOpsClass *ops = VFIO_IOMMU_BACKEND_OPS_CLASS(oc); >> + >> + ops->dev_iter_next = vfio_legacy_dev_iter_next; >> + ops->dma_map = vfio_legacy_dma_map; >> + 
ops->dma_unmap = vfio_legacy_dma_unmap; >> + ops->attach_device = vfio_legacy_attach_device; >> + ops->detach_device = vfio_legacy_detach_device; >> + ops->set_dirty_page_tracking = vfio_legacy_set_dirty_page_tracking; >> + ops->query_dirty_bitmap = vfio_legacy_query_dirty_bitmap; >> + ops->add_window = vfio_legacy_add_section_window; >> + ops->del_window = vfio_legacy_del_section_window; >> +} >> + >> +static const TypeInfo vfio_iommu_backend_legacy_ops_type = { >> + .name = TYPE_VFIO_IOMMU_BACKEND_LEGACY_OPS, >> + >> + .parent = TYPE_VFIO_IOMMU_BACKEND_OPS, >> + .class_init = vfio_iommu_backend_legacy_ops_class_init, >> + .abstract = true, >> +}; >> +static void vfio_iommu_backend_legacy_ops_register_types(void) >> +{ >> + type_register_static(&vfio_iommu_backend_legacy_ops_type); >> } >> +type_init(vfio_iommu_backend_legacy_ops_register_types); >> diff --git a/hw/vfio/meson.build b/hw/vfio/meson.build >> index 2a6912c940..eb6ce6229d 100644 >> --- a/hw/vfio/meson.build >> +++ b/hw/vfio/meson.build >> @@ -2,6 +2,7 @@ vfio_ss = ss.source_set() >> vfio_ss.add(files( >> 'helpers.c', >> 'common.c', >> + 'container-base.c', >> 'container.c', >> 'spapr.c', >> 'migration.c', >> diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c >> index 9ec1e95f6d..7647e7d492 100644 >> --- a/hw/vfio/spapr.c >> +++ b/hw/vfio/spapr.c >> @@ -39,8 +39,8 @@ static void >> *vfio_prereg_gpa_to_vaddr(MemoryRegionSection *section, hwaddr gpa) >> static void vfio_prereg_listener_region_add(MemoryListener *listener, >> MemoryRegionSection >> *section) >> { >> - VFIOContainer *container = container_of(listener, VFIOContainer, >> - prereg_listener); >> + VFIOLegacyContainer *container = container_of(listener, >> VFIOLegacyContainer, >> + prereg_listener); >> const hwaddr gpa = section->offset_within_address_space; >> hwaddr end; >> int ret; >> @@ -83,9 +83,9 @@ static void >> vfio_prereg_listener_region_add(MemoryListener *listener, >> * can gracefully fail. Runtime, there's not much we can >> do other >> * than throw a hardware error. >> */ >> - if (!container->initialized) { >> - if (!container->error) { >> - error_setg_errno(&container->error, -ret, >> + if (!container->bcontainer.initialized) { >> + if (!container->bcontainer.error) { >> + error_setg_errno(&container->bcontainer.error, -ret, >> "Memory registering failed"); >> } >> } else { >> @@ -97,8 +97,8 @@ static void >> vfio_prereg_listener_region_add(MemoryListener *listener, >> static void vfio_prereg_listener_region_del(MemoryListener *listener, >> MemoryRegionSection >> *section) >> { >> - VFIOContainer *container = container_of(listener, VFIOContainer, >> - prereg_listener); >> + VFIOLegacyContainer *container = container_of(listener, >> VFIOLegacyContainer, >> + prereg_listener); >> const hwaddr gpa = section->offset_within_address_space; >> hwaddr end; >> int ret; >> @@ -141,7 +141,7 @@ const MemoryListener vfio_prereg_listener = { >> .region_del = vfio_prereg_listener_region_del, >> }; >> -int vfio_spapr_create_window(VFIOContainer *container, >> +int vfio_spapr_create_window(VFIOLegacyContainer *container, >> MemoryRegionSection *section, >> hwaddr *pgsize) >> { >> @@ -159,13 +159,13 @@ int vfio_spapr_create_window(VFIOContainer >> *container, >> if (pagesize > rampagesize) { >> pagesize = rampagesize; >> } >> - pgmask = container->pgsizes & (pagesize | (pagesize - 1)); >> + pgmask = container->bcontainer.pgsizes & (pagesize | (pagesize - >> 1)); >> pagesize = pgmask ? 
(1ULL << (63 - clz64(pgmask))) : 0; >> if (!pagesize) { >> error_report("Host doesn't support page size 0x%"PRIx64 >> ", the supported mask is 0x%lx", >> memory_region_iommu_get_min_page_size(iommu_mr), >> - container->pgsizes); >> + container->bcontainer.pgsizes); >> return -EINVAL; >> } >> @@ -233,7 +233,7 @@ int vfio_spapr_create_window(VFIOContainer >> *container, >> return 0; >> } >> -int vfio_spapr_remove_window(VFIOContainer *container, >> +int vfio_spapr_remove_window(VFIOLegacyContainer *container, >> hwaddr offset_within_address_space) >> { >> struct vfio_iommu_spapr_tce_remove remove = { >> diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events >> index bd32970854..1692bcd8f1 100644 >> --- a/hw/vfio/trace-events >> +++ b/hw/vfio/trace-events >> @@ -119,8 +119,8 @@ vfio_region_unmap(const char *name, unsigned long >> offset, unsigned long end) "Re >> vfio_region_sparse_mmap_header(const char *name, int index, int >> nr_areas) "Device %s region %d: %d sparse mmap entries" >> vfio_region_sparse_mmap_entry(int i, unsigned long start, unsigned >> long end) "sparse entry %d [0x%lx - 0x%lx]" >> vfio_get_dev_region(const char *name, int index, uint32_t type, >> uint32_t subtype) "%s index %d, %08x/%08x" >> -vfio_dma_unmap_overflow_workaround(void) "" >> -vfio_get_dirty_bitmap(int fd, uint64_t iova, uint64_t size, uint64_t >> bitmap_size, uint64_t start, uint64_t dirty_pages) "container fd=%d, >> iova=0x%"PRIx64" size= 0x%"PRIx64" bitmap_size=0x%"PRIx64" >> start=0x%"PRIx64" dirty_pages=%"PRIu64 >> +vfio_legacy_dma_unmap_overflow_workaround(void) "" >> +vfio_get_dirty_bitmap(uint64_t iova, uint64_t size, uint64_t >> bitmap_size, uint64_t start, uint64_t dirty_pages) "iova=0x%"PRIx64" >> size= 0x%"PRIx64" bitmap_size=0x%"PRIx64" start=0x%"PRIx64" >> dirty_pages=%"PRIu64 >> vfio_iommu_map_dirty_notify(uint64_t iova_start, uint64_t iova_end) >> "iommu dirty @ 0x%"PRIx64" - 0x%"PRIx64 >> # platform.c >> diff --git a/include/hw/vfio/vfio-common.h >> b/include/hw/vfio/vfio-common.h >> index 95bcafdaf6..b1a76dcc9c 100644 >> --- a/include/hw/vfio/vfio-common.h >> +++ b/include/hw/vfio/vfio-common.h >> @@ -30,6 +30,7 @@ >> #include <linux/vfio.h> >> #endif >> #include "sysemu/sysemu.h" >> +#include "hw/vfio/vfio-container-base.h" >> #define VFIO_MSG_PREFIX "vfio %s: " >> @@ -74,64 +75,22 @@ typedef struct VFIOMigration { >> bool initial_data_sent; >> } VFIOMigration; >> -typedef struct VFIOAddressSpace { >> - AddressSpace *as; >> - QLIST_HEAD(, VFIOContainer) containers; >> - QLIST_ENTRY(VFIOAddressSpace) list; >> -} VFIOAddressSpace; >> - >> struct VFIOGroup; >> -typedef struct VFIOContainer { >> - VFIOAddressSpace *space; >> +typedef struct VFIOLegacyContainer { >> + VFIOContainer bcontainer; >> int fd; /* /dev/vfio/vfio, empowered by the attached groups */ >> - MemoryListener listener; >> MemoryListener prereg_listener; >> unsigned iommu_type; >> - Error *error; >> - bool initialized; >> - bool dirty_pages_supported; >> - uint64_t dirty_pgsizes; >> - uint64_t max_dirty_bitmap_size; >> - unsigned long pgsizes; >> - unsigned int dma_max_mappings; >> - QLIST_HEAD(, VFIOGuestIOMMU) giommu_list; >> - QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; >> QLIST_HEAD(, VFIOGroup) group_list; >> - QLIST_HEAD(, VFIORamDiscardListener) vrdl_list; >> - QLIST_ENTRY(VFIOContainer) next; >> -} VFIOContainer; >> - >> -typedef struct VFIOGuestIOMMU { >> - VFIOContainer *container; >> - IOMMUMemoryRegion *iommu_mr; >> - hwaddr iommu_offset; >> - IOMMUNotifier n; >> - QLIST_ENTRY(VFIOGuestIOMMU) giommu_next; >> -} 
VFIOGuestIOMMU; >> - >> -typedef struct VFIORamDiscardListener { >> - VFIOContainer *container; >> - MemoryRegion *mr; >> - hwaddr offset_within_address_space; >> - hwaddr size; >> - uint64_t granularity; >> - RamDiscardListener listener; >> - QLIST_ENTRY(VFIORamDiscardListener) next; >> -} VFIORamDiscardListener; >> - >> -typedef struct VFIOHostDMAWindow { >> - hwaddr min_iova; >> - hwaddr max_iova; >> - uint64_t iova_pgsizes; >> - QLIST_ENTRY(VFIOHostDMAWindow) hostwin_next; >> -} VFIOHostDMAWindow; >> +} VFIOLegacyContainer; >> typedef struct VFIODeviceOps VFIODeviceOps; >> typedef struct VFIODevice { >> QLIST_ENTRY(VFIODevice) next; >> struct VFIOGroup *group; >> + VFIOContainer *container; >> char *sysfsdev; >> char *name; >> DeviceState *dev; >> @@ -165,7 +124,7 @@ struct VFIODeviceOps { >> typedef struct VFIOGroup { >> int fd; >> int groupid; >> - VFIOContainer *container; >> + VFIOLegacyContainer *container; >> QLIST_HEAD(, VFIODevice) device_list; >> QLIST_ENTRY(VFIOGroup) next; >> QLIST_ENTRY(VFIOGroup) container_next; >> @@ -198,37 +157,13 @@ typedef struct VFIODisplay { >> } dmabuf; >> } VFIODisplay; >> -typedef struct { >> - unsigned long *bitmap; >> - hwaddr size; >> - hwaddr pages; >> -} VFIOBitmap; >> - >> -void vfio_host_win_add(VFIOContainer *container, >> +void vfio_host_win_add(VFIOContainer *bcontainer, >> hwaddr min_iova, hwaddr max_iova, >> uint64_t iova_pgsizes); >> -int vfio_host_win_del(VFIOContainer *container, hwaddr min_iova, >> +int vfio_host_win_del(VFIOContainer *bcontainer, hwaddr min_iova, >> hwaddr max_iova); >> VFIOAddressSpace *vfio_get_address_space(AddressSpace *as); >> void vfio_put_address_space(VFIOAddressSpace *space); >> -bool vfio_devices_all_running_and_saving(VFIOContainer *container); >> - >> -/* container->fd */ >> -VFIODevice *vfio_container_dev_iter_next(VFIOContainer *container, >> - VFIODevice *curr); >> -int vfio_dma_unmap(VFIOContainer *container, hwaddr iova, >> - ram_addr_t size, IOMMUTLBEntry *iotlb); >> -int vfio_dma_map(VFIOContainer *container, hwaddr iova, >> - ram_addr_t size, void *vaddr, bool readonly); >> -int vfio_set_dirty_page_tracking(VFIOContainer *container, bool start); >> -int vfio_query_dirty_bitmap(VFIOContainer *container, VFIOBitmap >> *vbmap, >> - hwaddr iova, hwaddr size); >> - >> -int vfio_container_add_section_window(VFIOContainer *container, >> - MemoryRegionSection *section, >> - Error **errp); >> -void vfio_container_del_section_window(VFIOContainer *container, >> - MemoryRegionSection *section); >> void vfio_disable_irqindex(VFIODevice *vbasedev, int index); >> void vfio_unmask_single_irqindex(VFIODevice *vbasedev, int index); >> @@ -285,10 +220,10 @@ vfio_get_cap(void *ptr, uint32_t cap_offset, >> uint16_t id); >> #endif >> extern const MemoryListener vfio_prereg_listener; >> -int vfio_spapr_create_window(VFIOContainer *container, >> +int vfio_spapr_create_window(VFIOLegacyContainer *container, >> MemoryRegionSection *section, >> hwaddr *pgsize); >> -int vfio_spapr_remove_window(VFIOContainer *container, >> +int vfio_spapr_remove_window(VFIOLegacyContainer *container, >> hwaddr offset_within_address_space); >> bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp); >> diff --git a/include/hw/vfio/vfio-container-base.h >> b/include/hw/vfio/vfio-container-base.h >> new file mode 100644 >> index 0000000000..b18fa92146 >> --- /dev/null >> +++ b/include/hw/vfio/vfio-container-base.h >> @@ -0,0 +1,155 @@ >> +/* >> + * VFIO BASE CONTAINER >> + * >> + * Copyright (C) 2023 Intel Corporation. 
>> + * Copyright Red Hat, Inc. 2023 >> + * >> + * Authors: Yi Liu <yi.l....@intel.com> >> + * Eric Auger <eric.au...@redhat.com> >> + * >> + * This program is free software; you can redistribute it and/or modify >> + * it under the terms of the GNU General Public License as published by >> + * the Free Software Foundation; either version 2 of the License, or >> + * (at your option) any later version. >> + >> + * This program is distributed in the hope that it will be useful, >> + * but WITHOUT ANY WARRANTY; without even the implied warranty of >> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the >> + * GNU General Public License for more details. >> + >> + * You should have received a copy of the GNU General Public License >> along >> + * with this program; if not, see <http://www.gnu.org/licenses/>. >> + */ >> + >> +#ifndef HW_VFIO_VFIO_BASE_CONTAINER_H >> +#define HW_VFIO_VFIO_BASE_CONTAINER_H >> + >> +#include "exec/memory.h" >> +#ifndef CONFIG_USER_ONLY >> +#include "exec/hwaddr.h" >> +#endif >> + >> +typedef struct VFIOContainer VFIOContainer; >> + >> +typedef struct VFIOAddressSpace { >> + AddressSpace *as; >> + QLIST_HEAD(, VFIOContainer) containers; >> + QLIST_ENTRY(VFIOAddressSpace) list; >> +} VFIOAddressSpace; >> + >> +typedef struct VFIOGuestIOMMU { >> + VFIOContainer *container; >> + IOMMUMemoryRegion *iommu_mr; >> + hwaddr iommu_offset; >> + IOMMUNotifier n; >> + QLIST_ENTRY(VFIOGuestIOMMU) giommu_next; >> +} VFIOGuestIOMMU; >> + >> +typedef struct VFIORamDiscardListener { >> + VFIOContainer *container; >> + MemoryRegion *mr; >> + hwaddr offset_within_address_space; >> + hwaddr size; >> + uint64_t granularity; >> + RamDiscardListener listener; >> + QLIST_ENTRY(VFIORamDiscardListener) next; >> +} VFIORamDiscardListener; >> + >> +typedef struct VFIOHostDMAWindow { >> + hwaddr min_iova; >> + hwaddr max_iova; >> + uint64_t iova_pgsizes; >> + QLIST_ENTRY(VFIOHostDMAWindow) hostwin_next; >> +} VFIOHostDMAWindow; >> + >> +typedef struct { >> + unsigned long *bitmap; >> + hwaddr size; >> + hwaddr pages; >> +} VFIOBitmap; >> + >> +typedef struct VFIODevice VFIODevice; >> +typedef struct VFIOIOMMUBackendOpsClass VFIOIOMMUBackendOpsClass; >> + >> +/* >> + * This is the base object for vfio container backends >> + */ >> +struct VFIOContainer { >> + VFIOIOMMUBackendOpsClass *ops; >> + VFIOAddressSpace *space; >> + MemoryListener listener; >> + Error *error; >> + bool initialized; >> + bool dirty_pages_supported; >> + uint64_t dirty_pgsizes; >> + uint64_t max_dirty_bitmap_size; >> + unsigned long pgsizes; >> + unsigned int dma_max_mappings; >> + QLIST_HEAD(, VFIOGuestIOMMU) giommu_list; >> + QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; >> + QLIST_HEAD(, VFIORamDiscardListener) vrdl_list; >> + QLIST_ENTRY(VFIOContainer) next; >> +}; >> + >> +VFIODevice *vfio_container_dev_iter_next(VFIOContainer *container, >> + VFIODevice *curr); >> +int vfio_container_dma_map(VFIOContainer *container, >> + hwaddr iova, ram_addr_t size, >> + void *vaddr, bool readonly); >> +int vfio_container_dma_unmap(VFIOContainer *container, >> + hwaddr iova, ram_addr_t size, >> + IOMMUTLBEntry *iotlb); >> +bool vfio_container_devices_all_dirty_tracking(VFIOContainer >> *container); >> +int vfio_container_set_dirty_page_tracking(VFIOContainer *container, >> + bool start); >> +int vfio_container_query_dirty_bitmap(VFIOContainer *container, >> + VFIOBitmap *vbmap, >> + hwaddr iova, hwaddr size); >> +int vfio_container_add_section_window(VFIOContainer *container, >> + MemoryRegionSection *section, >> + 
Error **errp); >> +void vfio_container_del_section_window(VFIOContainer *container, >> + MemoryRegionSection *section); >> + >> +void vfio_container_init(VFIOContainer *container, >> + VFIOAddressSpace *space, >> + struct VFIOIOMMUBackendOpsClass *ops); >> +void vfio_container_destroy(VFIOContainer *container); >> + >> +#define TYPE_VFIO_IOMMU_BACKEND_LEGACY_OPS >> "vfio-iommu-backend-legacy-ops" >> +#define TYPE_VFIO_IOMMU_BACKEND_OPS "vfio-iommu-backend-ops" >> + >> +DECLARE_CLASS_CHECKERS(VFIOIOMMUBackendOpsClass, >> + VFIO_IOMMU_BACKEND_OPS, >> TYPE_VFIO_IOMMU_BACKEND_OPS) >> + >> +struct VFIOIOMMUBackendOpsClass { >> + /*< private >*/ >> + ObjectClass parent_class; >> + >> + /*< public >*/ >> + /* required */ >> + VFIODevice *(*dev_iter_next)(VFIOContainer *container, >> VFIODevice *curr); >> + int (*dma_map)(VFIOContainer *container, >> + hwaddr iova, ram_addr_t size, >> + void *vaddr, bool readonly); >> + int (*dma_unmap)(VFIOContainer *container, >> + hwaddr iova, ram_addr_t size, >> + IOMMUTLBEntry *iotlb); >> + int (*attach_device)(char *name, VFIODevice *vbasedev, >> + AddressSpace *as, Error **errp); >> + void (*detach_device)(VFIODevice *vbasedev); >> + /* migration feature */ >> + int (*set_dirty_page_tracking)(VFIOContainer *container, bool >> start); >> + int (*query_dirty_bitmap)(VFIOContainer *bcontainer, VFIOBitmap >> *vbmap, >> + hwaddr iova, hwaddr size); >> + >> + /* SPAPR specific */ >> + int (*add_window)(VFIOContainer *container, >> + MemoryRegionSection *section, >> + Error **errp); >> + void (*del_window)(VFIOContainer *container, >> + MemoryRegionSection *section); >> +}; >> + >> + >> +#endif /* HW_VFIO_VFIO_BASE_CONTAINER_H */ >