This moves the SPAPR bits into a separate file to avoid polluting the x86 code. The spapr-vfio code is built on CONFIG_SOFTMMU (rather than CONFIG_PSERIES) because the config options are only visible in the makefiles, not in the source code, so there is no obvious way of providing stubs when hw/vfio/spapr.c is not compiled.
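For illustration only (this snippet is not part of the patch and the guard shown is hypothetical): the stub approach ruled out above would have needed something like the following in a header, but CONFIG_PSERIES is defined for the build system only and never for the C preprocessor, so the #ifdef could never be taken:

    /* Hypothetical sketch only -- CONFIG_PSERIES is not visible to C code */
    #ifdef CONFIG_PSERIES
    void spapr_memory_listener_register(VFIOContainer *container);
    #else
    static inline void spapr_memory_listener_register(VFIOContainer *container)
    {
        /* sPAPR IOMMU support not built in */
    }
    #endif

Building spapr.o for every CONFIG_SOFTMMU target sidesteps the need for such stubs.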
This is a mechanical patch.

Signed-off-by: Alexey Kardashevskiy <a...@ozlabs.ru>
Reviewed-by: David Gibson <da...@gibson.dropbear.id.au>
---
 hw/vfio/Makefile.objs         |   1 +
 hw/vfio/common.c              | 137 ++-----------------------
 hw/vfio/spapr.c               | 229 ++++++++++++++++++++++++++++++++++++++++++
 include/hw/vfio/vfio-common.h |  13 +++
 4 files changed, 249 insertions(+), 131 deletions(-)
 create mode 100644 hw/vfio/spapr.c

diff --git a/hw/vfio/Makefile.objs b/hw/vfio/Makefile.objs
index d540c9d..0dd99ed 100644
--- a/hw/vfio/Makefile.objs
+++ b/hw/vfio/Makefile.objs
@@ -3,4 +3,5 @@ obj-$(CONFIG_SOFTMMU) += common.o
 obj-$(CONFIG_PCI) += pci.o
 obj-$(CONFIG_SOFTMMU) += platform.o
 obj-$(CONFIG_SOFTMMU) += calxeda-xgmac.o
+obj-$(CONFIG_SOFTMMU) += spapr.o
 endif
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index b1045da..3e4c685 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -190,8 +190,8 @@ const MemoryRegionOps vfio_region_ops = {
 /*
  * DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86
  */
-static int vfio_dma_unmap(VFIOContainer *container,
-                          hwaddr iova, ram_addr_t size)
+int vfio_dma_unmap(VFIOContainer *container,
+                   hwaddr iova, ram_addr_t size)
 {
     struct vfio_iommu_type1_dma_unmap unmap = {
         .argsz = sizeof(unmap),
@@ -208,8 +208,8 @@ static int vfio_dma_unmap(VFIOContainer *container,
     return 0;
 }
 
-static int vfio_dma_map(VFIOContainer *container, hwaddr iova,
-                        ram_addr_t size, void *vaddr, bool readonly)
+int vfio_dma_map(VFIOContainer *container, hwaddr iova,
+                 ram_addr_t size, void *vaddr, bool readonly)
 {
     struct vfio_iommu_type1_dma_map map = {
         .argsz = sizeof(map),
@@ -238,7 +238,7 @@ static int vfio_dma_map(VFIOContainer *container, hwaddr iova,
     return -errno;
 }
 
-static bool vfio_listener_skipped_section(MemoryRegionSection *section)
+bool vfio_listener_skipped_section(MemoryRegionSection *section)
 {
     return (!memory_region_is_ram(section->mr) &&
             !memory_region_is_iommu(section->mr)) ||
@@ -251,67 +251,6 @@ static bool vfio_listener_skipped_section(MemoryRegionSection *section)
            section->offset_within_address_space & (1ULL << 63);
 }
 
-static void vfio_iommu_map_notify(Notifier *n, void *data)
-{
-    VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
-    VFIOContainer *container = giommu->container;
-    IOMMUTLBEntry *iotlb = data;
-    MemoryRegion *mr;
-    hwaddr xlat;
-    hwaddr len = iotlb->addr_mask + 1;
-    void *vaddr;
-    int ret;
-
-    trace_vfio_iommu_map_notify(iotlb->iova,
-                                iotlb->iova + iotlb->addr_mask);
-
-    /*
-     * The IOMMU TLB entry we have just covers translation through
-     * this IOMMU to its immediate target. We need to translate
-     * it the rest of the way through to memory.
-     */
-    rcu_read_lock();
-    mr = address_space_translate(&address_space_memory,
-                                 iotlb->translated_addr,
-                                 &xlat, &len, iotlb->perm & IOMMU_WO);
-    if (!memory_region_is_ram(mr)) {
-        error_report("iommu map to non memory area %"HWADDR_PRIx"",
-                     xlat);
-        goto out;
-    }
-    /*
-     * Translation truncates length to the IOMMU page size,
-     * check that it did not truncate too much.
-     */
-    if (len & iotlb->addr_mask) {
-        error_report("iommu has granularity incompatible with target AS");
-        goto out;
-    }
-
-    if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
-        vaddr = memory_region_get_ram_ptr(mr) + xlat;
-        ret = vfio_dma_map(container, iotlb->iova,
-                           iotlb->addr_mask + 1, vaddr,
-                           !(iotlb->perm & IOMMU_WO) || mr->readonly);
-        if (ret) {
-            error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", "
-                         "0x%"HWADDR_PRIx", %p) = %d (%m)",
-                         container, iotlb->iova,
-                         iotlb->addr_mask + 1, vaddr, ret);
-        }
-    } else {
-        ret = vfio_dma_unmap(container, iotlb->iova, iotlb->addr_mask + 1);
-        if (ret) {
-            error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
-                         "0x%"HWADDR_PRIx") = %d (%m)",
-                         container, iotlb->iova,
-                         iotlb->addr_mask + 1, ret);
-        }
-    }
-out:
-    rcu_read_unlock();
-}
-
 static void vfio_listener_region_add(MemoryListener *listener,
                                      MemoryRegionSection *section)
 {
@@ -347,45 +286,6 @@ static void vfio_listener_region_add(MemoryListener *listener,
 
     memory_region_ref(section->mr);
 
-    if (memory_region_is_iommu(section->mr)) {
-        VFIOGuestIOMMU *giommu;
-
-        trace_vfio_listener_region_add_iommu(iova,
-                    int128_get64(int128_sub(llend, int128_one())));
-        /*
-         * FIXME: We should do some checking to see if the
-         * capabilities of the host VFIO IOMMU are adequate to model
-         * the guest IOMMU
-         *
-         * FIXME: For VFIO iommu types which have KVM acceleration to
-         * avoid bouncing all map/unmaps through qemu this way, this
-         * would be the right place to wire that up (tell the KVM
-         * device emulation the VFIO iommu handles to use).
-         */
-        /*
-         * This assumes that the guest IOMMU is empty of
-         * mappings at this point.
-         *
-         * One way of doing this is:
-         * 1. Avoid sharing IOMMUs between emulated devices or different
-         *    IOMMU groups.
-         * 2. Implement VFIO_IOMMU_ENABLE in the host kernel to fail if
-         *    there are some mappings in IOMMU.
-         *
-         * VFIO on SPAPR does that. Other IOMMU models may do that different,
-         * they must make sure there are no existing mappings or
-         * loop through existing mappings to map them into VFIO.
-         */
-        giommu = g_malloc0(sizeof(*giommu));
-        giommu->iommu = section->mr;
-        giommu->container = container;
-        giommu->n.notify = vfio_iommu_map_notify;
-        QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next);
-        memory_region_register_iommu_notifier(giommu->iommu, &giommu->n);
-
-        return;
-    }
-
     /* Here we assume that memory_region_is_ram(section->mr)==true */
 
     end = int128_get64(llend);
@@ -438,27 +338,6 @@ static void vfio_listener_region_del(MemoryListener *listener,
         return;
     }
 
-    if (memory_region_is_iommu(section->mr)) {
-        VFIOGuestIOMMU *giommu;
-
-        QLIST_FOREACH(giommu, &container->giommu_list, giommu_next) {
-            if (giommu->iommu == section->mr) {
-                memory_region_unregister_iommu_notifier(&giommu->n);
-                QLIST_REMOVE(giommu, giommu_next);
-                g_free(giommu);
-                break;
-            }
-        }
-
-        /*
-         * FIXME: We assume the one big unmap below is adequate to
-         * remove any individual page mappings in the IOMMU which
-         * might have been copied into VFIO. This works for a page table
-         * based IOMMU where a big unmap flattens a large range of IO-PTEs.
-         * That may not be true for all IOMMU types.
-         */
-    }
-
     iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
     end = (section->offset_within_address_space + int128_get64(section->size)) &
           TARGET_PAGE_MASK;
@@ -724,11 +603,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as)
             goto free_container_exit;
         }
 
-        container->iommu_data.type1.listener = vfio_memory_listener;
-        container->iommu_data.release = vfio_listener_release;
-
-        memory_listener_register(&container->iommu_data.type1.listener,
-                                 container->space->as);
+        spapr_memory_listener_register(container);
 
     } else {
         error_report("vfio: No available IOMMU models");
diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c
new file mode 100644
index 0000000..f03fab1
--- /dev/null
+++ b/hw/vfio/spapr.c
@@ -0,0 +1,229 @@
+/*
+ * QEMU sPAPR VFIO IOMMU
+ *
+ * Copyright (c) 2015 Alexey Kardashevskiy, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License,
+ * or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "hw/vfio/vfio-common.h"
+#include "qemu/error-report.h"
+#include "trace.h"
+
+static void vfio_iommu_map_notify(Notifier *n, void *data)
+{
+    VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
+    VFIOContainer *container = giommu->container;
+    IOMMUTLBEntry *iotlb = data;
+    MemoryRegion *mr;
+    hwaddr xlat;
+    hwaddr len = iotlb->addr_mask + 1;
+    void *vaddr;
+    int ret;
+
+    trace_vfio_iommu_map_notify(iotlb->iova,
+                                iotlb->iova + iotlb->addr_mask);
+
+    /*
+     * The IOMMU TLB entry we have just covers translation through
+     * this IOMMU to its immediate target. We need to translate
+     * it the rest of the way through to memory.
+     */
+    rcu_read_lock();
+    mr = address_space_translate(&address_space_memory,
+                                 iotlb->translated_addr,
+                                 &xlat, &len, iotlb->perm & IOMMU_WO);
+    if (!memory_region_is_ram(mr)) {
+        error_report("iommu map to non memory area %"HWADDR_PRIx"",
+                     xlat);
+        goto out;
+    }
+    /*
+     * Translation truncates length to the IOMMU page size,
+     * check that it did not truncate too much.
+     */
+    if (len & iotlb->addr_mask) {
+        error_report("iommu has granularity incompatible with target AS");
+        goto out;
+    }
+
+    if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
+        vaddr = memory_region_get_ram_ptr(mr) + xlat;
+        ret = vfio_dma_map(container, iotlb->iova,
+                           iotlb->addr_mask + 1, vaddr,
+                           !(iotlb->perm & IOMMU_WO) || mr->readonly);
+        if (ret) {
+            error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", "
+                         "0x%"HWADDR_PRIx", %p) = %d (%m)",
+                         container, iotlb->iova,
+                         iotlb->addr_mask + 1, vaddr, ret);
+        }
+    } else {
+        ret = vfio_dma_unmap(container, iotlb->iova, iotlb->addr_mask + 1);
+        if (ret) {
+            error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
+                         "0x%"HWADDR_PRIx") = %d (%m)",
+                         container, iotlb->iova,
+                         iotlb->addr_mask + 1, ret);
+        }
+    }
+out:
+    rcu_read_unlock();
+}
+
+static void vfio_spapr_listener_region_add(MemoryListener *listener,
+                                           MemoryRegionSection *section)
+{
+    VFIOContainer *container = container_of(listener, VFIOContainer,
+                                            iommu_data.spapr.listener);
+    hwaddr iova;
+    Int128 llend;
+    VFIOGuestIOMMU *giommu;
+
+    if (vfio_listener_skipped_section(section)) {
+        trace_vfio_listener_region_add_skip(
+                section->offset_within_address_space,
+                section->offset_within_address_space +
+                int128_get64(int128_sub(section->size, int128_one())));
+        return;
+    }
+
+    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
+                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
+        error_report("%s received unaligned region", __func__);
+        return;
+    }
+
+    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
+    llend = int128_make64(section->offset_within_address_space);
+    llend = int128_add(llend, section->size);
+    llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));
+
+    if (int128_ge(int128_make64(iova), llend)) {
+        return;
+    }
+
+    memory_region_ref(section->mr);
+
+    trace_vfio_listener_region_add_iommu(iova,
+            int128_get64(int128_sub(llend, int128_one())));
+    /*
+     * FIXME: We should do some checking to see if the
+     * capabilities of the host VFIO IOMMU are adequate to model
+     * the guest IOMMU
+     *
+     * FIXME: For VFIO iommu types which have KVM acceleration to
+     * avoid bouncing all map/unmaps through qemu this way, this
+     * would be the right place to wire that up (tell the KVM
+     * device emulation the VFIO iommu handles to use).
+     */
+    /*
+     * This assumes that the guest IOMMU is empty of
+     * mappings at this point.
+     *
+     * One way of doing this is:
+     * 1. Avoid sharing IOMMUs between emulated devices or different
+     *    IOMMU groups.
+     * 2. Implement VFIO_IOMMU_ENABLE in the host kernel to fail if
+     *    there are some mappings in IOMMU.
+     *
+     * VFIO on SPAPR does that. Other IOMMU models may do that different,
+     * they must make sure there are no existing mappings or
+     * loop through existing mappings to map them into VFIO.
+     */
+    giommu = g_malloc0(sizeof(*giommu));
+    giommu->iommu = section->mr;
+    giommu->container = container;
+    giommu->n.notify = vfio_iommu_map_notify;
+    QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next);
+    memory_region_register_iommu_notifier(giommu->iommu, &giommu->n);
+}
+
+static void vfio_spapr_listener_region_del(MemoryListener *listener,
+                                           MemoryRegionSection *section)
+{
+    VFIOContainer *container = container_of(listener, VFIOContainer,
+                                            iommu_data.spapr.listener);
+    hwaddr iova, end;
+    int ret;
+    VFIOGuestIOMMU *giommu;
+
+    if (vfio_listener_skipped_section(section)) {
+        trace_vfio_listener_region_del_skip(
+                section->offset_within_address_space,
+                section->offset_within_address_space +
+                int128_get64(int128_sub(section->size, int128_one())));
+        return;
+    }
+
+    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
+                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
+        error_report("%s received unaligned region", __func__);
+        return;
+    }
+
+    QLIST_FOREACH(giommu, &container->giommu_list, giommu_next) {
+        if (giommu->iommu == section->mr) {
+            memory_region_unregister_iommu_notifier(&giommu->n);
+            QLIST_REMOVE(giommu, giommu_next);
+            g_free(giommu);
+            break;
+        }
+    }
+
+    /*
+     * FIXME: We assume the one big unmap below is adequate to
+     * remove any individual page mappings in the IOMMU which
+     * might have been copied into VFIO. This works for a page table
+     * based IOMMU where a big unmap flattens a large range of IO-PTEs.
+     * That may not be true for all IOMMU types.
+     */
+
+    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
+    end = (section->offset_within_address_space + int128_get64(section->size)) &
+          TARGET_PAGE_MASK;
+
+    if (iova >= end) {
+        return;
+    }
+
+    trace_vfio_listener_region_del(iova, end - 1);
+
+    ret = vfio_dma_unmap(container, iova, end - iova);
+    memory_region_unref(section->mr);
+    if (ret) {
+        error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
+                     "0x%"HWADDR_PRIx") = %d (%m)",
+                     container, iova, end - iova, ret);
+    }
+}
+
+static const MemoryListener vfio_spapr_memory_listener = {
+    .region_add = vfio_spapr_listener_region_add,
+    .region_del = vfio_spapr_listener_region_del,
+};
+
+static void vfio_spapr_listener_release(VFIOContainer *container)
+{
+    memory_listener_unregister(&container->iommu_data.spapr.listener);
+}
+
+void spapr_memory_listener_register(VFIOContainer *container)
+{
+    container->iommu_data.spapr.listener = vfio_spapr_memory_listener;
+    container->iommu_data.release = vfio_spapr_listener_release;
+
+    memory_listener_register(&container->iommu_data.spapr.listener,
+                             container->space->as);
+}
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 59a321d..d513a2f 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -70,6 +70,10 @@ typedef struct VFIOType1 {
     bool initialized;
 } VFIOType1;
 
+typedef struct VFIOSPAPR {
+    MemoryListener listener;
+} VFIOSPAPR;
+
 typedef struct VFIOContainer {
     VFIOAddressSpace *space;
     int fd; /* /dev/vfio/vfio, empowered by the attached groups */
@@ -77,6 +81,7 @@ typedef struct VFIOContainer {
         /* enable abstraction to support various iommu backends */
         union {
             VFIOType1 type1;
+            VFIOSPAPR spapr;
         };
         void (*release)(struct VFIOContainer *);
     } iommu_data;
@@ -146,4 +151,12 @@ extern const MemoryRegionOps vfio_region_ops;
 extern QLIST_HEAD(vfio_group_head, VFIOGroup) vfio_group_list;
 extern QLIST_HEAD(vfio_as_head, VFIOAddressSpace) vfio_address_spaces;
 
+extern int vfio_dma_map(VFIOContainer *container, hwaddr iova,
+                        ram_addr_t size, void *vaddr, bool readonly);
+extern int vfio_dma_unmap(VFIOContainer *container,
+                          hwaddr iova, ram_addr_t size);
+bool vfio_listener_skipped_section(MemoryRegionSection *section);
+
+extern void spapr_memory_listener_register(VFIOContainer *container);
+
 #endif /* !HW_VFIO_VFIO_COMMON_H */
-- 
2.4.0.rc3.8.gfb3e7d5