VFIO allows devices to be safely handed off to userspace by putting them behind an IOMMU configured to ensure DMA and interrupt isolation. This enables userspace KVM clients, such as kvmtool and qemu, to further map the device into a virtual machine.
With IOMMUs such as the ARM SMMU, it is then possible to provide SMMU
translation services to the guest operating system, which are nested
with the existing translation installed by VFIO. However, enabling this
feature means that the IOMMU driver must be informed that the VFIO
domain is being created for the purposes of nested translation.

This patch adds a new IOMMU type (VFIO_TYPE1_NESTING_IOMMU) to the VFIO
type-1 driver. The new IOMMU type acts identically to the
VFIO_TYPE1v2_IOMMU type, but additionally sets the DOMAIN_ATTR_NESTING
attribute on its IOMMU domains. Userspace can check whether nesting is
actually available using the VFIO_CHECK_EXTENSION ioctl, in a similar
manner to checking for cache-coherent DMA.

Cc: Joerg Roedel <j...@8bytes.org>
Cc: Alex Williamson <alex.william...@redhat.com>
Signed-off-by: Will Deacon <will.dea...@arm.com>
---
 drivers/vfio/vfio_iommu_type1.c | 56 +++++++++++++++++++++++++++++++++++------
 include/uapi/linux/vfio.h       |  2 ++
 2 files changed, 51 insertions(+), 7 deletions(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 0734fbe5b651..24cfe69a3c83 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -53,11 +53,15 @@ module_param_named(disable_hugepages,
 MODULE_PARM_DESC(disable_hugepages,
 		 "Disable VFIO IOMMU support for IOMMU hugepages.");
 
+/* Feature flags for VFIO Type-1 IOMMUs */
+#define VFIO_IOMMU_FEAT_V2		(1 << 0)
+#define VFIO_IOMMU_FEAT_NESTING		(1 << 1)
+
 struct vfio_iommu {
 	struct list_head	domain_list;
 	struct mutex		lock;
 	struct rb_root		dma_list;
-	bool v2;
+	int			features;
 };
 
 struct vfio_domain {
@@ -441,7 +445,7 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
 	 * will only return success and a size of zero if there were no
 	 * mappings within the range.
 	 */
-	if (iommu->v2) {
+	if (iommu->features & VFIO_IOMMU_FEAT_V2) {
 		dma = vfio_find_dma(iommu, unmap->iova, 0);
 		if (dma && dma->iova != unmap->iova) {
 			ret = -EINVAL;
@@ -455,7 +459,8 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
 	}
 
 	while ((dma = vfio_find_dma(iommu, unmap->iova, unmap->size))) {
-		if (!iommu->v2 && unmap->iova > dma->iova)
+		if (!(iommu->features & VFIO_IOMMU_FEAT_V2) &&
+		    unmap->iova > dma->iova)
 			break;
 		unmapped += dma->size;
 		vfio_remove_dma(iommu, dma);
@@ -671,7 +676,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
 	struct vfio_group *group, *g;
 	struct vfio_domain *domain, *d;
 	struct bus_type *bus = NULL;
-	int ret;
+	int ret, attr = 1;
 
 	mutex_lock(&iommu->lock);
 
@@ -705,6 +710,9 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
 		goto out_free;
 	}
 
+	if (iommu->features & VFIO_IOMMU_FEAT_NESTING)
+		iommu_domain_set_attr(domain->domain, DOMAIN_ATTR_NESTING, &attr);
+
 	ret = iommu_attach_group(domain->domain, iommu_group);
 	if (ret)
 		goto out_domain;
@@ -818,9 +826,19 @@ done:
 static void *vfio_iommu_type1_open(unsigned long arg)
 {
 	struct vfio_iommu *iommu;
-
-	if (arg != VFIO_TYPE1_IOMMU && arg != VFIO_TYPE1v2_IOMMU)
+	int features = 0;
+
+	switch (arg) {
+	case VFIO_TYPE1_IOMMU:
+		break;
+	case VFIO_TYPE1_NESTING_IOMMU:
+		features |= VFIO_IOMMU_FEAT_NESTING;
+	case VFIO_TYPE1v2_IOMMU:
+		features |= VFIO_IOMMU_FEAT_V2;
+		break;
+	default:
 		return ERR_PTR(-EINVAL);
+	}
 
 	iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
 	if (!iommu)
@@ -829,7 +847,7 @@ static void *vfio_iommu_type1_open(unsigned long arg)
 	INIT_LIST_HEAD(&iommu->domain_list);
 	iommu->dma_list = RB_ROOT;
 	mutex_init(&iommu->lock);
-	iommu->v2 = (arg == VFIO_TYPE1v2_IOMMU);
+	iommu->features = features;
 
 	return iommu;
 }
@@ -875,6 +893,26 @@ static int vfio_domains_have_iommu_cache(struct vfio_iommu *iommu)
 	return ret;
 }
 
+static int vfio_domains_have_iommu_nesting(struct vfio_iommu *iommu)
+{
+	struct vfio_domain *domain;
+	int ret = 1;
+
+	mutex_lock(&iommu->lock);
+	list_for_each_entry(domain, &iommu->domain_list, next) {
+		int nesting;
+
+		if (iommu_domain_get_attr(domain->domain, DOMAIN_ATTR_NESTING,
+					  &nesting) || !nesting) {
+			ret = 0;
+			break;
+		}
+	}
+	mutex_unlock(&iommu->lock);
+
+	return ret;
+}
+
 static long vfio_iommu_type1_ioctl(void *iommu_data,
 				   unsigned int cmd, unsigned long arg)
 {
@@ -886,6 +924,10 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
 		case VFIO_TYPE1_IOMMU:
 		case VFIO_TYPE1v2_IOMMU:
 			return 1;
+		case VFIO_TYPE1_NESTING_IOMMU:
+			if (!iommu)
+				return 1;
+			return vfio_domains_have_iommu_nesting(iommu);
 		case VFIO_DMA_CC_IOMMU:
 			if (!iommu)
 				return 0;
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index cb9023d4f063..babcb33a2756 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -24,11 +24,13 @@
 #define VFIO_TYPE1_IOMMU		1
 #define VFIO_SPAPR_TCE_IOMMU		2
 #define VFIO_TYPE1v2_IOMMU		3
+
 /*
  * IOMMU enforces DMA cache coherence (ex. PCIe NoSnoop stripping).  This
  * capability is subject to change as groups are added or removed.
  */
 #define VFIO_DMA_CC_IOMMU		4
+#define VFIO_TYPE1_NESTING_IOMMU	5	/* Implies v2 */
 
 /*
  * The IOCTL interface is designed for extensibility by embedding the
-- 
2.0.0
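
For reference, a rough sketch of how a userspace VFIO client might use the
new type (not part of the patch; the group number /dev/vfio/26 is a
placeholder and error handling is minimal). The first VFIO_CHECK_EXTENSION,
issued before any group is attached, only reports that the type-1 driver
recognises VFIO_TYPE1_NESTING_IOMMU; repeating it after VFIO_SET_IOMMU
reports whether DOMAIN_ATTR_NESTING actually took effect on the domains,
mirroring the existing VFIO_DMA_CC_IOMMU check:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

int main(void)
{
	struct vfio_group_status status = { .argsz = sizeof(status) };
	int container, group;

	container = open("/dev/vfio/vfio", O_RDWR);
	if (container < 0) {
		perror("open /dev/vfio/vfio");
		return 1;
	}

	/* Does the type-1 driver know about the nesting type at all? */
	if (!ioctl(container, VFIO_CHECK_EXTENSION, VFIO_TYPE1_NESTING_IOMMU)) {
		fprintf(stderr, "VFIO_TYPE1_NESTING_IOMMU not supported\n");
		return 1;
	}

	group = open("/dev/vfio/26", O_RDWR);	/* placeholder group number */
	if (group < 0) {
		perror("open group");
		return 1;
	}

	ioctl(group, VFIO_GROUP_GET_STATUS, &status);
	if (!(status.flags & VFIO_GROUP_FLAGS_VIABLE)) {
		fprintf(stderr, "group is not viable\n");
		return 1;
	}

	/* A group must be attached to the container before VFIO_SET_IOMMU. */
	if (ioctl(group, VFIO_GROUP_SET_CONTAINER, &container)) {
		perror("VFIO_GROUP_SET_CONTAINER");
		return 1;
	}

	/* Ask for nested translation; this implies v2 map/unmap semantics. */
	if (ioctl(container, VFIO_SET_IOMMU, VFIO_TYPE1_NESTING_IOMMU)) {
		perror("VFIO_SET_IOMMU");
		return 1;
	}

	/* Now the check asks each domain whether DOMAIN_ATTR_NESTING stuck. */
	if (!ioctl(container, VFIO_CHECK_EXTENSION, VFIO_TYPE1_NESTING_IOMMU))
		fprintf(stderr, "IOMMU did not provide nested translation\n");

	close(group);
	close(container);
	return 0;
}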