VFIO allows devices to be safely handed off to userspace by putting
them behind an IOMMU configured to ensure DMA and interrupt isolation.
This enables userspace KVM clients, such as kvmtool and qemu, to further
map the device into a virtual machine.

With IOMMUs such as the ARM SMMU, it is then possible to provide SMMU
translation services to the guest operating system, which are nested
with the existing translation installed by VFIO. However, enabling this
feature means that the IOMMU driver must be informed that the VFIO domain
is being created for the purposes of nested translation.

This patch adds a new IOMMU type (VFIO_TYPE1_NESTING_IOMMU) to the VFIO
type-1 driver. The new IOMMU type acts identically to the
VFIO_TYPE1v2_IOMMU type, but additionally sets the DOMAIN_ATTR_NESTING
attribute on its IOMMU domains. Userspace can check whether nesting is
actually available using the VFIO_CHECK_EXTENSION ioctl, in a similar
manner to checking for cache-coherent DMA.

Cc: Joerg Roedel <j...@8bytes.org>
Cc: Alex Williamson <alex.william...@redhat.com>
Signed-off-by: Will Deacon <will.dea...@arm.com>
---
 drivers/vfio/vfio_iommu_type1.c | 56 +++++++++++++++++++++++++++++++++++------
 include/uapi/linux/vfio.h       |  2 ++
 2 files changed, 51 insertions(+), 7 deletions(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 0734fbe5b651..24cfe69a3c83 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -53,11 +53,15 @@ module_param_named(disable_hugepages,
 MODULE_PARM_DESC(disable_hugepages,
                 "Disable VFIO IOMMU support for IOMMU hugepages.");
 
+/* Feature flags for VFIO Type-1 IOMMUs */
+#define VFIO_IOMMU_FEAT_V2     (1 << 0)
+#define VFIO_IOMMU_FEAT_NESTING        (1 << 1)
+
 struct vfio_iommu {
        struct list_head        domain_list;
        struct mutex            lock;
        struct rb_root          dma_list;
-       bool v2;
+       int                     features;
 };
 
 struct vfio_domain {
@@ -441,7 +445,7 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
         * will only return success and a size of zero if there were no
         * mappings within the range.
         */
-       if (iommu->v2) {
+       if (iommu->features & VFIO_IOMMU_FEAT_V2) {
                dma = vfio_find_dma(iommu, unmap->iova, 0);
                if (dma && dma->iova != unmap->iova) {
                        ret = -EINVAL;
@@ -455,7 +459,8 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
        }
 
        while ((dma = vfio_find_dma(iommu, unmap->iova, unmap->size))) {
-               if (!iommu->v2 && unmap->iova > dma->iova)
+               if (!(iommu->features & VFIO_IOMMU_FEAT_V2) &&
+                   unmap->iova > dma->iova)
                        break;
                unmapped += dma->size;
                vfio_remove_dma(iommu, dma);
@@ -671,7 +676,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
        struct vfio_group *group, *g;
        struct vfio_domain *domain, *d;
        struct bus_type *bus = NULL;
-       int ret;
+       int ret, attr = 1;
 
        mutex_lock(&iommu->lock);
 
@@ -705,6 +710,9 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
                goto out_free;
        }
 
+       if (iommu->features & VFIO_IOMMU_FEAT_NESTING)
+               iommu_domain_set_attr(domain->domain, DOMAIN_ATTR_NESTING, &attr);
+
        ret = iommu_attach_group(domain->domain, iommu_group);
        if (ret)
                goto out_domain;
@@ -818,9 +826,19 @@ done:
 static void *vfio_iommu_type1_open(unsigned long arg)
 {
        struct vfio_iommu *iommu;
-
-       if (arg != VFIO_TYPE1_IOMMU && arg != VFIO_TYPE1v2_IOMMU)
+       int features = 0;
+
+       switch (arg) {
+       case VFIO_TYPE1_IOMMU:
+               break;
+       case VFIO_TYPE1_NESTING_IOMMU:
+               features |= VFIO_IOMMU_FEAT_NESTING;
+       case VFIO_TYPE1v2_IOMMU:
+               features |= VFIO_IOMMU_FEAT_V2;
+               break;
+       default:
                return ERR_PTR(-EINVAL);
+       }
 
        iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
        if (!iommu)
@@ -829,7 +847,7 @@ static void *vfio_iommu_type1_open(unsigned long arg)
        INIT_LIST_HEAD(&iommu->domain_list);
        iommu->dma_list = RB_ROOT;
        mutex_init(&iommu->lock);
-       iommu->v2 = (arg == VFIO_TYPE1v2_IOMMU);
+       iommu->features = features;
 
        return iommu;
 }
@@ -875,6 +893,26 @@ static int vfio_domains_have_iommu_cache(struct vfio_iommu *iommu)
        return ret;
 }
 
+static int vfio_domains_have_iommu_nesting(struct vfio_iommu *iommu)
+{
+       struct vfio_domain *domain;
+       int ret = 1;
+
+       mutex_lock(&iommu->lock);
+       list_for_each_entry(domain, &iommu->domain_list, next) {
+               int nesting;
+
+               if (iommu_domain_get_attr(domain->domain, DOMAIN_ATTR_NESTING,
+                                         &nesting) || !nesting) {
+                       ret = 0;
+                       break;
+               }
+       }
+       mutex_unlock(&iommu->lock);
+
+       return ret;
+}
+
 static long vfio_iommu_type1_ioctl(void *iommu_data,
                                   unsigned int cmd, unsigned long arg)
 {
@@ -886,6 +924,10 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
                case VFIO_TYPE1_IOMMU:
                case VFIO_TYPE1v2_IOMMU:
                        return 1;
+               case VFIO_TYPE1_NESTING_IOMMU:
+                       if (!iommu)
+                               return 1;
+                       return vfio_domains_have_iommu_nesting(iommu);
                case VFIO_DMA_CC_IOMMU:
                        if (!iommu)
                                return 0;
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index cb9023d4f063..babcb33a2756 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -24,11 +24,13 @@
 #define VFIO_TYPE1_IOMMU               1
 #define VFIO_SPAPR_TCE_IOMMU           2
 #define VFIO_TYPE1v2_IOMMU             3
+
 /*
  * IOMMU enforces DMA cache coherence (ex. PCIe NoSnoop stripping).  This
  * capability is subject to change as groups are added or removed.
  */
 #define VFIO_DMA_CC_IOMMU              4
+#define VFIO_TYPE1_NESTING_IOMMU       5       /* Implies v2 */
 
 /*
  * The IOCTL interface is designed for extensibility by embedding the
-- 
2.0.0

_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Reply via email to