This patch allows user space to retrieve the supported IOVA range(s),
excluding any reserved regions. The implementation is based on
capability chains, which are added to the VFIO_IOMMU_GET_INFO ioctl.

This follows the discussion in [1] and is based on the RFC patch [2].

ToDo:
 - The default supported IOVA range is currently derived from the first
   IOMMU domain only. This needs to be changed to walk the whole
   domain_list instead.
 - Sync with Pierre's patch[3].

[1] https://lists.gnu.org/archive/html/qemu-devel/2017-11/msg03651.html
[2] https://lists.linuxfoundation.org/pipermail/iommu/2016-November/019002.html
[3] https://patchwork.kernel.org/patch/10084655/

Signed-off-by: Shameer Kolothum <shameerali.kolothum.th...@huawei.com>
---
 drivers/vfio/vfio_iommu_type1.c | 172 +++++++++++++++++++++++++++++++++++++++-
 include/uapi/linux/vfio.h       |  13 +++
 2 files changed, 184 insertions(+), 1 deletion(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index e30e29a..72ca78a 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -28,6 +28,7 @@
 #include <linux/device.h>
 #include <linux/fs.h>
 #include <linux/iommu.h>
+#include <linux/list_sort.h>
 #include <linux/module.h>
 #include <linux/mm.h>
 #include <linux/rbtree.h>
@@ -92,6 +93,12 @@ struct vfio_group {
        struct list_head        next;
 };
 
+struct vfio_iommu_iova {       /* one contiguous IOVA range, kept on a list */
+       struct list_head        list;   /* link in the list of ranges */
+       phys_addr_t             start;  /* first address of the range */
+       phys_addr_t             end;    /* last address of the range (inclusive) */
+};
+
 /*
  * Guest RAM pinning working set or DMA target
  */
@@ -1537,6 +1544,144 @@ static int vfio_domains_have_iommu_cache(struct vfio_iommu *iommu)
        return ret;
 }
 
+/*
+ * Append one IOVA_RANGE capability (version 1) carrying @start and @end
+ * to the capability chain being assembled in @caps.
+ *
+ * Returns 0 on success, or the error encoded in the pointer returned by
+ * vfio_info_cap_add() if the capability could not be added.
+ */
+static int vfio_add_iova_cap(struct vfio_info_cap *caps, u64 start, u64 end)
+{
+       struct vfio_iommu_type1_info_cap_iova_range *range;
+       struct vfio_info_cap_header *hdr;
+
+       hdr = vfio_info_cap_add(caps, sizeof(*range),
+                               VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE, 1);
+       if (!IS_ERR(hdr)) {
+               /* The header is embedded in the capability; step out to it */
+               range = container_of(hdr,
+                               struct vfio_iommu_type1_info_cap_iova_range,
+                               header);
+               range->start = start;
+               range->end = end;
+               return 0;
+       }
+
+       return PTR_ERR(hdr);
+}
+
+/*
+ * Allocate a range node covering @start..@end and link it immediately
+ * before @head, i.e. at the tail of the list when @head is the list head.
+ *
+ * Returns 0 on success or -ENOMEM if the node cannot be allocated.
+ */
+static int vfio_insert_iova(phys_addr_t start, phys_addr_t end,
+                               struct list_head *head)
+{
+       struct vfio_iommu_iova *range = kzalloc(sizeof(*range), GFP_KERNEL);
+
+       if (!range)
+               return -ENOMEM;
+
+       range->start = start;
+       range->end = end;
+       /* list_add_tail() initialises both link pointers of the new node */
+       list_add_tail(&range->list, head);
+
+       return 0;
+}
+
+/*
+ * Remove the reserved region [start, end] (both inclusive) from the list
+ * of usable IOVA ranges.
+ *
+ * Only the last node of @iova is inspected, so reserved regions must be
+ * passed in ascending order of their start address. Depending on the
+ * overlap, the last node is trimmed in place, split in two (the upper part
+ * is appended as the new tail), or dropped altogether.
+ *
+ * Returns 0 on success, including when no usable range is left to trim,
+ * or the error from vfio_insert_iova() if the node for the upper part of a
+ * split cannot be allocated; the list is left unmodified in that case.
+ */
+static int vfio_update_iommu_iova_range(phys_addr_t start, phys_addr_t end,
+                                       struct list_head *iova)
+{
+       struct vfio_iommu_iova *node;
+       phys_addr_t a, b;
+       int ret;
+
+       /*
+        * An earlier reserved region may already have consumed the last
+        * usable range. That is not an error: there is simply nothing
+        * left to exclude, exactly as if this region had come first.
+        */
+       if (list_empty(iova))
+               return 0;
+
+       node = list_last_entry(iova, struct vfio_iommu_iova, list);
+       a = node->start;
+       b = node->end;
+
+       /* No overlap */
+       if ((start > b) || (end < a))
+               return 0;
+
+       if (start > a && end < b) {
+               /*
+                * The reserved region lies strictly inside the node: keep
+                * the lower part in the node and append the upper part.
+                * @node is the last entry, so appending keeps the order.
+                */
+               ret = vfio_insert_iova(end + 1, b, iova);
+               if (ret)
+                       return ret;
+               node->end = start - 1;
+       } else if (start > a) {
+               /* Reserved region covers the top of the node */
+               node->end = start - 1;
+       } else if (end < b) {
+               /* Reserved region covers the bottom of the node */
+               node->start = end + 1;
+       } else {
+               /* Reserved region covers the node entirely */
+               list_del(&node->list);
+               kfree(node);
+       }
+
+       return 0;
+}
+
+/*
+ * Comparison callback handed to list_sort(): orders reserved regions by
+ * ascending start address. @priv is not used.
+ */
+static int vfio_resv_cmp(void *priv, struct list_head *a, struct list_head *b)
+{
+       struct iommu_resv_region *first, *second;
+
+       first = container_of(a, struct iommu_resv_region, list);
+       second = container_of(b, struct iommu_resv_region, list);
+
+       if (first->start == second->start)
+               return 0;
+
+       return (first->start < second->start) ? -1 : 1;
+}
+
+/*
+ * Build the IOVA_RANGE capability chain for VFIO_IOMMU_GET_INFO.
+ *
+ * Starts from the aperture of the first IOMMU domain, removes the reserved
+ * regions of every group attached to that domain, and adds one capability
+ * to @caps for each IOVA range that remains.
+ *
+ * NOTE(review): only the first domain on iommu->domain_list is consulted;
+ * other domains are not taken into account yet.
+ *
+ * Returns 0 on success (no capability is added when the container has no
+ * IOMMU domain or no usable range remains), or a negative errno if a list
+ * node or capability cannot be allocated or the reserved regions cannot be
+ * retrieved.
+ */
+static int vfio_build_iommu_iova_caps(struct vfio_iommu *iommu,
+                               struct vfio_info_cap *caps)
+{
+       struct iommu_resv_region *resv, *resv_next;
+       struct vfio_iommu_iova *iova, *iova_next;
+       struct list_head group_resv_regions, vfio_iova_regions;
+       struct vfio_domain *domain;
+       struct vfio_group *g;
+       phys_addr_t start, end;
+       int ret;
+
+       /* list_first_entry() must not be used on an empty list */
+       if (list_empty(&iommu->domain_list))
+               return 0;
+
+       INIT_LIST_HEAD(&vfio_iova_regions);
+       INIT_LIST_HEAD(&group_resv_regions);
+
+       domain = list_first_entry(&iommu->domain_list,
+                                 struct vfio_domain, next);
+       /* Get the default iova range supported */
+       start = domain->domain->geometry.aperture_start;
+       end = domain->domain->geometry.aperture_end;
+       ret = vfio_insert_iova(start, end, &vfio_iova_regions);
+       if (ret)
+               goto done;
+
+       /*
+        * Get reserved regions if any. A failure here must not be ignored:
+        * an incomplete list would advertise reserved ranges as usable.
+        */
+       list_for_each_entry(g, &domain->group_list, next) {
+               ret = iommu_get_group_resv_regions(g->iommu_group,
+                                                  &group_resv_regions);
+               if (ret)
+                       goto done;
+       }
+       /* The exclusion step below expects ascending start addresses */
+       list_sort(NULL, &group_resv_regions, vfio_resv_cmp);
+
+       /* Update iova range excluding reserved regions */
+       list_for_each_entry(resv, &group_resv_regions, list) {
+               ret = vfio_update_iommu_iova_range(resv->start,
+                               resv->start + resv->length - 1,
+                               &vfio_iova_regions);
+               if (ret)
+                       goto done;
+       }
+
+       list_for_each_entry(iova, &vfio_iova_regions, list) {
+               ret = vfio_add_iova_cap(caps, iova->start, iova->end);
+               if (ret)
+                       goto done;
+       }
+
+done:
+       /* Both temporary lists are owned by this function; free them */
+       list_for_each_entry_safe(resv, resv_next, &group_resv_regions, list)
+               kfree(resv);
+
+       list_for_each_entry_safe(iova, iova_next, &vfio_iova_regions, list)
+               kfree(iova);
+
+       return ret;
+}
+
 static long vfio_iommu_type1_ioctl(void *iommu_data,
                                   unsigned int cmd, unsigned long arg)
 {
@@ -1558,8 +1703,10 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
                }
        } else if (cmd == VFIO_IOMMU_GET_INFO) {
                struct vfio_iommu_type1_info info;
+               struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
+               int ret;
 
-               minsz = offsetofend(struct vfio_iommu_type1_info, iova_pgsizes);
+               minsz = offsetofend(struct vfio_iommu_type1_info, cap_offset);
 
                if (copy_from_user(&info, (void __user *)arg, minsz))
                        return -EFAULT;
@@ -1571,6 +1718,29 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
 
                info.iova_pgsizes = vfio_pgsize_bitmap(iommu);
 
+               ret = vfio_build_iommu_iova_caps(iommu, &caps);
+               if (ret)
+                       return ret;
+
+               if (caps.size) {
+                       info.flags |= VFIO_IOMMU_INFO_CAPS;
+                       if (info.argsz < sizeof(info) + caps.size) {
+                               info.argsz = sizeof(info) + caps.size;
+                               info.cap_offset = 0;
+                       } else {
+                               vfio_info_cap_shift(&caps, sizeof(info));
+                               if (copy_to_user((void __user *)arg +
+                                               sizeof(info), caps.buf,
+                                               caps.size)) {
+                                       kfree(caps.buf);
+                                       return -EFAULT;
+                               }
+                               info.cap_offset = sizeof(info);
+                       }
+
+                       kfree(caps.buf);
+               }
+
                return copy_to_user((void __user *)arg, &info, minsz) ?
                        -EFAULT : 0;
 
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index e3301db..c4e338b 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -517,7 +517,20 @@ struct vfio_iommu_type1_info {
        __u32   argsz;
        __u32   flags;
 #define VFIO_IOMMU_INFO_PGSIZES (1 << 0)       /* supported page sizes info */
+#define VFIO_IOMMU_INFO_CAPS   (1 << 1)        /* Info supports caps */
        __u64   iova_pgsizes;           /* Bitmap of supported page sizes */
+       __u32   cap_offset;     /* Offset within info struct of first cap */
+       __u32   __resv;
+};
+
+/*
+ * The IOVA_RANGE capability reports one supported IOVA range, with
+ * reserved regions already excluded. One capability of this type is
+ * added to the chain for each such range; start and end are both
+ * inclusive.
+ */
+#define VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE  1
+struct vfio_iommu_type1_info_cap_iova_range {
+       struct vfio_info_cap_header header;
+       __u64 start;    /* first IOVA of the range */
+       __u64 end;      /* last IOVA of the range (inclusive) */
 };
 
 #define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)
-- 
1.9.1


Reply via email to