Add the corresponding APIs for performing VFIO dirty tracking,
particularly the VFIO_IOMMU_DIRTY_PAGES ioctl subcmds:
* VFIO_IOMMU_DIRTY_PAGES_FLAG_START: Start dirty tracking and allocates
                                     the area @dirty_bitmap
* VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP: Stop dirty tracking and frees
                                    the area @dirty_bitmap
* VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP: Fetch dirty bitmap while dirty
tracking is active.

Advertise the VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION capability, reporting
the domain's configured page size (the same as iopt::iova_alignment) and
the same maximum dirty bitmap size as VFIO. Compared to the VFIO type1
iommu, perpetual dirtying is not implemented, and userspace gets
-EOPNOTSUPP, which is handled by today's userspace.

Move iommufd_get_pagesizes() definition prior to unmap for
iommufd_vfio_unmap_dma() dirty support to validate the user bitmap page
size against IOPT pagesize.

Signed-off-by: Joao Martins <joao.m.mart...@oracle.com>
---
 drivers/iommu/iommufd/vfio_compat.c | 221 ++++++++++++++++++++++++++--
 1 file changed, 209 insertions(+), 12 deletions(-)

diff --git a/drivers/iommu/iommufd/vfio_compat.c 
b/drivers/iommu/iommufd/vfio_compat.c
index dbe39404a105..2802f49cc10d 100644
--- a/drivers/iommu/iommufd/vfio_compat.c
+++ b/drivers/iommu/iommufd/vfio_compat.c
@@ -56,6 +56,16 @@ create_compat_ioas(struct iommufd_ctx *ictx)
        return ioas;
 }
 
+/*
+ * Return the supported page sizes reported through the VFIO compat ABI.
+ * Only the IOPT minimum alignment is reported, as a single set bit.
+ */
+static u64 iommufd_get_pagesizes(struct iommufd_ioas *ioas)
+{
+       /*
+        * FIXME: See vfio_update_pgsize_bitmap(), for compat this should return
+        * the high bits too, and we need to decide if we should report that
+        * iommufd supports less than PAGE_SIZE alignment or stick to strict
+        * compatibility. qemu only cares about the first set bit.
+        */
+       return ioas->iopt.iova_alignment;
+}
+
 int iommufd_vfio_ioas(struct iommufd_ucmd *ucmd)
 {
        struct iommu_vfio_ioas *cmd = ucmd->cmd;
@@ -130,9 +140,14 @@ static int iommufd_vfio_unmap_dma(struct iommufd_ctx *ictx, unsigned int cmd,
                                  void __user *arg)
 {
        size_t minsz = offsetofend(struct vfio_iommu_type1_dma_unmap, size);
-       u32 supported_flags = VFIO_DMA_UNMAP_FLAG_ALL;
+       u32 supported_flags = VFIO_DMA_UNMAP_FLAG_ALL |
+               VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP;
+       struct iommufd_dirty_data dirty, *dirtyp = NULL;
        struct vfio_iommu_type1_dma_unmap unmap;
+       struct vfio_bitmap bitmap;
        struct iommufd_ioas *ioas;
+       unsigned long pgshift;
+       size_t pgsize;
        int rc;
 
        if (copy_from_user(&unmap, arg, minsz))
@@ -141,14 +156,53 @@ static int iommufd_vfio_unmap_dma(struct iommufd_ctx *ictx, unsigned int cmd,
        if (unmap.argsz < minsz || unmap.flags & ~supported_flags)
                return -EINVAL;
 
+       if (unmap.flags & VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) {
+               unsigned long npages;
+
+               if (copy_from_user(&bitmap,
+                                  (void __user *)(arg + minsz),
+                                  sizeof(bitmap)))
+                       return -EFAULT;
+
+               if (!access_ok((void __user *)bitmap.data, bitmap.size))
+                       return -EINVAL;
+
+               /* Validate pgsize before use: __ffs(0) is undefined */
+               if (!bitmap.pgsize)
+                       return -EINVAL;
+
+               pgshift = __ffs(bitmap.pgsize);
+               npages = unmap.size >> pgshift;
+
+               if (!npages || !bitmap.size ||
+                   (bitmap.size > DIRTY_BITMAP_SIZE_MAX) ||
+                   (bitmap.size < dirty_bitmap_bytes(npages)))
+                       return -EINVAL;
+
+               dirty.iova = unmap.iova;
+               dirty.length = unmap.size;
+               dirty.data = bitmap.data;
+               dirty.page_size = 1 << pgshift;
+               dirtyp = &dirty;
+       }
+
        ioas = get_compat_ioas(ictx);
        if (IS_ERR(ioas))
                return PTR_ERR(ioas);
 
+       pgshift = __ffs(iommufd_get_pagesizes(ioas));
+       pgsize = (size_t)1 << pgshift;
+
+       /* When dirty tracking is enabled, allow only min supported pgsize */
+       if ((unmap.flags & VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) &&
+           (bitmap.pgsize != pgsize)) {
+               rc = -EINVAL;
+               goto out_put;
+       }
+
        if (unmap.flags & VFIO_DMA_UNMAP_FLAG_ALL)
                rc = iopt_unmap_all(&ioas->iopt);
        else
-               rc = iopt_unmap_iova(&ioas->iopt, unmap.iova, unmap.size, NULL);
+               rc = iopt_unmap_iova(&ioas->iopt, unmap.iova, unmap.size,
+                                    dirtyp);
+
+out_put:
        iommufd_put_object(&ioas->obj);
        return rc;
 }
@@ -222,16 +276,6 @@ static int iommufd_vfio_set_iommu(struct iommufd_ctx 
*ictx, unsigned long type)
        return 0;
 }
 
-static u64 iommufd_get_pagesizes(struct iommufd_ioas *ioas)
-{
-       /* FIXME: See vfio_update_pgsize_bitmap(), for compat this should return
-        * the high bits too, and we need to decide if we should report that
-        * iommufd supports less than PAGE_SIZE alignment or stick to strict
-        * compatibility. qemu only cares about the first set bit.
-        */
-       return ioas->iopt.iova_alignment;
-}
-
 static int iommufd_fill_cap_iova(struct iommufd_ioas *ioas,
                                 struct vfio_info_cap_header __user *cur,
                                 size_t avail)
@@ -289,6 +333,26 @@ static int iommufd_fill_cap_dma_avail(struct iommufd_ioas 
*ioas,
        return sizeof(cap_dma);
 }
 
+/*
+ * Fill VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION: dirty tracking is supported at
+ * the smallest IOPT page size, with the same max bitmap size as VFIO type1.
+ */
+static int iommufd_fill_cap_migration(struct iommufd_ioas *ioas,
+                                     struct vfio_info_cap_header __user *cur,
+                                     size_t avail)
+{
+       struct vfio_iommu_type1_info_cap_migration cap_mig = {
+               .header = {
+                       .id = VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION,
+                       .version = 1,
+               },
+               .flags = 0,
+               .pgsize_bitmap = (size_t)1 << __ffs(iommufd_get_pagesizes(ioas)),
+               .max_dirty_bitmap_size = DIRTY_BITMAP_SIZE_MAX,
+       };
+
+       /* First pass only probes the needed size; copy when room is available */
+       if (avail >= sizeof(cap_mig) &&
+           copy_to_user(cur, &cap_mig, sizeof(cap_mig)))
+               return -EFAULT;
+       return sizeof(cap_mig);
+}
+
 static int iommufd_vfio_iommu_get_info(struct iommufd_ctx *ictx,
                                       void __user *arg)
 {
@@ -298,6 +362,7 @@ static int iommufd_vfio_iommu_get_info(struct iommufd_ctx 
*ictx,
        static const fill_cap_fn fill_fns[] = {
                iommufd_fill_cap_iova,
                iommufd_fill_cap_dma_avail,
+               iommufd_fill_cap_migration,
        };
        size_t minsz = offsetofend(struct vfio_iommu_type1_info, iova_pgsizes);
        struct vfio_info_cap_header __user *last_cap = NULL;
@@ -364,6 +429,137 @@ static int iommufd_vfio_iommu_get_info(struct iommufd_ctx 
*ictx,
        return rc;
 }
 
+/*
+ * VFIO_IOMMU_DIRTY_PAGES_FLAG_START: enable dirty tracking on the compat
+ * IOAS. @dirty is currently unused; kept for subcmd-handler symmetry.
+ */
+static int iommufd_vfio_dirty_pages_start(struct iommufd_ctx *ictx,
+                               struct vfio_iommu_type1_dirty_bitmap *dirty)
+{
+       struct iommufd_ioas *ioas;
+       int ret;
+
+       ioas = get_compat_ioas(ictx);
+       if (IS_ERR(ioas))
+               return PTR_ERR(ioas);
+
+       ret = iopt_set_dirty_tracking(&ioas->iopt, NULL, true);
+
+       iommufd_put_object(&ioas->obj);
+
+       return ret;
+}
+
+/*
+ * VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP: disable dirty tracking on the compat
+ * IOAS. @dirty is currently unused; kept for subcmd-handler symmetry.
+ */
+static int iommufd_vfio_dirty_pages_stop(struct iommufd_ctx *ictx,
+                               struct vfio_iommu_type1_dirty_bitmap *dirty)
+{
+       struct iommufd_ioas *ioas = get_compat_ioas(ictx);
+       int rc;
+
+       if (IS_ERR(ioas))
+               return PTR_ERR(ioas);
+
+       rc = iopt_set_dirty_tracking(&ioas->iopt, NULL, false);
+       iommufd_put_object(&ioas->obj);
+       return rc;
+}
+
+/*
+ * VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP: read and clear the dirty bitmap
+ * for @range. Only the smallest supported page size is accepted, and both
+ * iova and size must be aligned to it.
+ */
+static int iommufd_vfio_dirty_pages_get_bitmap(struct iommufd_ctx *ictx,
+                               struct vfio_iommu_type1_dirty_bitmap_get *range)
+{
+       struct iommufd_dirty_data bitmap;
+       uint64_t npages, bitmap_size;
+       struct iommufd_ioas *ioas;
+       unsigned long pgshift;
+       size_t iommu_pgsize;
+       int ret = -EINVAL;
+
+       ioas = get_compat_ioas(ictx);
+       if (IS_ERR(ioas))
+               return PTR_ERR(ioas);
+
+       down_read(&ioas->iopt.iova_rwsem);
+
+       /* Validate pgsize before use: __ffs(0) is undefined */
+       if (!range->bitmap.pgsize)
+               goto out_put;
+
+       pgshift = __ffs(range->bitmap.pgsize);
+       npages = range->size >> pgshift;
+       bitmap_size = range->bitmap.size;
+
+       if (!npages || !bitmap_size || (bitmap_size > DIRTY_BITMAP_SIZE_MAX) ||
+           (bitmap_size < dirty_bitmap_bytes(npages)))
+               goto out_put;
+
+       iommu_pgsize = 1 << __ffs(iommufd_get_pagesizes(ioas));
+
+       /* allow only smallest supported pgsize */
+       if (range->bitmap.pgsize != iommu_pgsize)
+               goto out_put;
+
+       if (range->iova & (iommu_pgsize - 1))
+               goto out_put;
+
+       if (!range->size || range->size & (iommu_pgsize - 1))
+               goto out_put;
+
+       bitmap.iova = range->iova;
+       bitmap.length = range->size;
+       bitmap.data = range->bitmap.data;
+       bitmap.page_size = 1 << pgshift;
+
+       ret = iopt_read_and_clear_dirty_data(&ioas->iopt, NULL, &bitmap);
+
+out_put:
+       up_read(&ioas->iopt.iova_rwsem);
+       iommufd_put_object(&ioas->obj);
+       return ret;
+}
+
+/*
+ * VFIO_IOMMU_DIRTY_PAGES dispatcher: validates the header and routes to the
+ * START/STOP/GET_BITMAP subcmd handlers. Exactly one flag must be set.
+ */
+static int iommufd_vfio_dirty_pages(struct iommufd_ctx *ictx, unsigned int cmd,
+                                   void __user *arg)
+{
+       size_t minsz = offsetofend(struct vfio_iommu_type1_dirty_bitmap, flags);
+       struct vfio_iommu_type1_dirty_bitmap dirty;
+       u32 supported_flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_START |
+                       VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP |
+                       VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP;
+       int ret = 0;
+
+       if (copy_from_user(&dirty, (void __user *)arg, minsz))
+               return -EFAULT;
+
+       if (dirty.argsz < minsz || dirty.flags & ~supported_flags)
+               return -EINVAL;
+
+       /*
+        * Only one flag should be set at a time; the !flags check also avoids
+        * the undefined __ffs(0)/__fls(0) when no flag is given.
+        */
+       if (!dirty.flags || __ffs(dirty.flags) != __fls(dirty.flags))
+               return -EINVAL;
+
+       if (dirty.flags & VFIO_IOMMU_DIRTY_PAGES_FLAG_START) {
+               ret = iommufd_vfio_dirty_pages_start(ictx, &dirty);
+       } else if (dirty.flags & VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP) {
+               ret = iommufd_vfio_dirty_pages_stop(ictx, &dirty);
+       } else if (dirty.flags & VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP) {
+               struct vfio_iommu_type1_dirty_bitmap_get range;
+               size_t data_size = dirty.argsz - minsz;
+
+               if (data_size < sizeof(range))
+                       return -EINVAL;
+
+               if (copy_from_user(&range, (void __user *)(arg + minsz),
+                                  sizeof(range)))
+                       return -EFAULT;
+
+               /* reject iova + size wraparound */
+               if (range.iova + range.size < range.iova)
+                       return -EINVAL;
+
+               if (!access_ok((void __user *)range.bitmap.data,
+                              range.bitmap.size))
+                       return -EINVAL;
+
+               ret = iommufd_vfio_dirty_pages_get_bitmap(ictx, &range);
+       }
+
+       return ret;
+}
+
+
 /* FIXME TODO:
 PowerPC SPAPR only:
 #define VFIO_IOMMU_ENABLE      _IO(VFIO_TYPE, VFIO_BASE + 15)
@@ -394,6 +590,7 @@ int iommufd_vfio_ioctl(struct iommufd_ctx *ictx, unsigned 
int cmd,
        case VFIO_IOMMU_UNMAP_DMA:
                return iommufd_vfio_unmap_dma(ictx, cmd, uarg);
        case VFIO_IOMMU_DIRTY_PAGES:
+               return iommufd_vfio_dirty_pages(ictx, cmd, uarg);
        default:
                return -ENOIOCTLCMD;
        }
-- 
2.17.2

_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Reply via email to