This is the way for the user to invoke hot-reset on devices opened via the
cdev interface. The user should check for the flag
VFIO_PCI_HOT_RESET_FLAG_RESETTABLE in the output of the
VFIO_DEVICE_GET_PCI_HOT_RESET_INFO ioctl before attempting a hot-reset on
cdev devices.

Suggested-by: Jason Gunthorpe <j...@nvidia.com>
Signed-off-by: Jason Gunthorpe <j...@nvidia.com>
Reviewed-by: Jason Gunthorpe <j...@nvidia.com>
Tested-by: Yanting Jiang <yanting.ji...@intel.com>
Signed-off-by: Yi Liu <yi.l....@intel.com>
---
 drivers/vfio/pci/vfio_pci_core.c | 66 +++++++++++++++++++++++++++-----
 include/uapi/linux/vfio.h        | 22 +++++++++++
 2 files changed, 79 insertions(+), 9 deletions(-)

diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index 43858d471447..f70e3b948b16 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -180,7 +180,8 @@ static void vfio_pci_probe_mmaps(struct vfio_pci_core_device *vdev)
 struct vfio_pci_group_info;
 static void vfio_pci_dev_set_try_reset(struct vfio_device_set *dev_set);
 static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
-                                     struct vfio_pci_group_info *groups);
+                                     struct vfio_pci_group_info *groups,
+                                     struct iommufd_ctx *iommufd_ctx);
 
 /*
  * INTx masking requires the ability to disable INTx signaling via PCI_COMMAND
@@ -1364,8 +1365,7 @@ vfio_pci_ioctl_pci_hot_reset_groups(struct vfio_pci_core_device *vdev,
        if (ret)
                return ret;
 
-       /* Somewhere between 1 and count is OK */
-       if (!array_count || array_count > count)
+       if (array_count > count)
                return -EINVAL;
 
        group_fds = kcalloc(array_count, sizeof(*group_fds), GFP_KERNEL);
@@ -1414,7 +1414,7 @@ vfio_pci_ioctl_pci_hot_reset_groups(struct vfio_pci_core_device *vdev,
        info.count = array_count;
        info.files = files;
 
-       ret = vfio_pci_dev_set_hot_reset(vdev->vdev.dev_set, &info);
+       ret = vfio_pci_dev_set_hot_reset(vdev->vdev.dev_set, &info, NULL);
 
 hot_reset_release:
        for (file_idx--; file_idx >= 0; file_idx--)
@@ -1429,6 +1429,7 @@ static int vfio_pci_ioctl_pci_hot_reset(struct vfio_pci_core_device *vdev,
 {
        unsigned long minsz = offsetofend(struct vfio_pci_hot_reset, count);
        struct vfio_pci_hot_reset hdr;
+       struct iommufd_ctx *iommufd;
        bool slot = false;
 
        if (copy_from_user(&hdr, arg, minsz))
@@ -1443,7 +1444,12 @@ static int vfio_pci_ioctl_pci_hot_reset(struct vfio_pci_core_device *vdev,
        else if (pci_probe_reset_bus(vdev->pdev->bus))
                return -ENODEV;
 
-       return vfio_pci_ioctl_pci_hot_reset_groups(vdev, hdr.count, slot, arg);
+       if (hdr.count)
+               return vfio_pci_ioctl_pci_hot_reset_groups(vdev, hdr.count, slot, arg);
+
+       iommufd = vfio_iommufd_physical_ictx(&vdev->vdev);
+
+       return vfio_pci_dev_set_hot_reset(vdev->vdev.dev_set, NULL, iommufd);
 }
 
 static int vfio_pci_ioctl_ioeventfd(struct vfio_pci_core_device *vdev,
@@ -2415,6 +2421,9 @@ static bool vfio_dev_in_groups(struct vfio_pci_core_device *vdev,
 {
        unsigned int i;
 
+       if (!groups)
+               return false;
+
        for (i = 0; i < groups->count; i++)
                if (vfio_file_has_dev(groups->files[i], &vdev->vdev))
                        return true;
@@ -2488,13 +2497,38 @@ static int vfio_pci_dev_set_pm_runtime_get(struct vfio_device_set *dev_set)
        return ret;
 }
 
+static bool vfio_dev_in_iommufd_ctx(struct vfio_pci_core_device *vdev,
+                                   struct iommufd_ctx *iommufd_ctx)
+{
+       struct iommufd_ctx *iommufd = vfio_iommufd_physical_ictx(&vdev->vdev);
+       struct iommu_group *iommu_group;
+
+       if (!iommufd_ctx)
+               return false;
+
+       if (iommufd == iommufd_ctx)
+               return true;
+
+       iommu_group = iommu_group_get(vdev->vdev.dev);
+       if (!iommu_group)
+               return false;
+
+       /*
+        * Check whether any device within the iommu_group is bound to
+        * the input iommufd_ctx.
+        */
+       return vfio_devset_iommufd_has_group(vdev->vdev.dev_set,
+                                            iommufd_ctx, iommu_group);
+}
+
 /*
  * We need to get memory_lock for each device, but devices can share mmap_lock,
 * therefore we need to zap and hold the vma_lock for each device, and only then
  * get each memory_lock.
  */
 static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
-                                     struct vfio_pci_group_info *groups)
+                                     struct vfio_pci_group_info *groups,
+                                     struct iommufd_ctx *iommufd_ctx)
 {
        struct vfio_pci_core_device *cur_mem;
        struct vfio_pci_core_device *cur_vma;
@@ -2525,10 +2559,24 @@ static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
 
        list_for_each_entry(cur_vma, &dev_set->device_list, vdev.dev_set_list) {
                /*
-                * Test whether all the affected devices are contained by the
-                * set of groups provided by the user.
+                * Test whether all the affected devices can be reset by the
+                * user.
+                *
+                * If user provides a set of groups, all the opened devices
+                * in the dev_set should be contained by the set of groups
+                * provided by the user.
+                *
+                * If user provides a zero-length group fd array, then all
+                * the affected devices must be bound to the same iommufd_ctx
+                * as the input iommufd_ctx.  If a device has not been bound
+                * to an iommufd_ctx yet, then check whether any device
+                * within its iommu_group has been bound to the input
+                * iommufd_ctx.
+                *
+                * Otherwise, reset is not allowed.
                 */
-               if (!vfio_dev_in_groups(cur_vma, groups)) {
+               if (!vfio_dev_in_groups(cur_vma, groups) &&
+                   !vfio_dev_in_iommufd_ctx(cur_vma, iommufd_ctx)) {
                        ret = -EINVAL;
                        goto err_undo;
                }
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 4b4e2c28984b..1241d02d8701 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -710,6 +710,28 @@ struct vfio_pci_hot_reset_info {
  * VFIO_DEVICE_PCI_HOT_RESET - _IOW(VFIO_TYPE, VFIO_BASE + 13,
  *                                 struct vfio_pci_hot_reset)
  *
+ * Userspace requests hot reset for the devices it operates.  Due to the
+ * underlying topology, multiple devices can be affected in the reset
+ * while some might be opened by another user.  To avoid interference
+ * the calling user must ensure all affected devices are owned by itself.
+ * The ownership proof needs to refer to the output of
+ * VFIO_DEVICE_GET_PCI_HOT_RESET_INFO.  Ownership can be proved as:
+ *
+ *   1) An array of group fds - This is used for the devices opened via
+ *                             the group/container interface.
+ *   2) A zero-length array - This is used for the devices opened via
+ *                           the cdev interface.  User should check the
+ *                           flag VFIO_PCI_HOT_RESET_FLAG_IOMMUFD_DEV_ID
+ *                           and flag VFIO_PCI_HOT_RESET_FLAG_RESETTABLE
+ *                           before using this method.
+ *
+ * If a non-empty group fd array is passed, the devices affected by
+ * the reset must belong to those opened VFIO groups.  If a zero-length
+ * array is passed, the other devices affected by the reset, if any,
+ * must be either bound to the same iommufd as this VFIO device or in
+ * the same iommu_group as a device that is.  Either of the two
+ * methods is applied to check the feasibility of the hot reset.
+ *
  * Return: 0 on success, -errno on failure.
  */
 struct vfio_pci_hot_reset {
-- 
2.34.1

Reply via email to