Jason Gunthorpe <j...@nvidia.com> writes:

....

>> tsm_unbind in vdevice_destroy:
>> 
>> vdevice_destroy() ends up calling tsm_unbind() while holding only the
>> vdev_lock. At first glance, this seems unsafe. But in practice, it's
>> fine because the corresponding iommufd_device has already been destroyed
>> when the VFIO device file descriptor was closed—triggering
>> vfio_df_iommufd_unbind().
>
> This needs some kind of fixing the idevice should destroy the vdevices
> during idevice destruction so we don't get this out of order where the
> idevice is destroyed before the vdevice.
>
> This should be a separate patch as it is an immediate bug fix..
>

Something like below?

diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
index 86244403b532..a49b293bd516 100644
--- a/drivers/iommu/iommufd/device.c
+++ b/drivers/iommu/iommufd/device.c
@@ -221,6 +221,8 @@ struct iommufd_device *iommufd_device_bind(struct 
iommufd_ctx *ictx,
        refcount_inc(&idev->obj.users);
        /* igroup refcount moves into iommufd_device */
        idev->igroup = igroup;
+       idev->vdev   = NULL;
+       mutex_init(&idev->lock);
 
        /*
         * If the caller fails after this success it must call
@@ -282,6 +284,12 @@ EXPORT_SYMBOL_NS_GPL(iommufd_ctx_has_group, "IOMMUFD");
  */
 void iommufd_device_unbind(struct iommufd_device *idev)
 {
+       /* this will be unlocked while destroying the idev obj */
+       mutex_lock(&idev->lock);
+
+       if (idev->vdev)
+               /* extra refcount taken during vdevice alloc */
+               iommufd_object_destroy_user(idev->ictx, &idev->vdev->obj);
        iommufd_object_destroy_user(idev->ictx, &idev->obj);
 }
 EXPORT_SYMBOL_NS_GPL(iommufd_device_unbind, "IOMMUFD");
diff --git a/drivers/iommu/iommufd/iommufd_private.h 
b/drivers/iommu/iommufd/iommufd_private.h
index 9ccc83341f32..d85bd8b38751 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -425,6 +425,10 @@ struct iommufd_device {
        /* always the physical device */
        struct device *dev;
        bool enforce_cache_coherency;
+       /* to protect the following members*/
+       struct mutex lock;
+       /* if there is a vdevice mapping the idev */
+       struct iommufd_vdevice *vdev;
 };
 
 static inline struct iommufd_device *
@@ -606,6 +610,7 @@ struct iommufd_vdevice {
        struct iommufd_ctx *ictx;
        struct iommufd_viommu *viommu;
        struct device *dev;
+       struct iommufd_device *idev;
        u64 id; /* per-vIOMMU virtual ID */
 };
 
diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
index 3df468f64e7d..c38303df536f 100644
--- a/drivers/iommu/iommufd/main.c
+++ b/drivers/iommu/iommufd/main.c
@@ -172,6 +172,11 @@ int iommufd_object_remove(struct iommufd_ctx *ictx,
                ictx->vfio_ioas = NULL;
        xa_unlock(&ictx->objects);
 
+       if (obj->type == IOMMUFD_OBJ_DEVICE) {
+               /* idevice should be freed with lock held */
+               struct iommufd_device *idev = container_of(obj, struct 
iommufd_device, obj);
+               mutex_unlock(&idev->lock);
+       }
        /*
         * Since users is zero any positive users_shortterm must be racing
         * iommufd_put_object(), or we have a bug.
diff --git a/drivers/iommu/iommufd/viommu.c b/drivers/iommu/iommufd/viommu.c
index 01df2b985f02..17f189bc9e2c 100644
--- a/drivers/iommu/iommufd/viommu.c
+++ b/drivers/iommu/iommufd/viommu.c
@@ -84,15 +84,24 @@ int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd)
        return rc;
 }
 
+/* This will be called from iommufd_device_unbind  */
 void iommufd_vdevice_destroy(struct iommufd_object *obj)
 {
        struct iommufd_vdevice *vdev =
                container_of(obj, struct iommufd_vdevice, obj);
        struct iommufd_viommu *viommu = vdev->viommu;
+       struct iommufd_device *idev = vdev->idev;
+
+       /*
+        * since we have an refcount on idev, it can't be freed.
+        */
+       lockdep_assert_held(&idev->lock);
 
        /* xa_cmpxchg is okay to fail if alloc failed xa_cmpxchg previously */
        xa_cmpxchg(&viommu->vdevs, vdev->id, vdev, NULL, GFP_KERNEL);
        refcount_dec(&viommu->obj.users);
+       idev->vdev = NULL;
+       refcount_dec(&idev->obj.users);
        put_device(vdev->dev);
 }
 
@@ -124,10 +133,15 @@ int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd)
                goto out_put_idev;
        }
 
+       mutex_lock(&idev->lock);
+       if (idev->vdev) {
+               rc = -EINVAL;
+               goto out_put_idev_unlock;
+       }
        vdev = iommufd_object_alloc(ucmd->ictx, vdev, IOMMUFD_OBJ_VDEVICE);
        if (IS_ERR(vdev)) {
                rc = PTR_ERR(vdev);
-               goto out_put_idev;
+               goto out_put_idev_unlock;
        }
 
        vdev->id = virt_id;
@@ -147,10 +161,18 @@ int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd)
        if (rc)
                goto out_abort;
        iommufd_object_finalize(ucmd->ictx, &vdev->obj);
-       goto out_put_idev;
+       /* don't allow idev free without vdev free */
+       refcount_inc(&idev->obj.users);
+       vdev->idev = idev;
+       /* vdev lifecycle now managed by idev */
+       idev->vdev = vdev;
+       refcount_inc(&vdev->obj.users);
+       goto out_put_idev_unlock;
 
 out_abort:
        iommufd_object_abort_and_destroy(ucmd->ictx, &vdev->obj);
+out_put_idev_unlock:
+       mutex_unlock(&idev->lock);
 out_put_idev:
        iommufd_put_object(ucmd->ictx, &idev->obj);
 out_put_viommu:

Reply via email to