Jason Gunthorpe <j...@nvidia.com> writes: ....
>> tsm_unbind in vdevice_destroy: >> >> vdevice_destroy() ends up calling tsm_unbind() while holding only the >> vdev_lock. At first glance, this seems unsafe. But in practice, it's >> fine because the corresponding iommufd_device has already been destroyed >> when the VFIO device file descriptor was closed—triggering >> vfio_df_iommufd_unbind(). > > This needs some kind of fixing the idevice should destroy the vdevices > during idevice destruction so we don't get this out of order where the > idevice is destroyed before the vdevice. > > This should be a separate patch as it is an immediate bug fix.. > Something like below? diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c index 86244403b532..a49b293bd516 100644 --- a/drivers/iommu/iommufd/device.c +++ b/drivers/iommu/iommufd/device.c @@ -221,6 +221,8 @@ struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx, refcount_inc(&idev->obj.users); /* igroup refcount moves into iommufd_device */ idev->igroup = igroup; + idev->vdev = NULL; + mutex_init(&idev->lock); /* * If the caller fails after this success it must call @@ -282,6 +284,12 @@ EXPORT_SYMBOL_NS_GPL(iommufd_ctx_has_group, "IOMMUFD"); */ void iommufd_device_unbind(struct iommufd_device *idev) { + /* this will be unlocked while destroying the idev obj */ + mutex_lock(&idev->lock); + + if (idev->vdev) + /* extra refcount taken during vdevice alloc */ + iommufd_object_destroy_user(idev->ictx, &idev->vdev->obj); iommufd_object_destroy_user(idev->ictx, &idev->obj); } EXPORT_SYMBOL_NS_GPL(iommufd_device_unbind, "IOMMUFD"); diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h index 9ccc83341f32..d85bd8b38751 100644 --- a/drivers/iommu/iommufd/iommufd_private.h +++ b/drivers/iommu/iommufd/iommufd_private.h @@ -425,6 +425,10 @@ struct iommufd_device { /* always the physical device */ struct device *dev; bool enforce_cache_coherency; + /* to protect the following members*/ + struct mutex lock; + /* if there is a vdevice mapping the idev */ + struct iommufd_vdevice *vdev; }; static inline struct iommufd_device * @@ -606,6 +610,7 @@ struct iommufd_vdevice { struct iommufd_ctx *ictx; struct iommufd_viommu *viommu; struct device *dev; + struct iommufd_device *idev; u64 id; /* per-vIOMMU virtual ID */ }; diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c index 3df468f64e7d..c38303df536f 100644 --- a/drivers/iommu/iommufd/main.c +++ b/drivers/iommu/iommufd/main.c @@ -172,6 +172,11 @@ int iommufd_object_remove(struct iommufd_ctx *ictx, ictx->vfio_ioas = NULL; xa_unlock(&ictx->objects); + if (obj->type == IOMMUFD_OBJ_DEVICE) { + /* idevice should be freed with lock held */ + struct iommufd_device *idev = container_of(obj, struct iommufd_device, obj); + mutex_unlock(&idev->lock); + } /* * Since users is zero any positive users_shortterm must be racing * iommufd_put_object(), or we have a bug. diff --git a/drivers/iommu/iommufd/viommu.c b/drivers/iommu/iommufd/viommu.c index 01df2b985f02..17f189bc9e2c 100644 --- a/drivers/iommu/iommufd/viommu.c +++ b/drivers/iommu/iommufd/viommu.c @@ -84,15 +84,24 @@ int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd) return rc; } +/* This will be called from iommufd_device_unbind */ void iommufd_vdevice_destroy(struct iommufd_object *obj) { struct iommufd_vdevice *vdev = container_of(obj, struct iommufd_vdevice, obj); struct iommufd_viommu *viommu = vdev->viommu; + struct iommufd_device *idev = vdev->idev; + + /* + * since we have an refcount on idev, it can't be freed. + */ + lockdep_assert_held(&idev->lock); /* xa_cmpxchg is okay to fail if alloc failed xa_cmpxchg previously */ xa_cmpxchg(&viommu->vdevs, vdev->id, vdev, NULL, GFP_KERNEL); refcount_dec(&viommu->obj.users); + idev->vdev = NULL; + refcount_dec(&idev->obj.users); put_device(vdev->dev); } @@ -124,10 +133,15 @@ int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd) goto out_put_idev; } + mutex_lock(&idev->lock); + if (idev->vdev) { + rc = -EINVAL; + goto out_put_idev_unlock; + } vdev = iommufd_object_alloc(ucmd->ictx, vdev, IOMMUFD_OBJ_VDEVICE); if (IS_ERR(vdev)) { rc = PTR_ERR(vdev); - goto out_put_idev; + goto out_put_idev_unlock; } vdev->id = virt_id; @@ -147,10 +161,18 @@ int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd) if (rc) goto out_abort; iommufd_object_finalize(ucmd->ictx, &vdev->obj); - goto out_put_idev; + /* don't allow idev free without vdev free */ + refcount_inc(&idev->obj.users); + vdev->idev = idev; + /* vdev lifecycle now managed by idev */ + idev->vdev = vdev; + refcount_inc(&vdev->obj.users); + goto out_put_idev_unlock; out_abort: iommufd_object_abort_and_destroy(ucmd->ictx, &vdev->obj); +out_put_idev_unlock: + mutex_unlock(&idev->lock); out_put_idev: iommufd_put_object(ucmd->ictx, &idev->obj); out_put_viommu: