Hi Nicolin,

On Tue, Aug 27, 2024 at 09:59:43AM -0700, Nicolin Chen wrote:
> Introduce a pair of new ioctls to set/unset a per-viommu virtual device id
> that should be linked to a physical device id via an idev pointer.
> 
> Continue the support IOMMU_VIOMMU_TYPE_DEFAULT for a core-managed viommu.
> Provide a lookup function for drivers to load device pointer by a virtual
> device id.
> 
> Add a rw_semaphore protection around the vdev_id list. Any future ioctl
> handlers that potentially access the list must grab the lock too.
> 
> Signed-off-by: Nicolin Chen <nicol...@nvidia.com>
> ---
>  drivers/iommu/iommufd/device.c          |  12 +++
>  drivers/iommu/iommufd/iommufd_private.h |  21 ++++
>  drivers/iommu/iommufd/main.c            |   6 ++
>  drivers/iommu/iommufd/viommu.c          | 121 ++++++++++++++++++++++++
>  include/uapi/linux/iommufd.h            |  40 ++++++++
>  5 files changed, 200 insertions(+)
> 
> diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
> index 5fd3dd420290..3ad759971b32 100644
> --- a/drivers/iommu/iommufd/device.c
> +++ b/drivers/iommu/iommufd/device.c
> @@ -136,6 +136,18 @@ void iommufd_device_destroy(struct iommufd_object *obj)
>       struct iommufd_device *idev =
>               container_of(obj, struct iommufd_device, obj);
>  
> +     /* Unlocked since there should be no race in a destroy() */
> +     if (idev->vdev_id) {
> +             struct iommufd_vdev_id *vdev_id = idev->vdev_id;
> +             struct iommufd_viommu *viommu = vdev_id->viommu;
> +             struct iommufd_vdev_id *old;
> +
> +             old = xa_cmpxchg(&viommu->vdev_ids, vdev_id->id, vdev_id, NULL,
> +                              GFP_KERNEL);
> +             WARN_ON(old != vdev_id);
> +             kfree(vdev_id);
> +             idev->vdev_id = NULL;
> +     }
>       iommu_device_release_dma_owner(idev->dev);
>       iommufd_put_group(idev->igroup);
>       if (!iommufd_selftest_is_mock_dev(idev->dev))
> diff --git a/drivers/iommu/iommufd/iommufd_private.h 
> b/drivers/iommu/iommufd/iommufd_private.h
> index 1f2a1c133b9a..2c6e168c5300 100644
> --- a/drivers/iommu/iommufd/iommufd_private.h
> +++ b/drivers/iommu/iommufd/iommufd_private.h
> @@ -416,6 +416,7 @@ struct iommufd_device {
>       struct iommufd_object obj;
>       struct iommufd_ctx *ictx;
>       struct iommufd_group *igroup;
> +     struct iommufd_vdev_id *vdev_id;
>       struct list_head group_item;
>       /* always the physical device */
>       struct device *dev;
> @@ -533,11 +534,31 @@ struct iommufd_viommu {
>       struct iommufd_ctx *ictx;
>       struct iommufd_hwpt_paging *hwpt;
>  
> +     /* The locking order is vdev_ids_rwsem -> igroup::lock */
> +     struct rw_semaphore vdev_ids_rwsem;
> +     struct xarray vdev_ids;
> +
>       unsigned int type;
>  };
>  
> +struct iommufd_vdev_id {
> +     struct iommufd_viommu *viommu;
> +     struct iommufd_device *idev;
> +     u64 id;
> +};
> +
> +static inline struct iommufd_viommu *
> +iommufd_get_viommu(struct iommufd_ucmd *ucmd, u32 id)
> +{
> +     return container_of(iommufd_get_object(ucmd->ictx, id,
> +                                            IOMMUFD_OBJ_VIOMMU),
> +                         struct iommufd_viommu, obj);
> +}
> +
>  int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd);
>  void iommufd_viommu_destroy(struct iommufd_object *obj);
> +int iommufd_viommu_set_vdev_id(struct iommufd_ucmd *ucmd);
> +int iommufd_viommu_unset_vdev_id(struct iommufd_ucmd *ucmd);
>  
>  #ifdef CONFIG_IOMMUFD_TEST
>  int iommufd_test(struct iommufd_ucmd *ucmd);
> diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
> index 288ee51b6829..199ad90fa36b 100644
> --- a/drivers/iommu/iommufd/main.c
> +++ b/drivers/iommu/iommufd/main.c
> @@ -334,6 +334,8 @@ union ucmd_buffer {
>       struct iommu_option option;
>       struct iommu_vfio_ioas vfio_ioas;
>       struct iommu_viommu_alloc viommu;
> +     struct iommu_viommu_set_vdev_id set_vdev_id;
> +     struct iommu_viommu_unset_vdev_id unset_vdev_id;
>  #ifdef CONFIG_IOMMUFD_TEST
>       struct iommu_test_cmd test;
>  #endif
> @@ -387,6 +389,10 @@ static const struct iommufd_ioctl_op iommufd_ioctl_ops[] 
> = {
>                __reserved),
>       IOCTL_OP(IOMMU_VIOMMU_ALLOC, iommufd_viommu_alloc_ioctl,
>                struct iommu_viommu_alloc, out_viommu_id),
> +     IOCTL_OP(IOMMU_VIOMMU_SET_VDEV_ID, iommufd_viommu_set_vdev_id,
> +              struct iommu_viommu_set_vdev_id, vdev_id),
> +     IOCTL_OP(IOMMU_VIOMMU_UNSET_VDEV_ID, iommufd_viommu_unset_vdev_id,
> +              struct iommu_viommu_unset_vdev_id, vdev_id),
>  #ifdef CONFIG_IOMMUFD_TEST
>       IOCTL_OP(IOMMU_TEST_CMD, iommufd_test, struct iommu_test_cmd, last),
>  #endif
> diff --git a/drivers/iommu/iommufd/viommu.c b/drivers/iommu/iommufd/viommu.c
> index 200653a4bf57..8ffcd72b16b8 100644
> --- a/drivers/iommu/iommufd/viommu.c
> +++ b/drivers/iommu/iommufd/viommu.c
> @@ -8,6 +8,15 @@ void iommufd_viommu_destroy(struct iommufd_object *obj)
>  {
>       struct iommufd_viommu *viommu =
>               container_of(obj, struct iommufd_viommu, obj);
> +     struct iommufd_vdev_id *vdev_id;
> +     unsigned long index;
> +
> +     xa_for_each(&viommu->vdev_ids, index, vdev_id) {
> +             /* Unlocked since there should be no race in a destroy() */
> +             vdev_id->idev->vdev_id = NULL;
> +             kfree(vdev_id);
> +     }
> +     xa_destroy(&viommu->vdev_ids);
>  
>       refcount_dec(&viommu->hwpt->common.obj.users);
>  }
> @@ -53,6 +62,9 @@ int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd)
>       viommu->ictx = ucmd->ictx;
>       viommu->hwpt = hwpt_paging;
>  
> +     xa_init(&viommu->vdev_ids);
> +     init_rwsem(&viommu->vdev_ids_rwsem);
> +
>       refcount_inc(&viommu->hwpt->common.obj.users);
>  
>       cmd->out_viommu_id = viommu->obj.id;
> @@ -70,3 +82,112 @@ int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd)
>       iommufd_put_object(ucmd->ictx, &idev->obj);
>       return rc;
>  }
> +
> +int iommufd_viommu_set_vdev_id(struct iommufd_ucmd *ucmd)
> +{
> +     struct iommu_viommu_set_vdev_id *cmd = ucmd->cmd;
> +     struct iommufd_vdev_id *vdev_id, *curr;
> +     struct iommufd_viommu *viommu;
> +     struct iommufd_device *idev;
> +     int rc = 0;
> +
> +     if (cmd->vdev_id > ULONG_MAX)
> +             return -EINVAL;
> +
> +     viommu = iommufd_get_viommu(ucmd, cmd->viommu_id);
> +     if (IS_ERR(viommu))
> +             return PTR_ERR(viommu);
> +
> +     idev = iommufd_get_device(ucmd, cmd->dev_id);
> +     if (IS_ERR(idev)) {
> +             rc = PTR_ERR(idev);
> +             goto out_put_viommu;
> +     }
> +
> +     down_write(&viommu->vdev_ids_rwsem);
> +     mutex_lock(&idev->igroup->lock);
> +     if (idev->vdev_id) {
> +             rc = -EEXIST;
> +             goto out_unlock_igroup;
> +     }
> +
> +     vdev_id = kzalloc(sizeof(*vdev_id), GFP_KERNEL);
> +     if (!vdev_id) {
> +             rc = -ENOMEM;
> +             goto out_unlock_igroup;
> +     }
> +
> +     vdev_id->idev = idev;
> +     vdev_id->viommu = viommu;
> +     vdev_id->id = cmd->vdev_id;

My understanding of IOMMUFD is limited, but AFAICT this means it is
assumed that each device can only have one stream ID (RID)?

As far as I can see, in patch 17 arm_smmu_convert_viommu_vdev_id()
converts the virtual ID to a physical one using master->streams[0].id.

Is that correct or am I missing something?

I am asking because I am looking at a similar problem for a paravirtual
IOMMU with pKVM, where the UAPI would be something like:

        GET_NUM_END_POINTS(dev) => nr_sids

        SET_END_POINT_VSID(dev, sid_index, vsid)

This is similar to what VFIO does with IRQs.

It is indexed this way because a device can have many SIDs.

Thanks,
Mostafa

> +
> +     curr = xa_cmpxchg(&viommu->vdev_ids, cmd->vdev_id, NULL, vdev_id,
> +                       GFP_KERNEL);
> +     if (curr) {
> +             rc = xa_err(curr) ? : -EBUSY;
> +             goto out_free;
> +     }
> +
> +     idev->vdev_id = vdev_id;
> +     goto out_unlock_igroup;
> +
> +out_free:
> +     kfree(vdev_id);
> +out_unlock_igroup:
> +     mutex_unlock(&idev->igroup->lock);
> +     up_write(&viommu->vdev_ids_rwsem);
> +     iommufd_put_object(ucmd->ictx, &idev->obj);
> +out_put_viommu:
> +     iommufd_put_object(ucmd->ictx, &viommu->obj);
> +     return rc;
> +}
> +
> +int iommufd_viommu_unset_vdev_id(struct iommufd_ucmd *ucmd)
> +{
> +     struct iommu_viommu_unset_vdev_id *cmd = ucmd->cmd;
> +     struct iommufd_viommu *viommu;
> +     struct iommufd_vdev_id *old;
> +     struct iommufd_device *idev;
> +     int rc = 0;
> +
> +     if (cmd->vdev_id > ULONG_MAX)
> +             return -EINVAL;
> +
> +     viommu = iommufd_get_viommu(ucmd, cmd->viommu_id);
> +     if (IS_ERR(viommu))
> +             return PTR_ERR(viommu);
> +
> +     idev = iommufd_get_device(ucmd, cmd->dev_id);
> +     if (IS_ERR(idev)) {
> +             rc = PTR_ERR(idev);
> +             goto out_put_viommu;
> +     }
> +
> +     down_write(&viommu->vdev_ids_rwsem);
> +     mutex_lock(&idev->igroup->lock);
> +     if (!idev->vdev_id) {
> +             rc = -ENOENT;
> +             goto out_unlock_igroup;
> +     }
> +     if (idev->vdev_id->id != cmd->vdev_id) {
> +             rc = -EINVAL;
> +             goto out_unlock_igroup;
> +     }
> +
> +     old = xa_cmpxchg(&viommu->vdev_ids, idev->vdev_id->id,
> +                      idev->vdev_id, NULL, GFP_KERNEL);
> +     if (xa_is_err(old)) {
> +             rc = xa_err(old);
> +             goto out_unlock_igroup;
> +     }
> +     kfree(old);
> +     idev->vdev_id = NULL;
> +
> +out_unlock_igroup:
> +     mutex_unlock(&idev->igroup->lock);
> +     up_write(&viommu->vdev_ids_rwsem);
> +     iommufd_put_object(ucmd->ictx, &idev->obj);
> +out_put_viommu:
> +     iommufd_put_object(ucmd->ictx, &viommu->obj);
> +     return rc;
> +}
> diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h
> index 51ce6a019c34..1816e89c922d 100644
> --- a/include/uapi/linux/iommufd.h
> +++ b/include/uapi/linux/iommufd.h
> @@ -52,6 +52,8 @@ enum {
>       IOMMUFD_CMD_HWPT_INVALIDATE = 0x8d,
>       IOMMUFD_CMD_FAULT_QUEUE_ALLOC = 0x8e,
>       IOMMUFD_CMD_VIOMMU_ALLOC = 0x8f,
> +     IOMMUFD_CMD_VIOMMU_SET_VDEV_ID = 0x90,
> +     IOMMUFD_CMD_VIOMMU_UNSET_VDEV_ID = 0x91,
>  };
>  
>  /**
> @@ -882,4 +884,42 @@ struct iommu_viommu_alloc {
>       __u32 out_viommu_id;
>  };
>  #define IOMMU_VIOMMU_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VIOMMU_ALLOC)
> +
> +/**
> + * struct iommu_viommu_set_vdev_id - ioctl(IOMMU_VIOMMU_SET_VDEV_ID)
> + * @size: sizeof(struct iommu_viommu_set_vdev_id)
> + * @viommu_id: viommu ID to associate with the device to store its virtual ID
> + * @dev_id: device ID to set its virtual ID
> + * @__reserved: Must be 0
> + * @vdev_id: Virtual device ID
> + *
> + * Set a viommu-specific virtual ID of a device
> + */
> +struct iommu_viommu_set_vdev_id {
> +     __u32 size;
> +     __u32 viommu_id;
> +     __u32 dev_id;
> +     __u32 __reserved;
> +     __aligned_u64 vdev_id;
> +};
> +#define IOMMU_VIOMMU_SET_VDEV_ID _IO(IOMMUFD_TYPE, 
> IOMMUFD_CMD_VIOMMU_SET_VDEV_ID)
> +
> +/**
> + * struct iommu_viommu_unset_vdev_id - ioctl(IOMMU_VIOMMU_UNSET_VDEV_ID)
> + * @size: sizeof(struct iommu_viommu_unset_vdev_id)
> + * @viommu_id: viommu ID associated with the device to delete its virtual ID
> + * @dev_id: device ID to unset its virtual ID
> + * @__reserved: Must be 0
> + * @vdev_id: Virtual device ID (for verification)
> + *
> + * Unset a viommu-specific virtual ID of a device
> + */
> +struct iommu_viommu_unset_vdev_id {
> +     __u32 size;
> +     __u32 viommu_id;
> +     __u32 dev_id;
> +     __u32 __reserved;
> +     __aligned_u64 vdev_id;
> +};
> +#define IOMMU_VIOMMU_UNSET_VDEV_ID _IO(IOMMUFD_TYPE, 
> IOMMUFD_CMD_VIOMMU_UNSET_VDEV_ID)
>  #endif
> -- 
> 2.43.0
> 

Reply via email to