On 28/8/24 02:59, Nicolin Chen wrote:
Introduce a pair of new ioctls to set/unset a per-viommu virtual device id
that should be linked to a physical device id via an idev pointer.

Continue to support IOMMU_VIOMMU_TYPE_DEFAULT for a core-managed viommu.
Provide a lookup function that lets drivers find a device pointer by its
virtual device id.

Add a rw_semaphore protection around the vdev_id list. Any future ioctl
handlers that potentially access the list must grab the lock too.

Signed-off-by: Nicolin Chen <nicol...@nvidia.com>
---
  drivers/iommu/iommufd/device.c          |  12 +++
  drivers/iommu/iommufd/iommufd_private.h |  21 ++++
  drivers/iommu/iommufd/main.c            |   6 ++
  drivers/iommu/iommufd/viommu.c          | 121 ++++++++++++++++++++++++
  include/uapi/linux/iommufd.h            |  40 ++++++++
  5 files changed, 200 insertions(+)

diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
index 5fd3dd420290..3ad759971b32 100644
--- a/drivers/iommu/iommufd/device.c
+++ b/drivers/iommu/iommufd/device.c
@@ -136,6 +136,18 @@ void iommufd_device_destroy(struct iommufd_object *obj)
        struct iommufd_device *idev =
                container_of(obj, struct iommufd_device, obj);
+ /* Unlocked since there should be no race in a destroy() */
+       if (idev->vdev_id) {
+               struct iommufd_vdev_id *vdev_id = idev->vdev_id;
+               struct iommufd_viommu *viommu = vdev_id->viommu;
+               struct iommufd_vdev_id *old;
+
+               old = xa_cmpxchg(&viommu->vdev_ids, vdev_id->id, vdev_id, NULL,
+                                GFP_KERNEL);
+               WARN_ON(old != vdev_id);
+               kfree(vdev_id);
+               idev->vdev_id = NULL;
+       }
        iommu_device_release_dma_owner(idev->dev);
        iommufd_put_group(idev->igroup);
        if (!iommufd_selftest_is_mock_dev(idev->dev))
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index 1f2a1c133b9a..2c6e168c5300 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -416,6 +416,7 @@ struct iommufd_device {
        struct iommufd_object obj;
        struct iommufd_ctx *ictx;
        struct iommufd_group *igroup;
+       struct iommufd_vdev_id *vdev_id;
        struct list_head group_item;
        /* always the physical device */
        struct device *dev;
@@ -533,11 +534,31 @@ struct iommufd_viommu {
        struct iommufd_ctx *ictx;
        struct iommufd_hwpt_paging *hwpt;
+ /* The locking order is vdev_ids_rwsem -> igroup::lock */
+       struct rw_semaphore vdev_ids_rwsem;
+       struct xarray vdev_ids;
+
        unsigned int type;
  };
+struct iommufd_vdev_id {
+       struct iommufd_viommu *viommu;
+       struct iommufd_device *idev;
+       u64 id;
+};
+
+static inline struct iommufd_viommu *
+iommufd_get_viommu(struct iommufd_ucmd *ucmd, u32 id)
+{
+       return container_of(iommufd_get_object(ucmd->ictx, id,
+                                              IOMMUFD_OBJ_VIOMMU),
+                           struct iommufd_viommu, obj);
+}
+
  int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd);
  void iommufd_viommu_destroy(struct iommufd_object *obj);
+int iommufd_viommu_set_vdev_id(struct iommufd_ucmd *ucmd);
+int iommufd_viommu_unset_vdev_id(struct iommufd_ucmd *ucmd);
#ifdef CONFIG_IOMMUFD_TEST
  int iommufd_test(struct iommufd_ucmd *ucmd);
diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
index 288ee51b6829..199ad90fa36b 100644
--- a/drivers/iommu/iommufd/main.c
+++ b/drivers/iommu/iommufd/main.c
@@ -334,6 +334,8 @@ union ucmd_buffer {
        struct iommu_option option;
        struct iommu_vfio_ioas vfio_ioas;
        struct iommu_viommu_alloc viommu;
+       struct iommu_viommu_set_vdev_id set_vdev_id;
+       struct iommu_viommu_unset_vdev_id unset_vdev_id;
  #ifdef CONFIG_IOMMUFD_TEST
        struct iommu_test_cmd test;
  #endif
@@ -387,6 +389,10 @@ static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = {
                 __reserved),
        IOCTL_OP(IOMMU_VIOMMU_ALLOC, iommufd_viommu_alloc_ioctl,
                 struct iommu_viommu_alloc, out_viommu_id),
+       IOCTL_OP(IOMMU_VIOMMU_SET_VDEV_ID, iommufd_viommu_set_vdev_id,
+                struct iommu_viommu_set_vdev_id, vdev_id),
+       IOCTL_OP(IOMMU_VIOMMU_UNSET_VDEV_ID, iommufd_viommu_unset_vdev_id,
+                struct iommu_viommu_unset_vdev_id, vdev_id),
  #ifdef CONFIG_IOMMUFD_TEST
        IOCTL_OP(IOMMU_TEST_CMD, iommufd_test, struct iommu_test_cmd, last),
  #endif
diff --git a/drivers/iommu/iommufd/viommu.c b/drivers/iommu/iommufd/viommu.c
index 200653a4bf57..8ffcd72b16b8 100644
--- a/drivers/iommu/iommufd/viommu.c
+++ b/drivers/iommu/iommufd/viommu.c
@@ -8,6 +8,15 @@ void iommufd_viommu_destroy(struct iommufd_object *obj)
  {
        struct iommufd_viommu *viommu =
                container_of(obj, struct iommufd_viommu, obj);
+       struct iommufd_vdev_id *vdev_id;
+       unsigned long index;
+
+       xa_for_each(&viommu->vdev_ids, index, vdev_id) {
+               /* Unlocked since there should be no race in a destroy() */
+               vdev_id->idev->vdev_id = NULL;
+               kfree(vdev_id);
+       }
+       xa_destroy(&viommu->vdev_ids);
refcount_dec(&viommu->hwpt->common.obj.users);
  }
@@ -53,6 +62,9 @@ int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd)
        viommu->ictx = ucmd->ictx;
        viommu->hwpt = hwpt_paging;
+ xa_init(&viommu->vdev_ids);
+       init_rwsem(&viommu->vdev_ids_rwsem);
+
        refcount_inc(&viommu->hwpt->common.obj.users);
cmd->out_viommu_id = viommu->obj.id;
@@ -70,3 +82,112 @@ int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd)
        iommufd_put_object(ucmd->ictx, &idev->obj);
        return rc;
  }
+
+int iommufd_viommu_set_vdev_id(struct iommufd_ucmd *ucmd)
+{
+       struct iommu_viommu_set_vdev_id *cmd = ucmd->cmd;
+       struct iommufd_vdev_id *vdev_id, *curr;
+       struct iommufd_viommu *viommu;
+       struct iommufd_device *idev;
+       int rc = 0;
+
+       if (cmd->vdev_id > ULONG_MAX)
+               return -EINVAL;
+
+       viommu = iommufd_get_viommu(ucmd, cmd->viommu_id);
+       if (IS_ERR(viommu))
+               return PTR_ERR(viommu);
+
+       idev = iommufd_get_device(ucmd, cmd->dev_id);
+       if (IS_ERR(idev)) {
+               rc = PTR_ERR(idev);
+               goto out_put_viommu;
+       }
+
+       down_write(&viommu->vdev_ids_rwsem);
+       mutex_lock(&idev->igroup->lock);
+       if (idev->vdev_id) {
+               rc = -EEXIST;
+               goto out_unlock_igroup;
+       }
+
+       vdev_id = kzalloc(sizeof(*vdev_id), GFP_KERNEL);
+       if (!vdev_id) {
+               rc = -ENOMEM;
+               goto out_unlock_igroup;
+       }
+
+       vdev_id->idev = idev;
+       vdev_id->viommu = viommu;
+       vdev_id->id = cmd->vdev_id;
+
+       curr = xa_cmpxchg(&viommu->vdev_ids, cmd->vdev_id, NULL, vdev_id,
+                         GFP_KERNEL);
+       if (curr) {
+               rc = xa_err(curr) ? : -EBUSY;
+               goto out_free;
+       }
+
+       idev->vdev_id = vdev_id;
+       goto out_unlock_igroup;
+
+out_free:
+       kfree(vdev_id);
+out_unlock_igroup:
+       mutex_unlock(&idev->igroup->lock);
+       up_write(&viommu->vdev_ids_rwsem);
+       iommufd_put_object(ucmd->ictx, &idev->obj);
+out_put_viommu:
+       iommufd_put_object(ucmd->ictx, &viommu->obj);
+       return rc;
+}
+
+int iommufd_viommu_unset_vdev_id(struct iommufd_ucmd *ucmd)
+{
+       struct iommu_viommu_unset_vdev_id *cmd = ucmd->cmd;
+       struct iommufd_viommu *viommu;
+       struct iommufd_vdev_id *old;
+       struct iommufd_device *idev;
+       int rc = 0;
+
+       if (cmd->vdev_id > ULONG_MAX)
+               return -EINVAL;
+
+       viommu = iommufd_get_viommu(ucmd, cmd->viommu_id);
+       if (IS_ERR(viommu))
+               return PTR_ERR(viommu);
+
+       idev = iommufd_get_device(ucmd, cmd->dev_id);
+       if (IS_ERR(idev)) {
+               rc = PTR_ERR(idev);
+               goto out_put_viommu;
+       }
+
+       down_write(&viommu->vdev_ids_rwsem);
+       mutex_lock(&idev->igroup->lock);
+       if (!idev->vdev_id) {
+               rc = -ENOENT;
+               goto out_unlock_igroup;
+       }
+       if (idev->vdev_id->id != cmd->vdev_id) {
+               rc = -EINVAL;
+               goto out_unlock_igroup;
+       }
+
+       old = xa_cmpxchg(&viommu->vdev_ids, idev->vdev_id->id,
+                        idev->vdev_id, NULL, GFP_KERNEL);
+       if (xa_is_err(old)) {
+               rc = xa_err(old);
+               goto out_unlock_igroup;
+       }
+       kfree(old);
+       idev->vdev_id = NULL;
+
+out_unlock_igroup:
+       mutex_unlock(&idev->igroup->lock);
+       up_write(&viommu->vdev_ids_rwsem);
+       iommufd_put_object(ucmd->ictx, &idev->obj);
+out_put_viommu:
+       iommufd_put_object(ucmd->ictx, &viommu->obj);
+       return rc;
+}
diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h
index 51ce6a019c34..1816e89c922d 100644
--- a/include/uapi/linux/iommufd.h
+++ b/include/uapi/linux/iommufd.h
@@ -52,6 +52,8 @@ enum {
        IOMMUFD_CMD_HWPT_INVALIDATE = 0x8d,
        IOMMUFD_CMD_FAULT_QUEUE_ALLOC = 0x8e,
        IOMMUFD_CMD_VIOMMU_ALLOC = 0x8f,
+       IOMMUFD_CMD_VIOMMU_SET_VDEV_ID = 0x90,
+       IOMMUFD_CMD_VIOMMU_UNSET_VDEV_ID = 0x91,
  };
/**
@@ -882,4 +884,42 @@ struct iommu_viommu_alloc {
        __u32 out_viommu_id;
  };
  #define IOMMU_VIOMMU_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VIOMMU_ALLOC)
+
+/**
+ * struct iommu_viommu_set_vdev_id - ioctl(IOMMU_VIOMMU_SET_VDEV_ID)
+ * @size: sizeof(struct iommu_viommu_set_vdev_id)
+ * @viommu_id: viommu ID to associate with the device to store its virtual ID
+ * @dev_id: device ID to set its virtual ID
+ * @__reserved: Must be 0
+ * @vdev_id: Virtual device ID
+ *
+ * Set a viommu-specific virtual ID of a device
+ */
+struct iommu_viommu_set_vdev_id {
+       __u32 size;
+       __u32 viommu_id;
+       __u32 dev_id;

Is this ID from vfio_device_bind_iommufd.out_devid?

+       __u32 __reserved;
+       __aligned_u64 vdev_id;

What is the nature of this id? It is not the guest's BDFn, is it? The code suggests it is ARM's "SID" == "stream ID" and "a device might be able to generate multiple StreamIDs" (how, why?) 🤯 And these streams seem to have nothing to do with PCIe IDE streams, right?

For my SEV-TIO exercise ("trusted IO"), I am looking for a kernel interface to pass the guest's BDFs for a specific host device (which is passed through); nothing in the kernel has any knowledge of them at the moment. Is this the right place for that, or is another ioctl() needed?

Sorry, I am too ignorant about ARM :)


+};
+#define IOMMU_VIOMMU_SET_VDEV_ID _IO(IOMMUFD_TYPE, 
IOMMUFD_CMD_VIOMMU_SET_VDEV_ID)
+
+/**
+ * struct iommu_viommu_unset_vdev_id - ioctl(IOMMU_VIOMMU_UNSET_VDEV_ID)
+ * @size: sizeof(struct iommu_viommu_unset_vdev_id)
+ * @viommu_id: viommu ID associated with the device to delete its virtual ID
+ * @dev_id: device ID to unset its virtual ID
+ * @__reserved: Must be 0
+ * @vdev_id: Virtual device ID (for verification)
+ *
+ * Unset a viommu-specific virtual ID of a device
+ */
+struct iommu_viommu_unset_vdev_id {
+       __u32 size;
+       __u32 viommu_id;
+       __u32 dev_id;
+       __u32 __reserved;
+       __aligned_u64 vdev_id;
+};
+#define IOMMU_VIOMMU_UNSET_VDEV_ID _IO(IOMMUFD_TYPE, 
IOMMUFD_CMD_VIOMMU_UNSET_VDEV_ID)
  #endif

Nit: "git format-patch -O orderfile" makes patches nicer by putting the documentation first (.h before .c, in this case) with the "orderfile" looking like this:

===
*.txt
configure
*Makefile*
*.json
*.h
*.c
===

Thanks,

--
Alexey


Reply via email to