Introduce a new IOMMUFD_OBJ_VEVENTQ object for the vIOMMU Event Queue, which
provides user space (VMM) with another FD to read vIOMMU events.

Allow a vIOMMU object to allocate vEVENTQs, with the restriction that each
vIOMMU can have only one vEVENTQ per type.

Add iommufd_veventq_alloc() with iommufd_veventq_ops for the new
IOMMU_VEVENTQ_ALLOC ioctl.
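
As a rough usage sketch (not part of this patch; the helper name and the
type value below are illustrative, since only the reserved
IOMMU_VEVENTQ_TYPE_DEFAULT exists so far and it is rejected by the ioctl),
a VMM would allocate and use a vEVENTQ along these lines:

  #include <sys/ioctl.h>
  #include <linux/iommufd.h>

  static int veventq_alloc(int iommufd, __u32 viommu_id, __u32 vendor_type)
  {
          struct iommu_veventq_alloc cmd = {
                  .size = sizeof(cmd),
                  .viommu_id = viommu_id,
                  .type = vendor_type,
          };

          if (ioctl(iommufd, IOMMU_VEVENTQ_ALLOC, &cmd))
                  return -1;
          /*
           * cmd.out_veventq_fd can now be read() to receive whole events
           * packed back to back (a partial event is never returned) and
           * must be close()d when no longer needed. cmd.out_veventq_id
           * names the object, e.g. for IOMMU_DESTROY. A second vEVENTQ of
           * the same type on the same vIOMMU fails with EEXIST.
           */
          return cmd.out_veventq_fd;
  }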

Signed-off-by: Nicolin Chen <nicol...@nvidia.com>
---
 drivers/iommu/iommufd/iommufd_private.h |  58 +++++++++++
 include/linux/iommufd.h                 |   3 +
 include/uapi/linux/iommufd.h            |  31 ++++++
 drivers/iommu/iommufd/eventq.c          | 129 ++++++++++++++++++++++++
 drivers/iommu/iommufd/main.c            |   7 ++
 drivers/iommu/iommufd/viommu.c          |   2 +
 6 files changed, 230 insertions(+)
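
Note for reviewers: a minimal in-kernel sketch of how a follow-up patch or
an IOMMU driver could feed an event into a vEVENTQ using the helpers added
here. example_report_vevent() is hypothetical, assumes process context
(GFP_KERNEL and the sleeping rwsem), assumes data_len is a multiple of
sizeof(u64), and simplifies the error/ownership handling of the notify
path:

  static int example_report_vevent(struct iommufd_viommu *viommu,
                                   unsigned int type, void *data,
                                   size_t data_len)
  {
          struct iommufd_veventq *veventq;
          struct iommufd_vevent *vevent;
          int rc = 0;

          down_read(&viommu->veventqs_rwsem);
          veventq = iommufd_viommu_find_veventq(viommu, type);
          if (!veventq) {
                  rc = -EOPNOTSUPP;
                  goto out_unlock;
          }

          vevent = kmalloc(sizeof(*vevent) + data_len, GFP_KERNEL);
          if (!vevent) {
                  rc = -ENOMEM;
                  goto out_unlock;
          }
          vevent->data_len = data_len;
          memcpy(vevent->event_data, data, data_len);

          /* Queue the event and wake up any reader of the vEVENTQ FD */
          rc = iommufd_vevent_handler(veventq, vevent);
  out_unlock:
          up_read(&viommu->veventqs_rwsem);
          return rc;
  }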

diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index dfbc5cfbd164..9410f6275b5a 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -547,6 +547,50 @@ static inline int iommufd_hwpt_replace_device(struct iommufd_device *idev,
        return iommu_group_replace_domain(idev->igroup->group, hwpt->domain);
 }
 
+/*
+ * An iommufd_veventq object represents an interface to deliver vIOMMU events
+ * to user space. It is created/destroyed by user space and associated with a
+ * vIOMMU object during allocation.
+ */
+struct iommufd_veventq {
+       struct iommufd_eventq common;
+       struct iommufd_viommu *viommu;
+       struct list_head node; /* for iommufd_viommu::veventqs */
+
+       unsigned int type;
+};
+
+static inline struct iommufd_veventq *
+eventq_to_veventq(struct iommufd_eventq *eventq)
+{
+       return container_of(eventq, struct iommufd_veventq, common);
+}
+
+static inline struct iommufd_veventq *
+iommufd_get_veventq(struct iommufd_ucmd *ucmd, u32 id)
+{
+       return container_of(iommufd_get_object(ucmd->ictx, id,
+                                              IOMMUFD_OBJ_VEVENTQ),
+                           struct iommufd_veventq, common.obj);
+}
+
+int iommufd_veventq_alloc(struct iommufd_ucmd *ucmd);
+void iommufd_veventq_destroy(struct iommufd_object *obj);
+void iommufd_veventq_abort(struct iommufd_object *obj);
+
+/* An iommufd_vevent represents a vIOMMU event in an iommufd_veventq */
+struct iommufd_vevent {
+       struct list_head node; /* for iommufd_eventq::deliver */
+       ssize_t data_len;
+       u64 event_data[] __counted_by(data_len);
+};
+
+static inline int iommufd_vevent_handler(struct iommufd_veventq *veventq,
+                                         struct iommufd_vevent *vevent)
+{
+       return iommufd_eventq_notify(&veventq->common, &vevent->node);
+}
+
 static inline struct iommufd_viommu *
 iommufd_get_viommu(struct iommufd_ucmd *ucmd, u32 id)
 {
@@ -555,6 +599,20 @@ iommufd_get_viommu(struct iommufd_ucmd *ucmd, u32 id)
                            struct iommufd_viommu, obj);
 }
 
+static inline struct iommufd_veventq *
+iommufd_viommu_find_veventq(struct iommufd_viommu *viommu, u32 type)
+{
+       struct iommufd_veventq *veventq, *next;
+
+       lockdep_assert_held(&viommu->veventqs_rwsem);
+
+       list_for_each_entry_safe(veventq, next, &viommu->veventqs, node) {
+               if (veventq->type == type)
+                       return veventq;
+       }
+       return NULL;
+}
+
 int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd);
 void iommufd_viommu_destroy(struct iommufd_object *obj);
 int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd);
diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h
index 11110c749200..8948b1836940 100644
--- a/include/linux/iommufd.h
+++ b/include/linux/iommufd.h
@@ -34,6 +34,7 @@ enum iommufd_object_type {
        IOMMUFD_OBJ_FAULT,
        IOMMUFD_OBJ_VIOMMU,
        IOMMUFD_OBJ_VDEVICE,
+       IOMMUFD_OBJ_VEVENTQ,
 #ifdef CONFIG_IOMMUFD_TEST
        IOMMUFD_OBJ_SELFTEST,
 #endif
@@ -93,6 +94,8 @@ struct iommufd_viommu {
        const struct iommufd_viommu_ops *ops;
 
        struct xarray vdevs;
+       struct list_head veventqs;
+       struct rw_semaphore veventqs_rwsem;
 
        unsigned int type;
 };
diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h
index 34810f6ae2b5..0a08aa82e7cc 100644
--- a/include/uapi/linux/iommufd.h
+++ b/include/uapi/linux/iommufd.h
@@ -55,6 +55,7 @@ enum {
        IOMMUFD_CMD_VIOMMU_ALLOC = 0x90,
        IOMMUFD_CMD_VDEVICE_ALLOC = 0x91,
        IOMMUFD_CMD_IOAS_CHANGE_PROCESS = 0x92,
+       IOMMUFD_CMD_VEVENTQ_ALLOC = 0x93,
 };
 
 /**
@@ -1012,4 +1013,34 @@ struct iommu_ioas_change_process {
 #define IOMMU_IOAS_CHANGE_PROCESS \
        _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_CHANGE_PROCESS)
 
+/**
+ * enum iommu_veventq_type - Virtual Event Queue Type
+ * @IOMMU_VEVENTQ_TYPE_DEFAULT: Reserved for future use
+ */
+enum iommu_veventq_type {
+       IOMMU_VEVENTQ_TYPE_DEFAULT = 0,
+};
+
+/**
+ * struct iommu_veventq_alloc - ioctl(IOMMU_VEVENTQ_ALLOC)
+ * @size: sizeof(struct iommu_veventq_alloc)
+ * @flags: Must be 0
+ * @viommu_id: virtual IOMMU ID to associate the vEVENTQ with
+ * @type: Type of the vEVENTQ. Must be defined in enum iommu_veventq_type
+ * @out_veventq_id: The ID of the new vEVENTQ
+ * @out_veventq_fd: The fd of the new vEVENTQ. User space must close the
+ *                  successfully returned fd after using it
+ *
+ * Explicitly allocate a virtual event queue interface for a vIOMMU. A vIOMMU
+ * can have multiple FDs for different types, but is confined to one per @type.
+ */
+struct iommu_veventq_alloc {
+       __u32 size;
+       __u32 flags;
+       __u32 viommu_id;
+       __u32 type;
+       __u32 out_veventq_id;
+       __u32 out_veventq_fd;
+};
+#define IOMMU_VEVENTQ_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VEVENTQ_ALLOC)
 #endif
diff --git a/drivers/iommu/iommufd/eventq.c b/drivers/iommu/iommufd/eventq.c
index e386b6c3e6ab..bf0c04f571a7 100644
--- a/drivers/iommu/iommufd/eventq.c
+++ b/drivers/iommu/iommufd/eventq.c
@@ -346,6 +346,73 @@ static const struct iommufd_eventq_ops iommufd_fault_ops = {
        .write = &iommufd_fault_fops_write,
 };
 
+/* IOMMUFD_OBJ_VEVENTQ Functions */
+
+void iommufd_veventq_abort(struct iommufd_object *obj)
+{
+       struct iommufd_eventq *eventq =
+               container_of(obj, struct iommufd_eventq, obj);
+       struct iommufd_veventq *veventq = eventq_to_veventq(eventq);
+       struct iommufd_viommu *viommu = veventq->viommu;
+       struct iommufd_vevent *cur, *next;
+
+       lockdep_assert_held_write(&viommu->veventqs_rwsem);
+
+       list_for_each_entry_safe(cur, next, &eventq->deliver, node) {
+               list_del(&cur->node);
+               kfree(cur);
+       }
+
+       refcount_dec(&viommu->obj.users);
+       mutex_destroy(&eventq->mutex);
+       list_del(&veventq->node);
+}
+
+void iommufd_veventq_destroy(struct iommufd_object *obj)
+{
+       struct iommufd_veventq *veventq = eventq_to_veventq(
+               container_of(obj, struct iommufd_eventq, obj));
+
+       down_write(&veventq->viommu->veventqs_rwsem);
+       iommufd_veventq_abort(obj);
+       up_write(&veventq->viommu->veventqs_rwsem);
+}
+
+static ssize_t iommufd_veventq_fops_read(struct iommufd_eventq *eventq,
+                                        char __user *buf, size_t count,
+                                        loff_t *ppos)
+{
+       size_t done = 0;
+       int rc = 0;
+
+       if (*ppos)
+               return -ESPIPE;
+
+       mutex_lock(&eventq->mutex);
+       while (!list_empty(&eventq->deliver) && count > done) {
+               struct iommufd_vevent *cur = list_first_entry(
+                       &eventq->deliver, struct iommufd_vevent, node);
+
+               if (cur->data_len > count - done)
+                       break;
+
+               if (copy_to_user(buf + done, cur->event_data, cur->data_len)) {
+                       rc = -EFAULT;
+                       break;
+               }
+               done += cur->data_len;
+               list_del(&cur->node);
+               kfree(cur);
+       }
+       mutex_unlock(&eventq->mutex);
+
+       return done == 0 ? rc : done;
+}
+
+static const struct iommufd_eventq_ops iommufd_veventq_ops = {
+       .read = &iommufd_veventq_fops_read,
+};
+
 /* Common Event Queue Functions */
 
 static ssize_t iommufd_eventq_fops_read(struct file *filep, char __user *buf,
@@ -473,3 +540,65 @@ int iommufd_fault_alloc(struct iommufd_ucmd *ucmd)
 
        return rc;
 }
+
+int iommufd_veventq_alloc(struct iommufd_ucmd *ucmd)
+{
+       struct iommu_veventq_alloc *cmd = ucmd->cmd;
+       struct iommufd_veventq *veventq;
+       struct iommufd_viommu *viommu;
+       int fdno;
+       int rc;
+
+       if (cmd->flags || cmd->type == IOMMU_VEVENTQ_TYPE_DEFAULT)
+               return -EOPNOTSUPP;
+
+       viommu = iommufd_get_viommu(ucmd, cmd->viommu_id);
+       if (IS_ERR(viommu))
+               return PTR_ERR(viommu);
+       down_write(&viommu->veventqs_rwsem);
+
+       if (iommufd_viommu_find_veventq(viommu, cmd->type)) {
+               rc = -EEXIST;
+               goto out_unlock_veventqs;
+       }
+
+       veventq = __iommufd_object_alloc(ucmd->ictx, veventq,
+                                        IOMMUFD_OBJ_VEVENTQ, common.obj);
+       if (IS_ERR(veventq)) {
+               rc = PTR_ERR(veventq);
+               goto out_unlock_veventqs;
+       }
+
+       veventq->type = cmd->type;
+       veventq->viommu = viommu;
+       refcount_inc(&viommu->obj.users);
+       list_add_tail(&veventq->node, &viommu->veventqs);
+
+       fdno = iommufd_eventq_init(&veventq->common, "[iommufd-viommu-event]",
+                                  ucmd->ictx, &iommufd_veventq_ops);
+       if (fdno < 0) {
+               rc = fdno;
+               goto out_abort;
+       }
+
+       cmd->out_veventq_id = veventq->common.obj.id;
+       cmd->out_veventq_fd = fdno;
+
+       rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
+       if (rc)
+               goto out_put_fdno;
+
+       iommufd_object_finalize(ucmd->ictx, &veventq->common.obj);
+       fd_install(fdno, veventq->common.filep);
+       goto out_unlock_veventqs;
+
+out_put_fdno:
+       put_unused_fd(fdno);
+       fput(veventq->common.filep);
+out_abort:
+       iommufd_object_abort_and_destroy(ucmd->ictx, &veventq->common.obj);
+out_unlock_veventqs:
+       up_write(&viommu->veventqs_rwsem);
+       iommufd_put_object(ucmd->ictx, &viommu->obj);
+       return rc;
+}
diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
index 49be19305d98..985661a4af07 100644
--- a/drivers/iommu/iommufd/main.c
+++ b/drivers/iommu/iommufd/main.c
@@ -310,6 +310,7 @@ union ucmd_buffer {
        struct iommu_vdevice_alloc vdev;
        struct iommu_vfio_ioas vfio_ioas;
        struct iommu_viommu_alloc viommu;
+       struct iommu_veventq_alloc veventq;
 #ifdef CONFIG_IOMMUFD_TEST
        struct iommu_test_cmd test;
 #endif
@@ -367,6 +368,8 @@ static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = {
                 __reserved),
        IOCTL_OP(IOMMU_VIOMMU_ALLOC, iommufd_viommu_alloc_ioctl,
                 struct iommu_viommu_alloc, out_viommu_id),
+       IOCTL_OP(IOMMU_VEVENTQ_ALLOC, iommufd_veventq_alloc,
+                struct iommu_veventq_alloc, out_veventq_fd),
 #ifdef CONFIG_IOMMUFD_TEST
        IOCTL_OP(IOMMU_TEST_CMD, iommufd_test, struct iommu_test_cmd, last),
 #endif
@@ -502,6 +505,10 @@ static const struct iommufd_object_ops iommufd_object_ops[] = {
        [IOMMUFD_OBJ_FAULT] = {
                .destroy = iommufd_fault_destroy,
        },
+       [IOMMUFD_OBJ_VEVENTQ] = {
+               .destroy = iommufd_veventq_destroy,
+               .abort = iommufd_veventq_abort,
+       },
        [IOMMUFD_OBJ_VIOMMU] = {
                .destroy = iommufd_viommu_destroy,
        },
diff --git a/drivers/iommu/iommufd/viommu.c b/drivers/iommu/iommufd/viommu.c
index 69b88e8c7c26..01df2b985f02 100644
--- a/drivers/iommu/iommufd/viommu.c
+++ b/drivers/iommu/iommufd/viommu.c
@@ -59,6 +59,8 @@ int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd)
        viommu->ictx = ucmd->ictx;
        viommu->hwpt = hwpt_paging;
        refcount_inc(&viommu->hwpt->common.obj.users);
+       INIT_LIST_HEAD(&viommu->veventqs);
+       init_rwsem(&viommu->veventqs_rwsem);
        /*
         * It is the most likely case that a physical IOMMU is unpluggable. A
         * pluggable IOMMU instance (if exists) is responsible for refcounting
-- 
2.43.0

