Add a callback function for uring_cmd to support the io_uring
interface in DRM devices. The drm_uring_cmd() function allows
for dispatching ioctls to DRM devices. If the uring_cmd
callback is specified in the device's file_operations structure,
the SQE cmd_op field can be set to DRM_URING_CMD_IOCTL to issue
ioctls to that device from the ring.

Additionally, create a 16 byte drm_uring_cmd_ioctl struct that
contains traditional ioctl argument values, such as the device
specific ioctl command and an optional argument pointer, that
can be passed directly to the callback function through the SQE's
16 byte command field. This design allows io_uring to handle
ioctls seamlessly for DRM/accel devices without requiring any
updates to existing drivers.

Initial benchmarks on our Qualcomm Cloud AI 100 device show
speedups of 50% in ioctl execution time in the best case for
large batches of ioctls (128) issued together via drm_uring_cmd()
compared to issuing these ioctls directly.

Signed-off-by: Zack McKevitt <zachary.mckev...@oss.qualcomm.com>
---

When issuing a batch of ioctl commands to a device, many context
switches are performed. To minimize this overhead, we propose using
io_uring to submit large batches of ioctl commands to a device all
at once. Instead of calling ioctls directly, io_uring provides a
uring_cmd callback that may be specified within any file or device's
file_operations structure and invoked by the ring.

For DRM devices that may need to issue large numbers of ioctls, we
believe performance can be improved by placing uring_cmds to issue
these ioctls in the ring and submitting them all at once.

This patch does not update the file_operations to include the
uring_cmd callback function for all DRM devices. However, this may
be easily done in the future without requiring modifications to
existing drivers. Furthermore, this design could be extended
to define new op codes within the drm_uring_cmd() callback which
would allow for more customized handling, assuming individual
driver support.

This patch was inspired by the talk "io_uring for DRM" at XDC 2024.

Thanks in advance for any feedback.
Zack

 drivers/gpu/drm/drm_ioctl.c | 30 ++++++++++++++++++++++++++++++
 include/drm/drm_accel.h     |  3 ++-
 include/drm/drm_ioctl.h     |  3 +++
 include/uapi/drm/drm.h      | 22 ++++++++++++++++++++++
 4 files changed, 57 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index f593dc569d31..c57c21cc16ec 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -39,6 +39,7 @@
 #include <drm/drm_file.h>
 #include <drm/drm_ioctl.h>
 #include <drm/drm_print.h>
+#include <uapi/drm/drm.h>
 
 #include "drm_crtc_internal.h"
 #include "drm_internal.h"
@@ -936,3 +937,32 @@ bool drm_ioctl_flags(unsigned int nr, unsigned int *flags)
        return true;
 }
 EXPORT_SYMBOL(drm_ioctl_flags);
+
+/**
+ * drm_uring_cmd - Implement uring_cmd callback for io_uring
+ * @cmd: pointer to io_uring_cmd struct
+ * @issue_flags: flags specified by io_uring's issue implementation
+ *
+ * Implements the uring_cmd file operation for drm. The only opcode handled
+ * is DRM_URING_CMD_IOCTL, whose SQE payload (struct drm_uring_cmd_ioctl) is
+ * forwarded to drm_ioctl(). The payload's pad field must be zero.
+ *
+ * Returns:
+ * The drm_ioctl() result for DRM_URING_CMD_IOCTL, -EINVAL if pad is
+ * non-zero, or -EOPNOTSUPP for any other cmd_op.
+ */
+int drm_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
+{
+       const struct drm_uring_cmd_ioctl *drm_cmd;
+
+       switch (cmd->cmd_op) {
+       case DRM_URING_CMD_IOCTL:
+               drm_cmd = io_uring_sqe_cmd(cmd->sqe);
+               if (drm_cmd->pad)
+                       return -EINVAL;
+               return drm_ioctl(cmd->file, drm_cmd->ioctl_cmd, drm_cmd->arg);
+       default:
+               return -EOPNOTSUPP;
+       }
+}
+EXPORT_SYMBOL(drm_uring_cmd);
diff --git a/include/drm/drm_accel.h b/include/drm/drm_accel.h
index 038ccb02f9a3..4f923e101a06 100644
--- a/include/drm/drm_accel.h
+++ b/include/drm/drm_accel.h
@@ -29,7 +29,8 @@
        .read           = drm_read,\
        .llseek         = noop_llseek, \
        .mmap           = drm_gem_mmap, \
-       .fop_flags      = FOP_UNSIGNED_OFFSET
+       .fop_flags      = FOP_UNSIGNED_OFFSET, \
+       .uring_cmd      = drm_uring_cmd
 
 /**
  * DEFINE_DRM_ACCEL_FOPS() - macro to generate file operations for accelerators drivers
diff --git a/include/drm/drm_ioctl.h b/include/drm/drm_ioctl.h
index 171760b6c4a1..cbb474254e1c 100644
--- a/include/drm/drm_ioctl.h
+++ b/include/drm/drm_ioctl.h
@@ -34,6 +34,7 @@
 
 #include <linux/types.h>
 #include <linux/bitops.h>
+#include <linux/io_uring/cmd.h>
 
 #include <asm/ioctl.h>
 
@@ -171,4 +172,6 @@ int drm_noop(struct drm_device *dev, void *data,
 int drm_invalid_op(struct drm_device *dev, void *data,
                   struct drm_file *file_priv);
 
+int drm_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags);
+
 #endif /* _DRM_IOCTL_H_ */
diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
index e63a71d3c607..9316470f1286 100644
--- a/include/uapi/drm/drm.h
+++ b/include/uapi/drm/drm.h
@@ -1385,6 +1385,28 @@ struct drm_event_crtc_sequence {
        __u64                   sequence;
 };
 
+/**
+ * enum drm_uring_cmd_op - Opcodes for io_uring based drm_uring_cmd callback
+ * @DRM_URING_CMD_IOCTL: issue DRM ioctl from drm_uring_cmd
+ */
+enum drm_uring_cmd_op {
+       DRM_URING_CMD_IOCTL = 1,
+};
+
+/**
+ * struct drm_uring_cmd_ioctl - arguments for DRM_URING_CMD_IOCTL
+ */
+struct drm_uring_cmd_ioctl {
+       /** @ioctl_cmd: Device specific ioctl number */
+       __u32 ioctl_cmd;
+
+       /** @pad: Pad to 16 byte SQE cmd */
+       __u32 pad;
+
+       /** @arg: Opaque ioctl argument pointer */
+       __u64 arg;
+};
+
 /* typedef area */
 #ifndef __KERNEL__
 typedef struct drm_clip_rect drm_clip_rect_t;
-- 
2.34.1

Reply via email to