On 24/03/2025 23:06, Maíra Canal wrote:
Hi Tvrtko,

Some nits inline, mostly personal comments. In any case,

Reviewed-by: Maíra Canal <mca...@igalia.com>


On 18/03/25 12:54, Tvrtko Ursulin wrote:
Running the Cyberpunk 2077 benchmark we can observe that the lookup helper
is relatively hot, but 97% of the calls are for a single object. (~3%
for two points, and never more than three points. A more trivial
workload like vkmark under Plasma is even more skewed to single point
lookups.)

Therefore let's add a fast path to bypass the kmalloc_array/kfree and use a
pre-allocated stack array for those cases.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursu...@igalia.com>
---
  drivers/gpu/drm/drm_syncobj.c | 53 +++++++++++++++++++++++++++--------
  1 file changed, 41 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index 94932b89298f..233bdef53c87 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -1223,6 +1223,8 @@ EXPORT_SYMBOL(drm_timeout_abs_to_jiffies);
  static int drm_syncobj_array_find(struct drm_file *file_private,
                    u32 __user *handles,
                    uint32_t count,
+                  struct drm_syncobj **stack_syncobjs,
+                  u32 stack_count,
                    struct drm_syncobj ***syncobjs_out)
  {
      struct drm_syncobj **syncobjs;
@@ -1232,9 +1234,13 @@ static int drm_syncobj_array_find(struct drm_file *file_private,
      if (!access_ok(handles, count * sizeof(*handles)))
          return -EFAULT;
-    syncobjs = kmalloc_array(count, sizeof(*syncobjs), GFP_KERNEL);
-    if (!syncobjs)
-        return -ENOMEM;
+    if (count > stack_count) {

I believe it's worth adding a comment mentioning that using the stack
syncobj is a fast-path that covers most cases.

Yep. But this didn't feel like the right place for it, so I added comments where the callers size the arrays. That, however, means there are two duplicated comments. Okay with you?

+        syncobjs = kmalloc_array(count, sizeof(*syncobjs), GFP_KERNEL);
+        if (!syncobjs)
+            return -ENOMEM;
+    } else {
+        syncobjs = stack_syncobjs;
+    }
      for (i = 0; i < count; i++) {
          u64 handle;
@@ -1260,25 +1266,31 @@ static int drm_syncobj_array_find(struct drm_file *file_private,
              drm_syncobj_put(syncobjs[i]);
          i--;
      }
-    kfree(syncobjs);
+
+    if (syncobjs != stack_syncobjs)

Again, I have a slight preference to make `syncobjs = NULL` and avoid
this if condition. But it's just a personal preference.

Pending clarifications from the other patch.


+        kfree(syncobjs);
      return ret;
  }
  static void drm_syncobj_array_free(struct drm_syncobj **syncobjs,
-                   uint32_t count)
+                   uint32_t count,
+                   struct drm_syncobj **stack_syncobjs)

IMO, I think the order `syncobjs, stack_syncobjs, count` is a bit more
intuitive.

But count is not directly related to the size of the stack array in this function. I could make it a boolean perhaps like this:

static void drm_syncobj_array_free(struct drm_syncobj **syncobjs,
                                   uint32_t count,
                                   bool free_array)

And then in the callers:

drm_syncobj_array_free(syncobjs, count, syncobjs != stack_syncobjs);

Would that be clearer?


  {
      uint32_t i;
      for (i = 0; i < count; i++)
          drm_syncobj_put(syncobjs[i]);
-    kfree(syncobjs);
+
+    if (syncobjs != stack_syncobjs)
+        kfree(syncobjs);
  }
  int
  drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
                 struct drm_file *file_private)
  {
+    struct drm_syncobj *stack_syncobjs[4];
      struct drm_syncobj_wait *args = data;
      ktime_t deadline, *pdeadline = NULL;
      u32 count = args->count_handles;
@@ -1304,6 +1316,8 @@ drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
      ret = drm_syncobj_array_find(file_private,
                       u64_to_user_ptr(args->handles),
                       count,
+                     stack_syncobjs,
+                     ARRAY_SIZE(stack_syncobjs),
                       &syncobjs);
      if (ret < 0)
          return ret;
@@ -1321,7 +1335,7 @@ drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
                           &first,
                           pdeadline);
-    drm_syncobj_array_free(syncobjs, count);
+    drm_syncobj_array_free(syncobjs, count, stack_syncobjs);
      if (timeout < 0)
          return timeout;
@@ -1336,6 +1350,7 @@ drm_syncobj_timeline_wait_ioctl(struct drm_device *dev, void *data,
                  struct drm_file *file_private)
  {
      struct drm_syncobj_timeline_wait *args = data;
+    struct drm_syncobj *stack_syncobjs[4];

Zero initialize it?

Do you see it as required?

Regards,

Tvrtko

      ktime_t deadline, *pdeadline = NULL;
      u32 count = args->count_handles;
      struct drm_syncobj **syncobjs;
@@ -1361,6 +1376,8 @@ drm_syncobj_timeline_wait_ioctl(struct drm_device *dev, void *data,
      ret = drm_syncobj_array_find(file_private,
                       u64_to_user_ptr(args->handles),
                       count,
+                     stack_syncobjs,
+                     ARRAY_SIZE(stack_syncobjs),
                       &syncobjs);
      if (ret < 0)
          return ret;
@@ -1378,7 +1395,7 @@ drm_syncobj_timeline_wait_ioctl(struct drm_device *dev, void *data,
                           &first,
                           pdeadline);
-    drm_syncobj_array_free(syncobjs, count);
+    drm_syncobj_array_free(syncobjs, count, stack_syncobjs);
      if (timeout < 0)
          return timeout;
@@ -1496,6 +1513,7 @@ drm_syncobj_reset_ioctl(struct drm_device *dev, void *data,
              struct drm_file *file_private)
  {
      struct drm_syncobj_array *args = data;
+    struct drm_syncobj *stack_syncobjs[4];
      struct drm_syncobj **syncobjs;
      uint32_t i;
      int ret;
@@ -1512,6 +1530,8 @@ drm_syncobj_reset_ioctl(struct drm_device *dev, void *data,
      ret = drm_syncobj_array_find(file_private,
                       u64_to_user_ptr(args->handles),
                       args->count_handles,
+                     stack_syncobjs,
+                     ARRAY_SIZE(stack_syncobjs),
                       &syncobjs);
      if (ret < 0)
          return ret;
@@ -1519,7 +1539,7 @@ drm_syncobj_reset_ioctl(struct drm_device *dev, void *data,
      for (i = 0; i < args->count_handles; i++)
          drm_syncobj_replace_fence(syncobjs[i], NULL);
-    drm_syncobj_array_free(syncobjs, args->count_handles);
+    drm_syncobj_array_free(syncobjs, args->count_handles, stack_syncobjs);
      return 0;
  }
@@ -1529,6 +1549,7 @@ drm_syncobj_signal_ioctl(struct drm_device *dev, void *data,
               struct drm_file *file_private)
  {
      struct drm_syncobj_array *args = data;
+    struct drm_syncobj *stack_syncobjs[4];
      struct drm_syncobj **syncobjs;
      uint32_t i;
      int ret;
@@ -1545,6 +1566,8 @@ drm_syncobj_signal_ioctl(struct drm_device *dev, void *data,
      ret = drm_syncobj_array_find(file_private,
                       u64_to_user_ptr(args->handles),
                       args->count_handles,
+                     stack_syncobjs,
+                     ARRAY_SIZE(stack_syncobjs),
                       &syncobjs);
      if (ret < 0)
          return ret;
@@ -1555,7 +1578,7 @@ drm_syncobj_signal_ioctl(struct drm_device *dev, void *data,
              break;
      }
-    drm_syncobj_array_free(syncobjs, args->count_handles);
+    drm_syncobj_array_free(syncobjs, args->count_handles, stack_syncobjs);
      return ret;
  }
@@ -1567,6 +1590,7 @@ drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void *data,
      struct drm_syncobj_timeline_array *args = data;
      uint64_t __user *points = u64_to_user_ptr(args->points);
      uint32_t i, j, count = args->count_handles;
+    struct drm_syncobj *stack_syncobjs[4];
      struct drm_syncobj **syncobjs;
      struct dma_fence_chain **chains;
      int ret;
@@ -1586,6 +1610,8 @@ drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void *data,
      ret = drm_syncobj_array_find(file_private,
                       u64_to_user_ptr(args->handles),
                       count,
+                     stack_syncobjs,
+                     ARRAY_SIZE(stack_syncobjs),
                       &syncobjs);
      if (ret < 0)
          return ret;
@@ -1622,7 +1648,7 @@ drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void *data,
  err_chains:
      kfree(chains);
  out:
-    drm_syncobj_array_free(syncobjs, count);
+    drm_syncobj_array_free(syncobjs, count, stack_syncobjs);
      return ret;
  }
@@ -1631,6 +1657,7 @@ int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
                  struct drm_file *file_private)
  {
      struct drm_syncobj_timeline_array *args = data;
+    struct drm_syncobj *stack_syncobjs[4];
      struct drm_syncobj **syncobjs;
      uint64_t __user *points = u64_to_user_ptr(args->points);
      uint32_t i;
@@ -1651,6 +1678,8 @@ int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
      ret = drm_syncobj_array_find(file_private,
                       u64_to_user_ptr(args->handles),
                       args->count_handles,
+                     stack_syncobjs,
+                     ARRAY_SIZE(stack_syncobjs),
                       &syncobjs);
      if (ret < 0)
          return ret;
@@ -1694,7 +1723,7 @@ int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
              break;
          }
      }
-    drm_syncobj_array_free(syncobjs, args->count_handles);
+    drm_syncobj_array_free(syncobjs, args->count_handles, stack_syncobjs);
      return ret;
  }


Reply via email to