On 24/03/2025 23:06, Maíra Canal wrote:
Hi Tvrtko, Some nits inline, mostly personal comments. In any case, Reviewed-by: Maíra Canal <mca...@igalia.com> On 18/03/25 12:54, Tvrtko Ursulin wrote:Running the Cyberpunk 2077 benchmark we can observe that the lookup helperis relatively hot, but the 97% of the calls are for a single object. (~3% for two points, and never more than three points. While a more trivial workload like vkmark under Plasma is even more skewed to single point lookups.)Therefore lets add a fast path to bypass the kmalloc_array/kfree and use apre-allocated stack array for those cases. Signed-off-by: Tvrtko Ursulin <tvrtko.ursu...@igalia.com> --- drivers/gpu/drm/drm_syncobj.c | 53 +++++++++++++++++++++++++++-------- 1 file changed, 41 insertions(+), 12 deletions(-)diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/ drm_syncobj.cindex 94932b89298f..233bdef53c87 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -1223,6 +1223,8 @@ EXPORT_SYMBOL(drm_timeout_abs_to_jiffies); static int drm_syncobj_array_find(struct drm_file *file_private, u32 __user *handles, uint32_t count, + struct drm_syncobj **stack_syncobjs, + u32 stack_count, struct drm_syncobj ***syncobjs_out) { struct drm_syncobj **syncobjs;@@ -1232,9 +1234,13 @@ static int drm_syncobj_array_find(struct drm_file *file_private,if (!access_ok(handles, count * sizeof(*handles))) return -EFAULT; - syncobjs = kmalloc_array(count, sizeof(*syncobjs), GFP_KERNEL); - if (!syncobjs) - return -ENOMEM; + if (count > stack_count) {I believe it's worth adding a comment mentioning that using the stack syncobj is a fast-path that covers most cases.
Yep. But this didn't feel like the right place for it, so I added comments where the callers size the arrays. That, however, means there are two duplicated comments. Okay with you?
+ syncobjs = kmalloc_array(count, sizeof(*syncobjs), GFP_KERNEL); + if (!syncobjs) + return -ENOMEM; + } else { + syncobjs = stack_syncobjs; + } for (i = 0; i < count; i++) { u64 handle;@@ -1260,25 +1266,31 @@ static int drm_syncobj_array_find(struct drm_file *file_private,drm_syncobj_put(syncobjs[i]); i--; } - kfree(syncobjs); + + if (syncobjs != stack_syncobjs)Again, I have a slight preference to make `syncobjs = NULL` and avoid this if condition. But it's just a personal preference.
Pending clarifications from the other patch.
+ kfree(syncobjs); return ret; } static void drm_syncobj_array_free(struct drm_syncobj **syncobjs, - uint32_t count) + uint32_t count, + struct drm_syncobj **stack_syncobjs)IMO, I think the order `syncobjs, stack_syncobjs, count` is a bit more intuitive.
But count is not directly related to the size of the stack array in this function. I could perhaps make it a boolean, like this:
static void drm_syncobj_array_free(struct drm_syncobj **syncobjs, uint32_t count, bool free_array) And then in the callers: drm_syncobj_array_free(syncobjs, count, syncobjs != stack_syncobjs); Would that be clearer?
{ uint32_t i; for (i = 0; i < count; i++) drm_syncobj_put(syncobjs[i]); - kfree(syncobjs); + + if (syncobjs != stack_syncobjs) + kfree(syncobjs); } int drm_syncobj_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file_private) { + struct drm_syncobj *stack_syncobjs[4]; struct drm_syncobj_wait *args = data; ktime_t deadline, *pdeadline = NULL; u32 count = args->count_handles;@@ -1304,6 +1316,8 @@ drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,ret = drm_syncobj_array_find(file_private, u64_to_user_ptr(args->handles), count, + stack_syncobjs, + ARRAY_SIZE(stack_syncobjs), &syncobjs); if (ret < 0) return ret;@@ -1321,7 +1335,7 @@ drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,&first, pdeadline); - drm_syncobj_array_free(syncobjs, count); + drm_syncobj_array_free(syncobjs, count, stack_syncobjs); if (timeout < 0) return timeout;@@ -1336,6 +1350,7 @@ drm_syncobj_timeline_wait_ioctl(struct drm_device *dev, void *data,struct drm_file *file_private) { struct drm_syncobj_timeline_wait *args = data; + struct drm_syncobj *stack_syncobjs[4];Zero initialize it?
Do you think it is required? Regards, Tvrtko
ktime_t deadline, *pdeadline = NULL; u32 count = args->count_handles; struct drm_syncobj **syncobjs;@@ -1361,6 +1376,8 @@ drm_syncobj_timeline_wait_ioctl(struct drm_device *dev, void *data,ret = drm_syncobj_array_find(file_private, u64_to_user_ptr(args->handles), count, + stack_syncobjs, + ARRAY_SIZE(stack_syncobjs), &syncobjs); if (ret < 0) return ret;@@ -1378,7 +1395,7 @@ drm_syncobj_timeline_wait_ioctl(struct drm_device *dev, void *data,&first, pdeadline); - drm_syncobj_array_free(syncobjs, count); + drm_syncobj_array_free(syncobjs, count, stack_syncobjs); if (timeout < 0) return timeout;@@ -1496,6 +1513,7 @@ drm_syncobj_reset_ioctl(struct drm_device *dev, void *data,struct drm_file *file_private) { struct drm_syncobj_array *args = data; + struct drm_syncobj *stack_syncobjs[4]; struct drm_syncobj **syncobjs; uint32_t i; int ret;@@ -1512,6 +1530,8 @@ drm_syncobj_reset_ioctl(struct drm_device *dev, void *data,ret = drm_syncobj_array_find(file_private, u64_to_user_ptr(args->handles), args->count_handles, + stack_syncobjs, + ARRAY_SIZE(stack_syncobjs), &syncobjs); if (ret < 0) return ret;@@ -1519,7 +1539,7 @@ drm_syncobj_reset_ioctl(struct drm_device *dev, void *data,for (i = 0; i < args->count_handles; i++) drm_syncobj_replace_fence(syncobjs[i], NULL); - drm_syncobj_array_free(syncobjs, args->count_handles);+ drm_syncobj_array_free(syncobjs, args->count_handles, stack_syncobjs);return 0; }@@ -1529,6 +1549,7 @@ drm_syncobj_signal_ioctl(struct drm_device *dev, void *data,struct drm_file *file_private) { struct drm_syncobj_array *args = data; + struct drm_syncobj *stack_syncobjs[4]; struct drm_syncobj **syncobjs; uint32_t i; int ret;@@ -1545,6 +1566,8 @@ drm_syncobj_signal_ioctl(struct drm_device *dev, void *data,ret = drm_syncobj_array_find(file_private, u64_to_user_ptr(args->handles), args->count_handles, + stack_syncobjs, + ARRAY_SIZE(stack_syncobjs), &syncobjs); if (ret < 0) return ret;@@ -1555,7 +1578,7 @@ drm_syncobj_signal_ioctl(struct drm_device *dev, void 
*data,break; } - drm_syncobj_array_free(syncobjs, args->count_handles);+ drm_syncobj_array_free(syncobjs, args->count_handles, stack_syncobjs);return ret; }@@ -1567,6 +1590,7 @@ drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void *data,struct drm_syncobj_timeline_array *args = data; uint64_t __user *points = u64_to_user_ptr(args->points); uint32_t i, j, count = args->count_handles; + struct drm_syncobj *stack_syncobjs[4]; struct drm_syncobj **syncobjs; struct dma_fence_chain **chains; int ret;@@ -1586,6 +1610,8 @@ drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void *data,ret = drm_syncobj_array_find(file_private, u64_to_user_ptr(args->handles), count, + stack_syncobjs, + ARRAY_SIZE(stack_syncobjs), &syncobjs); if (ret < 0) return ret;@@ -1622,7 +1648,7 @@ drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void *data,err_chains: kfree(chains); out: - drm_syncobj_array_free(syncobjs, count); + drm_syncobj_array_free(syncobjs, count, stack_syncobjs); return ret; }@@ -1631,6 +1657,7 @@ int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,struct drm_file *file_private) { struct drm_syncobj_timeline_array *args = data; + struct drm_syncobj *stack_syncobjs[4]; struct drm_syncobj **syncobjs; uint64_t __user *points = u64_to_user_ptr(args->points); uint32_t i;@@ -1651,6 +1678,8 @@ int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,ret = drm_syncobj_array_find(file_private, u64_to_user_ptr(args->handles), args->count_handles, + stack_syncobjs, + ARRAY_SIZE(stack_syncobjs), &syncobjs); if (ret < 0) return ret;@@ -1694,7 +1723,7 @@ int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,break; } } - drm_syncobj_array_free(syncobjs, args->count_handles);+ drm_syncobj_array_free(syncobjs, args->count_handles, stack_syncobjs);return ret; }