On 6/11/25 16:00, Tvrtko Ursulin wrote: > Running the Cyberpunk 2077 benchmark we can observe that the lookup helper > is relatively hot, but the 97% of the calls are for a single object. (~3% > for two points, and never more than three points. While a more trivial > workload like vkmark under Plasma is even more skewed to single point > lookups.) > > Therefore lets add a fast path to bypass the kmalloc_array/kfree and use a > pre-allocated stack array for those cases.
Have you considered using memdup_user()? That's using a separate bucket IIRC and might give similar performance. If that is still not sufficient I'm really wondering if we shouldn't have a macro for doing this. It's a really common use case as far as I can see. Regards, Christian. > > Signed-off-by: Tvrtko Ursulin <tvrtko.ursu...@igalia.com> > Reviewed-by: Maíra Canal <mca...@igalia.com> > --- > v2: > * Added comments describing how the fast path arrays were sized. > * Make container freeing criteria clearer by using a boolean. > --- > drivers/gpu/drm/drm_syncobj.c | 56 +++++++++++++++++++++++++++-------- > 1 file changed, 44 insertions(+), 12 deletions(-) > > diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c > index be5905dca87f..65c301852f0d 100644 > --- a/drivers/gpu/drm/drm_syncobj.c > +++ b/drivers/gpu/drm/drm_syncobj.c > @@ -1259,6 +1259,8 @@ EXPORT_SYMBOL(drm_timeout_abs_to_jiffies); > static int drm_syncobj_array_find(struct drm_file *file_private, > u32 __user *handles, > uint32_t count, > + struct drm_syncobj **stack_syncobjs, > + u32 stack_count, > struct drm_syncobj ***syncobjs_out) > { > struct drm_syncobj **syncobjs; > @@ -1268,9 +1270,13 @@ static int drm_syncobj_array_find(struct drm_file > *file_private, > if (!access_ok(handles, count * sizeof(*handles))) > return -EFAULT; > > - syncobjs = kmalloc_array(count, sizeof(*syncobjs), GFP_KERNEL); > - if (!syncobjs) > - return -ENOMEM; > + if (count > stack_count) { > + syncobjs = kmalloc_array(count, sizeof(*syncobjs), GFP_KERNEL); > + if (!syncobjs) > + return -ENOMEM; > + } else { > + syncobjs = stack_syncobjs; > + } > > for (i = 0; i < count; i++) { > u32 handle; > @@ -1292,25 +1298,31 @@ static int drm_syncobj_array_find(struct drm_file > *file_private, > err_put_syncobjs: > while (i-- > 0) > drm_syncobj_put(syncobjs[i]); > - kfree(syncobjs); > + > + if (syncobjs != stack_syncobjs) > + kfree(syncobjs); > > return ret; > } > > static void drm_syncobj_array_free(struct drm_syncobj **syncobjs, > - uint32_t count) > + uint32_t count, > + bool free_container) > { > uint32_t i; > > for (i = 0; i < count; i++) > drm_syncobj_put(syncobjs[i]); > - kfree(syncobjs); > + > + if (free_container) > + kfree(syncobjs); > } > > int > drm_syncobj_wait_ioctl(struct drm_device *dev, void *data, > struct drm_file *file_private) > { > + struct drm_syncobj *stack_syncobjs[DRM_SYNCOBJ_FAST_PATH_ENTRIES]; > struct drm_syncobj_wait *args = data; > ktime_t deadline, *pdeadline = NULL; > u32 count = args->count_handles; > @@ -1336,6 +1348,8 @@ drm_syncobj_wait_ioctl(struct drm_device *dev, void > *data, > ret = drm_syncobj_array_find(file_private, > u64_to_user_ptr(args->handles), > count, > + stack_syncobjs, > + ARRAY_SIZE(stack_syncobjs), > &syncobjs); > if (ret < 0) > return ret; > @@ -1354,7 +1368,7 @@ drm_syncobj_wait_ioctl(struct drm_device *dev, void > *data, > &first, > pdeadline); > > - drm_syncobj_array_free(syncobjs, count); > + drm_syncobj_array_free(syncobjs, count, syncobjs != stack_syncobjs); > > if (timeout < 0) > return timeout; > @@ -1368,6 +1382,7 @@ int > drm_syncobj_timeline_wait_ioctl(struct drm_device *dev, void *data, > struct drm_file *file_private) > { > + struct drm_syncobj *stack_syncobjs[DRM_SYNCOBJ_FAST_PATH_ENTRIES]; > struct drm_syncobj_timeline_wait *args = data; > ktime_t deadline, *pdeadline = NULL; > u32 count = args->count_handles; > @@ -1394,6 +1409,8 @@ drm_syncobj_timeline_wait_ioctl(struct drm_device *dev, > void *data, > ret = drm_syncobj_array_find(file_private, > u64_to_user_ptr(args->handles), > count, > + stack_syncobjs, > + ARRAY_SIZE(stack_syncobjs), > &syncobjs); > if (ret < 0) > return ret; > @@ -1412,7 +1429,7 @@ drm_syncobj_timeline_wait_ioctl(struct drm_device *dev, > void *data, > &first, > pdeadline); > > - drm_syncobj_array_free(syncobjs, count); > + drm_syncobj_array_free(syncobjs, count, syncobjs != stack_syncobjs); > > if (timeout < 0) > return timeout; > @@ -1529,6 +1546,7 @@ int > drm_syncobj_reset_ioctl(struct drm_device *dev, void *data, > struct drm_file *file_private) > { > + struct drm_syncobj *stack_syncobjs[DRM_SYNCOBJ_FAST_PATH_ENTRIES]; > struct drm_syncobj_array *args = data; > struct drm_syncobj **syncobjs; > uint32_t i; > @@ -1546,6 +1564,8 @@ drm_syncobj_reset_ioctl(struct drm_device *dev, void > *data, > ret = drm_syncobj_array_find(file_private, > u64_to_user_ptr(args->handles), > args->count_handles, > + stack_syncobjs, > + ARRAY_SIZE(stack_syncobjs), > &syncobjs); > if (ret < 0) > return ret; > @@ -1553,7 +1573,8 @@ drm_syncobj_reset_ioctl(struct drm_device *dev, void > *data, > for (i = 0; i < args->count_handles; i++) > drm_syncobj_replace_fence(syncobjs[i], NULL); > > - drm_syncobj_array_free(syncobjs, args->count_handles); > + drm_syncobj_array_free(syncobjs, args->count_handles, > + syncobjs != stack_syncobjs); > > return 0; > } > @@ -1562,6 +1583,7 @@ int > drm_syncobj_signal_ioctl(struct drm_device *dev, void *data, > struct drm_file *file_private) > { > + struct drm_syncobj *stack_syncobjs[DRM_SYNCOBJ_FAST_PATH_ENTRIES]; > struct drm_syncobj_array *args = data; > struct drm_syncobj **syncobjs; > uint32_t i; > @@ -1579,6 +1601,8 @@ drm_syncobj_signal_ioctl(struct drm_device *dev, void > *data, > ret = drm_syncobj_array_find(file_private, > u64_to_user_ptr(args->handles), > args->count_handles, > + stack_syncobjs, > + ARRAY_SIZE(stack_syncobjs), > &syncobjs); > if (ret < 0) > return ret; > @@ -1589,7 +1613,8 @@ drm_syncobj_signal_ioctl(struct drm_device *dev, void > *data, > break; > } > > - drm_syncobj_array_free(syncobjs, args->count_handles); > + drm_syncobj_array_free(syncobjs, args->count_handles, > + syncobjs != stack_syncobjs); > > return ret; > } > @@ -1598,6 +1623,7 @@ int > drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void *data, > struct drm_file *file_private) > { > + struct drm_syncobj *stack_syncobjs[DRM_SYNCOBJ_FAST_PATH_ENTRIES]; > struct drm_syncobj_timeline_array *args = data; > uint64_t __user *points = u64_to_user_ptr(args->points); > uint32_t i, j, count = args->count_handles; > @@ -1617,6 +1643,8 @@ drm_syncobj_timeline_signal_ioctl(struct drm_device > *dev, void *data, > ret = drm_syncobj_array_find(file_private, > u64_to_user_ptr(args->handles), > count, > + stack_syncobjs, > + ARRAY_SIZE(stack_syncobjs), > &syncobjs); > if (ret < 0) > return ret; > @@ -1653,7 +1681,7 @@ drm_syncobj_timeline_signal_ioctl(struct drm_device > *dev, void *data, > err_chains: > kfree(chains); > out: > - drm_syncobj_array_free(syncobjs, count); > + drm_syncobj_array_free(syncobjs, count, syncobjs != stack_syncobjs); > > return ret; > } > @@ -1661,6 +1689,7 @@ drm_syncobj_timeline_signal_ioctl(struct drm_device > *dev, void *data, > int drm_syncobj_query_ioctl(struct drm_device *dev, void *data, > struct drm_file *file_private) > { > + struct drm_syncobj *stack_syncobjs[DRM_SYNCOBJ_FAST_PATH_ENTRIES]; > struct drm_syncobj_timeline_array *args = data; > struct drm_syncobj **syncobjs; > uint64_t __user *points = u64_to_user_ptr(args->points); > @@ -1679,6 +1708,8 @@ int drm_syncobj_query_ioctl(struct drm_device *dev, > void *data, > ret = drm_syncobj_array_find(file_private, > u64_to_user_ptr(args->handles), > args->count_handles, > + stack_syncobjs, > + ARRAY_SIZE(stack_syncobjs), > &syncobjs); > if (ret < 0) > return ret; > @@ -1722,7 +1753,8 @@ int drm_syncobj_query_ioctl(struct drm_device *dev, > void *data, > if (ret) > break; > } > - drm_syncobj_array_free(syncobjs, args->count_handles); > + drm_syncobj_array_free(syncobjs, args->count_handles, > + syncobjs != stack_syncobjs); > > return ret; > }