On 27-02-2026 02:20 pm, Jesse.Zhang wrote:
The amdgpu_userq_signal_ioctl function was triggering kernel page faults
due to missing null pointer checks when accessing gobj_read/gobj_write
arrays, and improper handling of memory allocation for these arrays.

The crash stack showed the failure originated from the ioctl path:
[   64.977695] Call Trace:
[   64.977696]  <TASK>
[   64.977700]  amdgpu_userq_signal_ioctl+0x8e4/0xda0 [amdgpu]
[   64.977830]  ? tty_ldisc_deref+0x1a/0x20
[   64.977834]  ? __pfx_amdgpu_userq_signal_ioctl+0x10/0x10 [amdgpu]
[   64.977934]  drm_ioctl_kernel+0xab/0x110 [drm]
[   64.977955]  ? __pfx_amdgpu_userq_signal_ioctl+0x10/0x10 [amdgpu]
[   64.978071]  drm_ioctl+0x2cb/0x5a0 [drm]
[   64.978088]  ? ttm_bo_vm_fault_reserved+0x1ef/0x410 [ttm]
[   64.978093]  amdgpu_drm_ioctl+0x4f/0x90 [amdgpu]
[   64.978179]  __x64_sys_ioctl+0x9e/0xf0
[   64.978182]  x64_sys_call+0x1274/0x2190
[   64.978185]  do_syscall_64+0x74/0x950
[   64.978189]  ? ___pte_offset_map+0x20/0x170
[   64.978191]  ? __handle_mm_fault+0x986/0xfb0
[   64.978194]  ? count_memcg_events+0xe7/0x1e0
[   64.978197]  ? handle_mm_fault+0x1cc/0x2b0
[   64.978199]  ? do_user_addr_fault+0x394/0x8a0
[   64.978202]  ? irqentry_exit_to_user_mode+0x2a/0x1e0
[   64.978205]  ? irqentry_exit+0x3f/0x50
[   64.978206]  ? exc_page_fault+0x97/0x190
[   64.978208]  entry_SYSCALL_64_after_hwframe+0x76/0x7e
[   64.978210] RIP: 0033:0x7f3c08b24ded

Fixes: fd4fde1df18b ("drm/amdgpu/userq: Use drm_gem_objects_lookup in 
amdgpu_userq_signal_ioctl")

V2: initialize gobj_write

Signed-off-by: Jesse Zhang<[email protected]>
---
  .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c   | 22 +++++++++++++------
  1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
index 3c30512a6266..af934374df94 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
@@ -467,7 +467,7 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void 
*data,
        const unsigned int num_read_bo_handles = args->num_bo_read_handles;
        struct amdgpu_fpriv *fpriv = filp->driver_priv;
        struct amdgpu_userq_mgr *userq_mgr = &fpriv->userq_mgr;
-       struct drm_gem_object **gobj_write, **gobj_read;
+       struct drm_gem_object **gobj_write = NULL, **gobj_read = NULL;

We don’t need to set these to NULL anymore, drm_gem_objects_lookup already check and set it to NULL if num objects is 0. I Don’t think this patch is needed anymore.

I have already dropped such patch few days back before i pulled in drm_gem_objects_lookup fix patch from drm-misc. I think u might have not pulled in recent code changes.

Regards
Sunil khatri


        u32 *syncobj_handles, num_syncobj_handles;
        struct amdgpu_userq_fence *userq_fence;
        struct amdgpu_usermode_queue *queue;
@@ -597,13 +597,21 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, 
void *data,
  exec_fini:
        drm_exec_fini(&exec);
  put_gobj_write:
-       for (i = 0; i < num_write_bo_handles; i++)
-               drm_gem_object_put(gobj_write[i]);
-       kfree(gobj_write);
+       for (i = 0; i < num_write_bo_handles; i++) {
+               if (gobj_write)
+                       drm_gem_object_put(gobj_write[i]);
+       }
+
+       if (gobj_write)
+               kfree(gobj_write);
  put_gobj_read:
-       for (i = 0; i < num_read_bo_handles; i++)
-               drm_gem_object_put(gobj_read[i]);
-       kfree(gobj_read);
+       for (i = 0; i < num_read_bo_handles; i++) {
+               if (gobj_read)
+                       drm_gem_object_put(gobj_read[i]);
+       }
+
+       if (gobj_read)
+               kfree(gobj_read);
  free_syncobj:
        while (entry-- > 0)
                if (syncobj[entry])

Reply via email to