On 4/9/25 07:48, Arunpravin Paneer Selvam wrote: > The ttm_bo_pin and ttm_bo_unpin warnings are resolved by moving the > doorbell bo reserve up before pin/unpin. > > WARNING: CPU: 11 PID: 1818 at drivers/gpu/drm/ttm/ttm_bo.c:592 > ttm_bo_pin+0x1f6/0x270 [ttm] > [ +0.000277] CPU: 11 UID: 1000 PID: 1818 Comm: Xwayland Tainted: G W > 6.12.0+ #15 > [ +0.000006] Tainted: [W]=WARN > [ +0.000004] Hardware name: ASUS System Product Name/TUF GAMING B650-PLUS, > BIOS 3072 12/20/2024 > [ +0.000004] RIP: 0010:ttm_bo_pin+0x1f6/0x270 [ttm] > [ +0.000005] RSP: 0018:ffff88846ca879d0 EFLAGS: 00010246 > [ +0.000007] RAX: 0000000000000000 RBX: ffff88810b7ca848 RCX: > 0000000000000000 > [ +0.000004] RDX: 0000000000000000 RSI: 0000000000000000 RDI: > 0000000000000000 > [ +0.000005] RBP: ffff88846ca879e8 R08: 0000000000000000 R09: > 0000000000000000 > [ +0.000004] R10: 0000000000000000 R11: 0000000000000000 R12: > ffff88810b7ca848 > [ +0.000004] R13: ffff88846c666250 R14: 1ffff1108d950f44 R15: > ffff88846ca87aa0 > [ +0.000005] FS: 00007c45ff436d00(0000) GS:ffff888409580000(0000) > knlGS:0000000000000000 > [ +0.000004] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 > [ +0.000005] CR2: 00005b0c142a60e0 CR3: 000000012ce5a000 CR4: > 0000000000f50ef0 > [ +0.000004] PKRU: 55555554 > [ +0.000004] Call Trace: > [ +0.000004] <TASK> > [ +0.000005] ? show_regs+0x6c/0x80 > [ +0.000007] ? __warn+0xd2/0x2d0 > [ +0.000007] ? ttm_bo_pin+0x1f6/0x270 [ttm] > [ +0.000031] ? report_bug+0x282/0x2f0 > [ +0.000012] ? handle_bug+0x6e/0xc0 > [ +0.000007] ? exc_invalid_op+0x18/0x50 > [ +0.000007] ? asm_exc_invalid_op+0x1b/0x20 > [ +0.000017] ? ttm_bo_pin+0x1f6/0x270 [ttm] > [ +0.000014] amdgpu_bo_pin+0x365/0x9d0 [amdgpu] > [ +0.000191] ? __pfx_amdgpu_bo_pin+0x10/0x10 [amdgpu] > [ +0.000185] ? drm_gem_object_lookup+0x81/0xc0 > [ +0.000008] ? kasan_save_alloc_info+0x37/0x60 > [ +0.000007] ? 
__kasan_kmalloc+0xc3/0xd0 > [ +0.000013] amdgpu_userqueue_get_doorbell_index+0xee/0x5f0 [amdgpu] > [ +0.000209] amdgpu_userq_ioctl+0x6b4/0xd40 [amdgpu] > [ +0.000193] ? __pfx_amdgpu_userq_ioctl+0x10/0x10 [amdgpu] > [ +0.000211] ? lock_acquire+0x7c/0xc0 > [ +0.000006] ? drm_dev_enter+0x51/0x190 > [ +0.000015] drm_ioctl_kernel+0x18b/0x330 > [ +0.000007] ? __pfx_amdgpu_userq_ioctl+0x10/0x10 [amdgpu] > [ +0.000190] ? __pfx_drm_ioctl_kernel+0x10/0x10 > [ +0.000005] ? lock_acquire+0x7c/0xc0 > [ +0.000009] ? srso_alias_return_thunk+0x5/0xfbef5 > [ +0.000005] ? __kasan_check_write+0x14/0x30 > [ +0.000005] ? srso_alias_return_thunk+0x5/0xfbef5 > [ +0.000011] drm_ioctl+0x589/0xd00 > [ +0.000005] ? srso_alias_return_thunk+0x5/0xfbef5 > [ +0.000006] ? __pfx_amdgpu_userq_ioctl+0x10/0x10 [amdgpu] > [ +0.000194] ? __pfx_drm_ioctl+0x10/0x10 > [ +0.000006] ? __pm_runtime_resume+0x80/0x110 > [ +0.000021] ? srso_alias_return_thunk+0x5/0xfbef5 > [ +0.000005] ? trace_hardirqs_on+0x53/0x60 > [ +0.000005] ? srso_alias_return_thunk+0x5/0xfbef5 > [ +0.000005] ? _raw_spin_unlock_irqrestore+0x51/0x80 > [ +0.000013] amdgpu_drm_ioctl+0xd2/0x1c0 [amdgpu] > [ +0.000185] __x64_sys_ioctl+0x13a/0x1c0 > [ +0.000010] x64_sys_call+0x11ad/0x25f0 > [ +0.000007] do_syscall_64+0x91/0x180 > [ +0.000007] ? srso_alias_return_thunk+0x5/0xfbef5 > [ +0.000005] ? irqentry_exit+0x77/0xb0 > [ +0.000005] ? srso_alias_return_thunk+0x5/0xfbef5 > [ +0.000005] ? 
exc_page_fault+0x93/0x150 > [ +0.000009] entry_SYSCALL_64_after_hwframe+0x76/0x7e > [ +0.000005] RIP: 0033:0x7c45ff924ded > [ +0.000005] RSP: 002b:00007ffff7167810 EFLAGS: 00000246 ORIG_RAX: > 0000000000000010 > [ +0.000008] RAX: ffffffffffffffda RBX: 00000000c0486456 RCX: > 00007c45ff924ded > [ +0.000004] RDX: 00007ffff7167870 RSI: 00000000c0486456 RDI: > 000000000000000b > [ +0.000004] RBP: 00007ffff7167860 R08: ffff800100000000 R09: > 0000000000010000 > [ +0.000005] R10: 00007ffff7167950 R11: 0000000000000246 R12: > 00005b0c2a51bc48 > [ +0.000004] R13: 000000000000000b R14: 0000000000000000 R15: > 00007ffff7167950 > [ +0.000022] </TASK> > [ +0.000004] irq event stamp: 80693 > [ +0.000004] hardirqs last enabled at (80699): [<ffffffff86a693a9>] > __up_console_sem+0x79/0xa0 > [ +0.000005] hardirqs last disabled at (80704): [<ffffffff86a6938e>] > __up_console_sem+0x5e/0xa0 > [ +0.000005] softirqs last enabled at (80390): [<ffffffff8687377e>] > __irq_exit_rcu+0x17e/0x1d0 > [ +0.000005] softirqs last disabled at (80385): [<ffffffff8687377e>] > __irq_exit_rcu+0x17e/0x1d0 > [ +0.000006] ---[ end trace 0000000000000000 ]--- > ------------------------------------------------------------------------------------------------------ > > [ +0.000006] WARNING: CPU: 10 PID: 1818 at drivers/gpu/drm/ttm/ttm_bo.c:611 > ttm_bo_unpin+0x21f/0x2c0 [ttm] > [ +0.000280] CPU: 10 UID: 1000 PID: 1818 Comm: Xwayland Tainted: G W > 6.12.0+ #15 > [ +0.000006] Tainted: [W]=WARN > [ +0.000004] Hardware name: ASUS System Product Name/TUF GAMING B650-PLUS, > BIOS 3072 12/20/2024 > [ +0.000004] RIP: 0010:ttm_bo_unpin+0x21f/0x2c0 [ttm] > [ +0.000005] RSP: 0018:ffff88846ca87888 EFLAGS: 00010246 > [ +0.000007] RAX: 0000000000000000 RBX: ffff88810b7ca848 RCX: > 0000000000000000 > [ +0.000005] RDX: 0000000000000000 RSI: 0000000000000000 RDI: > 0000000000000000 > [ +0.000004] RBP: ffff88846ca878a0 R08: 0000000000000000 R09: > 0000000000000000 > [ +0.000004] R10: 0000000000000000 R11: 0000000000000000 R12: 
> ffff888164e90050 > [ +0.000005] R13: ffff88846c666200 R14: 0000000000000001 R15: > ffff888168402d28 > [ +0.000004] FS: 00007c45ff436d00(0000) GS:ffff888409500000(0000) > knlGS:0000000000000000 > [ +0.000005] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 > [ +0.000004] CR2: 00007c45f7373b20 CR3: 000000012ce5a000 CR4: > 0000000000f50ef0 > [ +0.000005] PKRU: 55555554 > [ +0.000004] Call Trace: > [ +0.000004] <TASK> > [ +0.000005] ? show_regs+0x6c/0x80 > [ +0.000008] ? __warn+0xd2/0x2d0 > [ +0.000007] ? ttm_bo_unpin+0x21f/0x2c0 [ttm] > [ +0.000012] ? report_bug+0x282/0x2f0 > [ +0.000013] ? handle_bug+0x6e/0xc0 > [ +0.000006] ? exc_invalid_op+0x18/0x50 > [ +0.000008] ? asm_exc_invalid_op+0x1b/0x20 > [ +0.000017] ? ttm_bo_unpin+0x21f/0x2c0 [ttm] > [ +0.000011] ? ttm_bo_unpin+0x217/0x2c0 [ttm] > [ +0.000011] amdgpu_bo_unpin+0x45/0x250 [amdgpu] > [ +0.000216] amdgpu_userq_ioctl+0x2c3/0xd40 [amdgpu] > [ +0.000226] ? drm_dev_exit+0x2d/0x60 > [ +0.000010] ? __pfx_amdgpu_userq_ioctl+0x10/0x10 [amdgpu] > [ +0.000201] ? srso_alias_return_thunk+0x5/0xfbef5 > [ +0.000005] ? lock_acquire+0x7c/0xc0 > [ +0.000006] ? drm_dev_enter+0x51/0x190 > [ +0.000015] drm_ioctl_kernel+0x18b/0x330 > [ +0.000007] ? __pfx_amdgpu_userq_ioctl+0x10/0x10 [amdgpu] > [ +0.000188] ? __pfx_drm_ioctl_kernel+0x10/0x10 > [ +0.000006] ? lock_acquire+0x7c/0xc0 > [ +0.000008] ? srso_alias_return_thunk+0x5/0xfbef5 > [ +0.000005] ? __kasan_check_write+0x14/0x30 > [ +0.000006] ? srso_alias_return_thunk+0x5/0xfbef5 > [ +0.000010] drm_ioctl+0x589/0xd00 > [ +0.000005] ? srso_alias_return_thunk+0x5/0xfbef5 > [ +0.000006] ? __pfx_amdgpu_userq_ioctl+0x10/0x10 [amdgpu] > [ +0.000211] ? __pfx_drm_ioctl+0x10/0x10 > [ +0.000006] ? __pm_runtime_resume+0x80/0x110 > [ +0.000020] ? srso_alias_return_thunk+0x5/0xfbef5 > [ +0.000006] ? trace_hardirqs_on+0x53/0x60 > [ +0.000005] ? srso_alias_return_thunk+0x5/0xfbef5 > [ +0.000005] ? 
_raw_spin_unlock_irqrestore+0x51/0x80 > [ +0.000013] amdgpu_drm_ioctl+0xd2/0x1c0 [amdgpu] > [ +0.000186] __x64_sys_ioctl+0x13a/0x1c0 > [ +0.000010] x64_sys_call+0x11ad/0x25f0 > [ +0.000007] do_syscall_64+0x91/0x180 > [ +0.000007] ? srso_alias_return_thunk+0x5/0xfbef5 > [ +0.000005] ? do_syscall_64+0x9d/0x180 > [ +0.000007] ? srso_alias_return_thunk+0x5/0xfbef5 > [ +0.000010] ? __pfx___rseq_handle_notify_resume+0x10/0x10 > [ +0.000005] ? __pfx_blkcg_maybe_throttle_current+0x10/0x10 > [ +0.000013] ? srso_alias_return_thunk+0x5/0xfbef5 > [ +0.000009] ? srso_alias_return_thunk+0x5/0xfbef5 > [ +0.000008] ? srso_alias_return_thunk+0x5/0xfbef5 > [ +0.000005] ? syscall_exit_to_user_mode+0x95/0x260 > [ +0.000008] ? srso_alias_return_thunk+0x5/0xfbef5 > [ +0.000005] ? do_syscall_64+0x9d/0x180 > [ +0.000007] ? srso_alias_return_thunk+0x5/0xfbef5 > [ +0.000005] ? do_syscall_64+0x9d/0x180 > [ +0.000011] ? srso_alias_return_thunk+0x5/0xfbef5 > [ +0.000010] ? srso_alias_return_thunk+0x5/0xfbef5 > [ +0.000009] ? srso_alias_return_thunk+0x5/0xfbef5 > [ +0.000008] ? srso_alias_return_thunk+0x5/0xfbef5 > [ +0.000005] ? irqentry_exit_to_user_mode+0x8b/0x260 > [ +0.000007] ? srso_alias_return_thunk+0x5/0xfbef5 > [ +0.000006] ? irqentry_exit+0x77/0xb0 > [ +0.000004] ? srso_alias_return_thunk+0x5/0xfbef5 > [ +0.000005] ? 
exc_page_fault+0x93/0x150 > [ +0.000010] entry_SYSCALL_64_after_hwframe+0x76/0x7e > [ +0.000005] RIP: 0033:0x7c45ff924ded > [ +0.000005] RSP: 002b:00007ffff7168790 EFLAGS: 00000246 ORIG_RAX: > 0000000000000010 > [ +0.000008] RAX: ffffffffffffffda RBX: 00000000c0486456 RCX: > 00007c45ff924ded > [ +0.000005] RDX: 00007ffff71687f0 RSI: 00000000c0486456 RDI: > 000000000000000b > [ +0.000004] RBP: 00007ffff71687e0 R08: 00005b0c2a49b010 R09: > 0000000000000007 > [ +0.000004] R10: 00005b0c2a4d7140 R11: 0000000000000246 R12: > 000000000000000b > [ +0.000004] R13: 00007c45ff19e5cc R14: 00005b0c2a51c538 R15: > 00005b0c2a51bbd8 > [ +0.000022] </TASK> > [ +0.000005] irq event stamp: 87419 > [ +0.000004] hardirqs last enabled at (87425): [<ffffffff86a693a9>] > __up_console_sem+0x79/0xa0 > [ +0.000005] hardirqs last disabled at (87430): [<ffffffff86a6938e>] > __up_console_sem+0x5e/0xa0 > [ +0.000005] softirqs last enabled at (87058): [<ffffffff8687377e>] > __irq_exit_rcu+0x17e/0x1d0 > [ +0.000006] softirqs last disabled at (87053): [<ffffffff8687377e>] > __irq_exit_rcu+0x17e/0x1d0 > [ +0.000005] ---[ end trace 0000000000000000 ]--- > > Signed-off-by: Arunpravin Paneer Selvam <arunpravin.paneersel...@amd.com>
The patch itself is Reviewed-by: Christian König <christian.koe...@amd.com> But somebody should work on removing the pinning altogether. We now have the eviction fence for that instead. Regards, Christian. > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 13 +++++++++---- > 1 file changed, 9 insertions(+), 4 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c > index beae931152a3..7427e080b389 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c > @@ -208,14 +208,14 @@ amdgpu_userqueue_get_doorbell_index(struct > amdgpu_userq_mgr *uq_mgr, > db_obj->obj = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj)); > drm_gem_object_put(gobj); > > - /* Pin the BO before generating the index, unpin in queue destroy */ > - r = amdgpu_bo_pin(db_obj->obj, AMDGPU_GEM_DOMAIN_DOORBELL); > + r = amdgpu_bo_reserve(db_obj->obj, true); > if (r) { > DRM_ERROR("[Usermode queues] Failed to pin doorbell object\n"); > goto unref_bo; > } > > - r = amdgpu_bo_reserve(db_obj->obj, true); > + /* Pin the BO before generating the index, unpin in queue destroy */ > + r = amdgpu_bo_pin(db_obj->obj, AMDGPU_GEM_DOMAIN_DOORBELL); > if (r) { > DRM_ERROR("[Usermode queues] Failed to pin doorbell object\n"); > goto unpin_bo; > @@ -264,6 +264,7 @@ amdgpu_userqueue_destroy(struct drm_file *filp, int > queue_id) > struct amdgpu_fpriv *fpriv = filp->driver_priv; > struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr; > struct amdgpu_usermode_queue *queue; > + int r; > > cancel_delayed_work(&uq_mgr->resume_work); > mutex_lock(&uq_mgr->userq_mutex); > @@ -275,7 +276,11 @@ amdgpu_userqueue_destroy(struct drm_file *filp, int > queue_id) > return -EINVAL; > } > > - amdgpu_bo_unpin(queue->db_obj.obj); > + r = amdgpu_bo_reserve(queue->db_obj.obj, true); > + if (!r) { > + amdgpu_bo_unpin(queue->db_obj.obj); > + amdgpu_bo_unreserve(queue->db_obj.obj); > + } > 
amdgpu_bo_unref(&queue->db_obj.obj); > amdgpu_userqueue_cleanup(uq_mgr, queue, queue_id); > mutex_unlock(&uq_mgr->userq_mutex);