On Wed, Apr 30, 2025 at 09:28:59AM +0000, Sharma, Shashank wrote: > [AMD Official Use Only - AMD Internal Distribution Only] > > Hello Dan, > > ________________________________ > From: Dan Carpenter > Sent: Wednesday, April 30, 2025 10:05 AM > To: Deucher, Alexander > Cc: Koenig, Christian; David Airlie; Simona Vetter; Sharma, Shashank; Khatri, > Sunil; Yadav, Arvind; Paneer Selvam, Arunpravin; > amd-gfx@lists.freedesktop.org; dri-de...@lists.freedesktop.org; > linux-ker...@vger.kernel.org; kernel-janit...@vger.kernel.org > Subject: [PATCH] drm/amdgpu/userq: remove unnecessary NULL check > > The "ticket" pointer points to in the middle of the &exec struct so it > can't be NULL. Remove the check. > > Signed-off-by: Dan Carpenter <dan.carpen...@linaro.org> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c > index b0e8098a3988..7505d920fb3d 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c > @@ -631,7 +631,7 @@ amdgpu_userq_validate_bos(struct amdgpu_userq_mgr *uq_mgr) > clear = false; > unlock = true; > /* The caller is already holding the reservation lock */ > - } else if (ticket && dma_resv_locking_ctx(resv) == ticket) { > + } else if (dma_resv_locking_ctx(resv) == ticket) { > > Its a Nack for me, There are a few situations (particularly during the > first launch of the desktop, and also when eviction fence and new queue > creation are working in parallel) where this ticket can be NULL, we > observed it during the stress validation and hence added this check, >
It shouldn't be NULL. It sounds like you are experiencing stack corruption and this is just a bandaid. drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c 566 static int 567 amdgpu_userq_validate_bos(struct amdgpu_userq_mgr *uq_mgr) 568 { 569 struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr); 570 struct amdgpu_vm *vm = &fpriv->vm; 571 struct amdgpu_device *adev = uq_mgr->adev; 572 struct amdgpu_bo_va *bo_va; 573 struct ww_acquire_ctx *ticket; 574 struct drm_exec exec; ^^^^^^^^^^^^^^^^^^^^^ The "exec" struct is declared on the stack. 575 struct amdgpu_bo *bo; 576 struct dma_resv *resv; 577 bool clear, unlock; 578 int ret = 0; 579 580 drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0); 581 drm_exec_until_all_locked(&exec) { 582 ret = amdgpu_vm_lock_pd(vm, &exec, 2); 583 drm_exec_retry_on_contention(&exec); 584 if (unlikely(ret)) { 585 DRM_ERROR("Failed to lock PD\n"); 586 goto unlock_all; 587 } 588 589 /* Lock the done list */ 590 list_for_each_entry(bo_va, &vm->done, base.vm_status) { 591 bo = bo_va->base.bo; 592 if (!bo) 593 continue; 594 595 ret = drm_exec_lock_obj(&exec, &bo->tbo.base); 596 drm_exec_retry_on_contention(&exec); 597 if (unlikely(ret)) 598 goto unlock_all; 599 } 600 } 601 602 spin_lock(&vm->status_lock); 603 while (!list_empty(&vm->moved)) { 604 bo_va = list_first_entry(&vm->moved, struct amdgpu_bo_va, 605 base.vm_status); 606 spin_unlock(&vm->status_lock); 607 608 /* Per VM BOs never need to bo cleared in the page tables */ 609 ret = amdgpu_vm_bo_update(adev, bo_va, false); 610 if (ret) 611 goto unlock_all; 612 spin_lock(&vm->status_lock); 613 } 614 615 ticket = &exec.ticket; ^^^^^^^^^^^^^^^^^^^^^ ticket is only set here. We know that &exec is non-NULL because it's declared on the stack. ticket is 4 bytes into the middle of a non-NULL struct. It is impossible for ticket to be NULL here. 616 while (!list_empty(&vm->invalidated)) { 617 bo_va = list_first_entry(&vm->invalidated, struct amdgpu_bo_va, 618 base.vm_status); 619 resv = bo_va->base.bo->tbo.base.resv; 620 spin_unlock(&vm->status_lock); 621 622 bo = bo_va->base.bo; 623 ret = amdgpu_userq_validate_vm_bo(NULL, bo); 624 if (ret) { 625 DRM_ERROR("Failed to validate BO\n"); 626 goto unlock_all; 627 } 628 629 /* Try to reserve the BO to avoid clearing its ptes */ 630 if (!adev->debug_vm && dma_resv_trylock(resv)) { 631 clear = false; 632 unlock = true; 633 /* The caller is already holding the reservation lock */ 634 } else if (ticket && dma_resv_locking_ctx(resv) == ticket) { I've included the whole rest of the function so that we can see it is not set a second time. regards, dan carpenter 635 clear = false; 636 unlock = false; 637 /* Somebody else is using the BO right now */ 638 } else { 639 clear = true; 640 unlock = false; 641 } 642 643 ret = amdgpu_vm_bo_update(adev, bo_va, clear); 644 645 if (unlock) 646 dma_resv_unlock(resv); 647 if (ret) 648 goto unlock_all; 649 650 spin_lock(&vm->status_lock); 651 } 652 spin_unlock(&vm->status_lock); 653 654 ret = amdgpu_eviction_fence_replace_fence(&fpriv->evf_mgr, &exec); 655 if (ret) 656 DRM_ERROR("Failed to replace eviction fence\n"); 657 658 unlock_all: 659 drm_exec_fini(&exec); 660 return ret; 661 }