On 4/25/25 09:07, Prike Liang wrote: > Before the user queue BOs resume workqueue is scheduled; > there's no valid eviction fence to attach the gem obj. > For this case, it doesn't need to attach/detach the eviction > fence. Also, it needs to unlock the bo first before returning > from the eviction fence attached error. > > Signed-off-by: Prike Liang <prike.li...@amd.com> > --- > .../gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c | 3 +++ > drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 16 ++++++++++------ > 2 files changed, 13 insertions(+), 6 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c > index d2271c10498d..375f15b6fd58 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c > @@ -216,6 +216,9 @@ void amdgpu_eviction_fence_detach(struct > amdgpu_eviction_fence_mgr *evf_mgr, > { > struct dma_fence *stub = dma_fence_get_stub(); > > + if (dma_fence_is_signaled(&evf_mgr->ev_fence->base)) > + return; > +
Clear NAK, that is racy. You can only access evf_mgr->ev_fence while holding the spinlock to make sure that it isn't replaced. > dma_resv_replace_fences(bo->tbo.base.resv, evf_mgr->ev_fence_ctx, > stub, DMA_RESV_USAGE_BOOKKEEP); > dma_fence_put(stub); > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c > index c1d8cee7894b..04256de4bee9 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c > @@ -292,11 +292,14 @@ static int amdgpu_gem_object_open(struct drm_gem_object > *obj, > else > ++bo_va->ref_count; > > - /* attach gfx eviction fence */ > - r = amdgpu_eviction_fence_attach(&fpriv->evf_mgr, abo); That here is buggy, fpriv->evf_mgr can only be accessed while holding the spinlock. > - if (r) { > - DRM_DEBUG_DRIVER("Failed to attach eviction fence to BO\n"); > - return r; > + /* attach gfx the validated eviction fence */ > + if (!IS_ERR_OR_NULL(fpriv->evf_mgr.ev_fence)) { Please don't use ERR_PTR functions on members. > + r = amdgpu_eviction_fence_attach(&fpriv->evf_mgr, abo); > + if (r) { > + DRM_DEBUG_DRIVER("Failed to attach eviction fence to > BO\n"); > + amdgpu_bo_unreserve(abo); > + return r; > + } We should always have a stub fence in fpriv->evf_mgr.ev_fence, so those checks are unnecessary. Regards, Christian. > } > > amdgpu_bo_unreserve(abo); > @@ -362,7 +365,8 @@ static void amdgpu_gem_object_close(struct drm_gem_object > *obj, > goto out_unlock; > } > > - if (!amdgpu_vm_is_bo_always_valid(vm, bo)) > + if (!amdgpu_vm_is_bo_always_valid(vm, bo) && > + !IS_ERR_OR_NULL(fpriv->evf_mgr.ev_fence)) > amdgpu_eviction_fence_detach(&fpriv->evf_mgr, bo); > > bo_va = amdgpu_vm_bo_find(vm, bo);