On 3/11/26 13:27, Khatri, Sunil wrote:
>> static bool amdgpu_eviction_fence_enable_signaling(struct dma_fence *f)
>> {
>> struct amdgpu_eviction_fence_mgr *evf_mgr;
>> struct amdgpu_eviction_fence *ev_fence;
>> - if (!f)
>> - return true;
> Isn't there a possibility of the fence being signaled or of f being NULL?
No, I have no idea why everybody thinks adding such checks is a good idea.
The function is called through the function table of f, so a NULL pointer would
have crashed long before calling that here.
In addition to that, such NULL checks should only be used in combination with a
WARN_ON(), since passing NULL here is clearly incorrect use of the function.
Regards,
Christian.
>> -
>> ev_fence = to_ev_fence(f);
>> evf_mgr = ev_fence->evf_mgr;
>> -
>> - schedule_delayed_work(&evf_mgr->suspend_work, 0);
>> + schedule_work(&evf_mgr->suspend_work);
>
> We can avoid the evf_mgr local variable and instead use ev_fence->evf_mgr
> directly, as it is only used once.
>
> Regards
> Sunil Khatri
>
>> return true;
>> }
>> @@ -148,22 +57,52 @@ static const struct dma_fence_ops
>> amdgpu_eviction_fence_ops = {
>> .enable_signaling = amdgpu_eviction_fence_enable_signaling,
>> };
>> -void amdgpu_eviction_fence_signal(struct amdgpu_eviction_fence_mgr
>> *evf_mgr,
>> - struct amdgpu_eviction_fence *ev_fence)
>> +static void
>> +amdgpu_eviction_fence_suspend_worker(struct work_struct *work)
>> {
>> - spin_lock(&evf_mgr->ev_fence_lock);
>> - dma_fence_signal(&ev_fence->base);
>> - spin_unlock(&evf_mgr->ev_fence_lock);
>> + struct amdgpu_eviction_fence_mgr *evf_mgr =
>> + container_of(work, struct amdgpu_eviction_fence_mgr,
>> + suspend_work);
>> + struct amdgpu_fpriv *fpriv =
>> + container_of(evf_mgr, struct amdgpu_fpriv, evf_mgr);
>> + struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr;
>> + struct dma_fence *ev_fence;
>> +
>> + mutex_lock(&uq_mgr->userq_mutex);
>> + ev_fence = amdgpu_evf_mgr_get_fence(evf_mgr);
>> + amdgpu_userq_evict(uq_mgr, !evf_mgr->shutdown);
>> +
>> + /*
>> + * Signaling the eviction fence must be done while holding the
>> + * userq_mutex. Otherwise we won't resume the queues before issuing the
>> + * next fence.
>> + */
>> + dma_fence_signal(ev_fence);
>> + dma_fence_put(ev_fence);
>> + mutex_unlock(&uq_mgr->userq_mutex);
>> +}
>> +
>> +void amdgpu_evf_mgr_attach_fence(struct amdgpu_eviction_fence_mgr *evf_mgr,
>> + struct amdgpu_bo *bo)
>> +{
>> + struct dma_fence *ev_fence = amdgpu_evf_mgr_get_fence(evf_mgr);
>> + struct dma_resv *resv = bo->tbo.base.resv;
>> +
>> + dma_resv_add_fence(resv, ev_fence, DMA_RESV_USAGE_BOOKKEEP);
>> + dma_fence_put(ev_fence);
>> }
>> -struct amdgpu_eviction_fence *
>> -amdgpu_eviction_fence_create(struct amdgpu_eviction_fence_mgr *evf_mgr)
>> +int amdgpu_evf_mgr_rearm(struct amdgpu_eviction_fence_mgr *evf_mgr,
>> + struct drm_exec *exec)
>> {
>> struct amdgpu_eviction_fence *ev_fence;
>> + struct drm_gem_object *obj;
>> + unsigned long index;
>> + /* Create and initialize a new eviction fence */
>> ev_fence = kzalloc(sizeof(*ev_fence), GFP_KERNEL);
>> if (!ev_fence)
>> - return NULL;
>> + return -ENOMEM;
>> ev_fence->evf_mgr = evf_mgr;
>> get_task_comm(ev_fence->timeline_name, current);
>> @@ -171,56 +110,22 @@ amdgpu_eviction_fence_create(struct
>> amdgpu_eviction_fence_mgr *evf_mgr)
>> dma_fence_init64(&ev_fence->base, &amdgpu_eviction_fence_ops,
>> &ev_fence->lock, evf_mgr->ev_fence_ctx,
>> atomic_inc_return(&evf_mgr->ev_fence_seq));
>> - return ev_fence;
>> -}
>> -
>> -void amdgpu_eviction_fence_destroy(struct amdgpu_eviction_fence_mgr
>> *evf_mgr)
>> -{
>> - struct amdgpu_eviction_fence *ev_fence;
>> -
>> - /* Wait for any pending work to execute */
>> - flush_delayed_work(&evf_mgr->suspend_work);
>> -
>> - spin_lock(&evf_mgr->ev_fence_lock);
>> - ev_fence = evf_mgr->ev_fence;
>> - spin_unlock(&evf_mgr->ev_fence_lock);
>> -
>> - if (!ev_fence)
>> - return;
>> -
>> - dma_fence_wait(&ev_fence->base, false);
>> - /* Last unref of ev_fence */
>> - dma_fence_put(&ev_fence->base);
>> -}
>> -
>> -int amdgpu_eviction_fence_attach(struct amdgpu_eviction_fence_mgr *evf_mgr,
>> - struct amdgpu_bo *bo)
>> -{
>> - struct amdgpu_eviction_fence *ev_fence;
>> - struct dma_resv *resv = bo->tbo.base.resv;
>> - int ret;
>> + /* Remember it for newly added BOs */
>> + dma_fence_put(evf_mgr->ev_fence);
>> + evf_mgr->ev_fence = &ev_fence->base;
>> - if (!resv)
>> - return 0;
>> + /* And add it to all existing BOs */
>> + drm_exec_for_each_locked_object(exec, index, obj) {
>> + struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
>> - ret = dma_resv_reserve_fences(resv, 1);
>> - if (ret) {
>> - DRM_DEBUG_DRIVER("Failed to resv fence space\n");
>> - return ret;
>> + amdgpu_evf_mgr_attach_fence(evf_mgr, bo);
>> }
>> -
>> - spin_lock(&evf_mgr->ev_fence_lock);
>> - ev_fence = evf_mgr->ev_fence;
>> - if (ev_fence)
>> - dma_resv_add_fence(resv, &ev_fence->base, DMA_RESV_USAGE_BOOKKEEP);
>> - spin_unlock(&evf_mgr->ev_fence_lock);
>> -
>> return 0;
>> }
>> -void amdgpu_eviction_fence_detach(struct amdgpu_eviction_fence_mgr
>> *evf_mgr,
>> - struct amdgpu_bo *bo)
>> +void amdgpu_evf_mgr_detach_fence(struct amdgpu_eviction_fence_mgr *evf_mgr,
>> + struct amdgpu_bo *bo)
>> {
>> struct dma_fence *stub = dma_fence_get_stub();
>> @@ -229,13 +134,25 @@ void amdgpu_eviction_fence_detach(struct
>> amdgpu_eviction_fence_mgr *evf_mgr,
>> dma_fence_put(stub);
>> }
>> -int amdgpu_eviction_fence_init(struct amdgpu_eviction_fence_mgr *evf_mgr)
>> +void amdgpu_evf_mgr_init(struct amdgpu_eviction_fence_mgr *evf_mgr)
>> {
>> - /* This needs to be done one time per open */
>> atomic_set(&evf_mgr->ev_fence_seq, 0);
>> evf_mgr->ev_fence_ctx = dma_fence_context_alloc(1);
>> - spin_lock_init(&evf_mgr->ev_fence_lock);
>> + evf_mgr->ev_fence = dma_fence_get_stub();
>> - INIT_DELAYED_WORK(&evf_mgr->suspend_work,
>> amdgpu_eviction_fence_suspend_worker);
>> - return 0;
>> + INIT_WORK(&evf_mgr->suspend_work, amdgpu_eviction_fence_suspend_worker);
>> +}
>> +
>> +void amdgpu_evf_mgr_shutdown(struct amdgpu_eviction_fence_mgr *evf_mgr)
>> +{
>> + evf_mgr->shutdown = true;
>> + flush_work(&evf_mgr->suspend_work);
>> +}
>> +
>> +void amdgpu_evf_mgr_fini(struct amdgpu_eviction_fence_mgr *evf_mgr)
>> +{
>> + dma_fence_wait(rcu_dereference_protected(evf_mgr->ev_fence, true),
>> + false);
>> + flush_work(&evf_mgr->suspend_work);
>> + dma_fence_put(evf_mgr->ev_fence);
>> }
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h
>> index fcd867b7147d..527de3a23583 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h
>> @@ -25,6 +25,8 @@
>> #ifndef AMDGPU_EV_FENCE_H_
>> #define AMDGPU_EV_FENCE_H_
>> +#include <linux/dma-fence.h>
>> +
>> struct amdgpu_eviction_fence {
>> struct dma_fence base;
>> spinlock_t lock;
>> @@ -35,35 +37,35 @@ struct amdgpu_eviction_fence {
>> struct amdgpu_eviction_fence_mgr {
>> u64 ev_fence_ctx;
>> atomic_t ev_fence_seq;
>> - spinlock_t ev_fence_lock;
>> - struct amdgpu_eviction_fence *ev_fence;
>> - struct delayed_work suspend_work;
>> - uint8_t fd_closing;
>> -};
>> -
>> -/* Eviction fence helper functions */
>> -struct amdgpu_eviction_fence *
>> -amdgpu_eviction_fence_create(struct amdgpu_eviction_fence_mgr *evf_mgr);
>> -void
>> -amdgpu_eviction_fence_destroy(struct amdgpu_eviction_fence_mgr *evf_mgr);
>> -
>> -int
>> -amdgpu_eviction_fence_attach(struct amdgpu_eviction_fence_mgr *evf_mgr,
>> - struct amdgpu_bo *bo);
>> + /*
>> + * Only updated while holding the VM resv lock.
>> + * Only signaled while holding the userq mutex.
>> + */
>> + struct dma_fence __rcu *ev_fence;
>> + struct work_struct suspend_work;
>> + bool shutdown;
>> +};
>> -void
>> -amdgpu_eviction_fence_detach(struct amdgpu_eviction_fence_mgr *evf_mgr,
>> - struct amdgpu_bo *bo);
>> +static inline struct dma_fence *
>> +amdgpu_evf_mgr_get_fence(struct amdgpu_eviction_fence_mgr *evf_mgr)
>> +{
>> + struct dma_fence *ev_fence;
>> -int
>> -amdgpu_eviction_fence_init(struct amdgpu_eviction_fence_mgr *evf_mgr);
>> + rcu_read_lock();
>> + ev_fence = dma_fence_get_rcu_safe(&evf_mgr->ev_fence);
>> + rcu_read_unlock();
>> + return ev_fence;
>> +}
>> -void
>> -amdgpu_eviction_fence_signal(struct amdgpu_eviction_fence_mgr *evf_mgr,
>> - struct amdgpu_eviction_fence *ev_fence);
>> +void amdgpu_evf_mgr_attach_fence(struct amdgpu_eviction_fence_mgr *evf_mgr,
>> + struct amdgpu_bo *bo);
>> +int amdgpu_evf_mgr_rearm(struct amdgpu_eviction_fence_mgr *evf_mgr,
>> + struct drm_exec *exec);
>> +void amdgpu_evf_mgr_detach_fence(struct amdgpu_eviction_fence_mgr *evf_mgr,
>> + struct amdgpu_bo *bo);
>> +void amdgpu_evf_mgr_init(struct amdgpu_eviction_fence_mgr *evf_mgr);
>> +void amdgpu_evf_mgr_shutdown(struct amdgpu_eviction_fence_mgr *evf_mgr);
>> +void amdgpu_evf_mgr_fini(struct amdgpu_eviction_fence_mgr *evf_mgr);
>> -int
>> -amdgpu_eviction_fence_replace_fence(struct amdgpu_eviction_fence_mgr
>> *evf_mgr,
>> - struct drm_exec *exec);
>> #endif
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> index 5c90de58cc28..e28abfd04867 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> @@ -263,13 +263,7 @@ static int amdgpu_gem_object_open(struct drm_gem_object
>> *obj,
>> else
>> ++bo_va->ref_count;
>> - /* attach gfx eviction fence */
>> - r = amdgpu_eviction_fence_attach(&fpriv->evf_mgr, abo);
>> - if (r) {
>> - DRM_DEBUG_DRIVER("Failed to attach eviction fence to BO\n");
>> - amdgpu_bo_unreserve(abo);
>> - return r;
>> - }
>> + amdgpu_evf_mgr_attach_fence(&fpriv->evf_mgr, abo);
>> drm_exec_fini(&exec);
>> /* Validate and add eviction fence to DMABuf imports with dynamic
>> @@ -337,7 +331,7 @@ static void amdgpu_gem_object_close(struct
>> drm_gem_object *obj,
>> }
>> if (!amdgpu_vm_is_bo_always_valid(vm, bo))
>> - amdgpu_eviction_fence_detach(&fpriv->evf_mgr, bo);
>> + amdgpu_evf_mgr_detach_fence(&fpriv->evf_mgr, bo);
>> bo_va = amdgpu_vm_bo_find(vm, bo);
>> if (!bo_va || --bo_va->ref_count)
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> index f69332eed051..f512b6ec6c53 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> @@ -1522,10 +1522,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev,
>> struct drm_file *file_priv)
>> "Failed to init usermode queue manager (%d), use legacy
>> workload submission only\n",
>> r);
>> - r = amdgpu_eviction_fence_init(&fpriv->evf_mgr);
>> - if (r)
>> - goto error_vm;
>> -
>> + amdgpu_evf_mgr_init(&fpriv->evf_mgr);
>> amdgpu_ctx_mgr_init(&fpriv->ctx_mgr, adev);
>> file_priv->driver_priv = fpriv;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
>> index 85adc53eb523..67ba46851c2b 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
>> @@ -472,17 +472,16 @@ void
>> amdgpu_userq_ensure_ev_fence(struct amdgpu_userq_mgr *uq_mgr,
>> struct amdgpu_eviction_fence_mgr *evf_mgr)
>> {
>> - struct amdgpu_eviction_fence *ev_fence;
>> + struct dma_fence *ev_fence;
>> retry:
>> /* Flush any pending resume work to create ev_fence */
>> flush_delayed_work(&uq_mgr->resume_work);
>> mutex_lock(&uq_mgr->userq_mutex);
>> - spin_lock(&evf_mgr->ev_fence_lock);
>> - ev_fence = evf_mgr->ev_fence;
>> - spin_unlock(&evf_mgr->ev_fence_lock);
>> - if (!ev_fence || dma_fence_is_signaled(&ev_fence->base)) {
>> + ev_fence = amdgpu_evf_mgr_get_fence(evf_mgr);
>> + if (dma_fence_is_signaled(ev_fence)) {
>> + dma_fence_put(ev_fence);
>> mutex_unlock(&uq_mgr->userq_mutex);
>> /*
>> * Looks like there was no pending resume work,
>> @@ -491,6 +490,7 @@ amdgpu_userq_ensure_ev_fence(struct amdgpu_userq_mgr
>> *uq_mgr,
>> schedule_delayed_work(&uq_mgr->resume_work, 0);
>> goto retry;
>> }
>> + dma_fence_put(ev_fence);
>> }
>> int amdgpu_userq_create_object(struct amdgpu_userq_mgr *uq_mgr,
>> @@ -1196,7 +1196,7 @@ amdgpu_userq_vm_validate(struct amdgpu_userq_mgr
>> *uq_mgr)
>> dma_fence_wait(bo_va->last_pt_update, false);
>> dma_fence_wait(vm->last_update, false);
>> - ret = amdgpu_eviction_fence_replace_fence(&fpriv->evf_mgr, &exec);
>> + ret = amdgpu_evf_mgr_rearm(&fpriv->evf_mgr, &exec);
>> if (ret)
>> drm_file_err(uq_mgr->file, "Failed to replace eviction fence\n");
>> @@ -1216,11 +1216,13 @@ static void amdgpu_userq_restore_worker(struct
>> work_struct *work)
>> {
>> struct amdgpu_userq_mgr *uq_mgr = work_to_uq_mgr(work,
>> resume_work.work);
>> struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr);
>> + struct dma_fence *ev_fence;
>> int ret;
>> - flush_delayed_work(&fpriv->evf_mgr.suspend_work);
>> -
>> mutex_lock(&uq_mgr->userq_mutex);
>> + ev_fence = amdgpu_evf_mgr_get_fence(&fpriv->evf_mgr);
>> + if (!dma_fence_is_signaled(ev_fence))
>> + goto unlock;
>> ret = amdgpu_userq_vm_validate(uq_mgr);
>> if (ret) {
>> @@ -1236,6 +1238,7 @@ static void amdgpu_userq_restore_worker(struct
>> work_struct *work)
>> unlock:
>> mutex_unlock(&uq_mgr->userq_mutex);
>> + dma_fence_put(ev_fence);
>> }
>> static int
>> @@ -1311,11 +1314,8 @@ amdgpu_userq_wait_for_signal(struct amdgpu_userq_mgr
>> *uq_mgr)
>> }
>> void
>> -amdgpu_userq_evict(struct amdgpu_userq_mgr *uq_mgr,
>> - struct amdgpu_eviction_fence *ev_fence)
>> +amdgpu_userq_evict(struct amdgpu_userq_mgr *uq_mgr, bool schedule_resume)
>> {
>> - struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr);
>> - struct amdgpu_eviction_fence_mgr *evf_mgr = &fpriv->evf_mgr;
>> struct amdgpu_device *adev = uq_mgr->adev;
>> int ret;
>> @@ -1328,10 +1328,7 @@ amdgpu_userq_evict(struct amdgpu_userq_mgr *uq_mgr,
>> if (ret)
>> dev_err(adev->dev, "Failed to evict userqueue\n");
>> - /* Signal current eviction fence */
>> - amdgpu_eviction_fence_signal(evf_mgr, ev_fence);
>> -
>> - if (!evf_mgr->fd_closing)
>> + if (schedule_resume)
>> schedule_delayed_work(&uq_mgr->resume_work, 0);
>> }
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
>> index 54e1997b3cc0..82306d489064 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
>> @@ -133,7 +133,7 @@ void amdgpu_userq_destroy_object(struct amdgpu_userq_mgr
>> *uq_mgr,
>> struct amdgpu_userq_obj *userq_obj);
>> void amdgpu_userq_evict(struct amdgpu_userq_mgr *uq_mgr,
>> - struct amdgpu_eviction_fence *ev_fence);
>> + bool schedule_resume);
>> void amdgpu_userq_ensure_ev_fence(struct amdgpu_userq_mgr *userq_mgr,
>> struct amdgpu_eviction_fence_mgr *evf_mgr);