On 3/11/26 13:27, Khatri, Sunil wrote:
>>   static bool amdgpu_eviction_fence_enable_signaling(struct dma_fence *f)
>>   {
>>       struct amdgpu_eviction_fence_mgr *evf_mgr;
>>       struct amdgpu_eviction_fence *ev_fence;
>>   -    if (!f)
>> -        return true;
> Isn't there a possibility of the fence being signaled or f to be NULL?

No, I have no idea why everybody thinks adding such checks is a good idea.

The function is called through the function table of f, so a NULL pointer would 
have crashed long before calling that here.

In addition to that, such NULL checks should only be used in combination with a 
WARN_ON() since that is clearly incorrect use of the function.

Regards,
Christian.

>> -
>>       ev_fence = to_ev_fence(f);
>>       evf_mgr = ev_fence->evf_mgr;
>> -
>> -    schedule_delayed_work(&evf_mgr->suspend_work, 0);
>> +    schedule_work(&evf_mgr->suspend_work);
> 
> We can avoid to use evf_mgr instead directly use ev_fence->evf_mgr as it is 
> only one time usage.
> 
> Regards
> Sunil Khatri
> 
>>       return true;
>>   }
>>   @@ -148,22 +57,52 @@ static const struct dma_fence_ops 
>> amdgpu_eviction_fence_ops = {
>>       .enable_signaling = amdgpu_eviction_fence_enable_signaling,
>>   };
>>   -void amdgpu_eviction_fence_signal(struct amdgpu_eviction_fence_mgr 
>> *evf_mgr,
>> -                  struct amdgpu_eviction_fence *ev_fence)
>> +static void
>> +amdgpu_eviction_fence_suspend_worker(struct work_struct *work)
>>   {
>> -    spin_lock(&evf_mgr->ev_fence_lock);
>> -    dma_fence_signal(&ev_fence->base);
>> -    spin_unlock(&evf_mgr->ev_fence_lock);
>> +    struct amdgpu_eviction_fence_mgr *evf_mgr =
>> +        container_of(work, struct amdgpu_eviction_fence_mgr,
>> +                 suspend_work);
>> +    struct amdgpu_fpriv *fpriv =
>> +        container_of(evf_mgr, struct amdgpu_fpriv, evf_mgr);
>> +    struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr;
>> +    struct dma_fence *ev_fence;
>> +
>> +    mutex_lock(&uq_mgr->userq_mutex);
>> +    ev_fence = amdgpu_evf_mgr_get_fence(evf_mgr);
>> +    amdgpu_userq_evict(uq_mgr, !evf_mgr->shutdown);
>> +
>> +    /*
>> +     * Signaling the eviction fence must be done while holding the
>> +     * userq_mutex. Otherwise we won't resume the queues before issuing the
>> +     * next fence.
>> +     */
>> +    dma_fence_signal(ev_fence);
>> +    dma_fence_put(ev_fence);
>> +    mutex_unlock(&uq_mgr->userq_mutex);
>> +}
>> +
>> +void amdgpu_evf_mgr_attach_fence(struct amdgpu_eviction_fence_mgr *evf_mgr,
>> +                 struct amdgpu_bo *bo)
>> +{
>> +    struct dma_fence *ev_fence = amdgpu_evf_mgr_get_fence(evf_mgr);
>> +    struct dma_resv *resv = bo->tbo.base.resv;
>> +
>> +    dma_resv_add_fence(resv, ev_fence, DMA_RESV_USAGE_BOOKKEEP);
>> +    dma_fence_put(ev_fence);
>>   }
>>   -struct amdgpu_eviction_fence *
>> -amdgpu_eviction_fence_create(struct amdgpu_eviction_fence_mgr *evf_mgr)
>> +int amdgpu_evf_mgr_rearm(struct amdgpu_eviction_fence_mgr *evf_mgr,
>> +             struct drm_exec *exec)
>>   {
>>       struct amdgpu_eviction_fence *ev_fence;
>> +    struct drm_gem_object *obj;
>> +    unsigned long index;
>>   +    /* Create and initialize a new eviction fence */
>>       ev_fence = kzalloc(sizeof(*ev_fence), GFP_KERNEL);
>>       if (!ev_fence)
>> -        return NULL;
>> +        return -ENOMEM;
>>         ev_fence->evf_mgr = evf_mgr;
>>       get_task_comm(ev_fence->timeline_name, current);
>> @@ -171,56 +110,22 @@ amdgpu_eviction_fence_create(struct 
>> amdgpu_eviction_fence_mgr *evf_mgr)
>>       dma_fence_init64(&ev_fence->base, &amdgpu_eviction_fence_ops,
>>                &ev_fence->lock, evf_mgr->ev_fence_ctx,
>>                atomic_inc_return(&evf_mgr->ev_fence_seq));
>> -    return ev_fence;
>> -}
>> -
>> -void amdgpu_eviction_fence_destroy(struct amdgpu_eviction_fence_mgr 
>> *evf_mgr)
>> -{
>> -    struct amdgpu_eviction_fence *ev_fence;
>> -
>> -    /* Wait for any pending work to execute */
>> -    flush_delayed_work(&evf_mgr->suspend_work);
>> -
>> -    spin_lock(&evf_mgr->ev_fence_lock);
>> -    ev_fence = evf_mgr->ev_fence;
>> -    spin_unlock(&evf_mgr->ev_fence_lock);
>> -
>> -    if (!ev_fence)
>> -        return;
>> -
>> -    dma_fence_wait(&ev_fence->base, false);
>>   -    /* Last unref of ev_fence */
>> -    dma_fence_put(&ev_fence->base);
>> -}
>> -
>> -int amdgpu_eviction_fence_attach(struct amdgpu_eviction_fence_mgr *evf_mgr,
>> -                 struct amdgpu_bo *bo)
>> -{
>> -    struct amdgpu_eviction_fence *ev_fence;
>> -    struct dma_resv *resv = bo->tbo.base.resv;
>> -    int ret;
>> +    /* Remember it for newly added BOs */
>> +    dma_fence_put(evf_mgr->ev_fence);
>> +    evf_mgr->ev_fence = &ev_fence->base;
>>   -    if (!resv)
>> -        return 0;
>> +    /* And add it to all existing BOs */
>> +    drm_exec_for_each_locked_object(exec, index, obj) {
>> +        struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
>>   -    ret = dma_resv_reserve_fences(resv, 1);
>> -    if (ret) {
>> -        DRM_DEBUG_DRIVER("Failed to resv fence space\n");
>> -        return ret;
>> +        amdgpu_evf_mgr_attach_fence(evf_mgr, bo);
>>       }
>> -
>> -    spin_lock(&evf_mgr->ev_fence_lock);
>> -    ev_fence = evf_mgr->ev_fence;
>> -    if (ev_fence)
>> -        dma_resv_add_fence(resv, &ev_fence->base, DMA_RESV_USAGE_BOOKKEEP);
>> -    spin_unlock(&evf_mgr->ev_fence_lock);
>> -
>>       return 0;
>>   }
>>   -void amdgpu_eviction_fence_detach(struct amdgpu_eviction_fence_mgr 
>> *evf_mgr,
>> -                  struct amdgpu_bo *bo)
>> +void amdgpu_evf_mgr_detach_fence(struct amdgpu_eviction_fence_mgr *evf_mgr,
>> +                 struct amdgpu_bo *bo)
>>   {
>>       struct dma_fence *stub = dma_fence_get_stub();
>>   @@ -229,13 +134,25 @@ void amdgpu_eviction_fence_detach(struct 
>> amdgpu_eviction_fence_mgr *evf_mgr,
>>       dma_fence_put(stub);
>>   }
>>   -int amdgpu_eviction_fence_init(struct amdgpu_eviction_fence_mgr *evf_mgr)
>> +void amdgpu_evf_mgr_init(struct amdgpu_eviction_fence_mgr *evf_mgr)
>>   {
>> -    /* This needs to be done one time per open */
>>       atomic_set(&evf_mgr->ev_fence_seq, 0);
>>       evf_mgr->ev_fence_ctx = dma_fence_context_alloc(1);
>> -    spin_lock_init(&evf_mgr->ev_fence_lock);
>> +    evf_mgr->ev_fence = dma_fence_get_stub();
>>   -    INIT_DELAYED_WORK(&evf_mgr->suspend_work, 
>> amdgpu_eviction_fence_suspend_worker);
>> -    return 0;
>> +    INIT_WORK(&evf_mgr->suspend_work, amdgpu_eviction_fence_suspend_worker);
>> +}
>> +
>> +void amdgpu_evf_mgr_shutdown(struct amdgpu_eviction_fence_mgr *evf_mgr)
>> +{
>> +    evf_mgr->shutdown = true;
>> +    flush_work(&evf_mgr->suspend_work);
>> +}
>> +
>> +void amdgpu_evf_mgr_fini(struct amdgpu_eviction_fence_mgr *evf_mgr)
>> +{
>> +    dma_fence_wait(rcu_dereference_protected(evf_mgr->ev_fence, true),
>> +               false);
>> +    flush_work(&evf_mgr->suspend_work);
>> +    dma_fence_put(evf_mgr->ev_fence);
>>   }
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h
>> index fcd867b7147d..527de3a23583 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h
>> @@ -25,6 +25,8 @@
>>   #ifndef AMDGPU_EV_FENCE_H_
>>   #define AMDGPU_EV_FENCE_H_
>>   +#include <linux/dma-fence.h>
>> +
>>   struct amdgpu_eviction_fence {
>>       struct dma_fence base;
>>       spinlock_t     lock;
>> @@ -35,35 +37,35 @@ struct amdgpu_eviction_fence {
>>   struct amdgpu_eviction_fence_mgr {
>>       u64            ev_fence_ctx;
>>       atomic_t        ev_fence_seq;
>> -    spinlock_t        ev_fence_lock;
>> -    struct amdgpu_eviction_fence *ev_fence;
>> -    struct delayed_work    suspend_work;
>> -    uint8_t fd_closing;
>> -};
>> -
>> -/* Eviction fence helper functions */
>> -struct amdgpu_eviction_fence *
>> -amdgpu_eviction_fence_create(struct amdgpu_eviction_fence_mgr *evf_mgr);
>>   -void
>> -amdgpu_eviction_fence_destroy(struct amdgpu_eviction_fence_mgr *evf_mgr);
>> -
>> -int
>> -amdgpu_eviction_fence_attach(struct amdgpu_eviction_fence_mgr *evf_mgr,
>> -                 struct amdgpu_bo *bo);
>> +    /*
>> +     * Only updated while holding the VM resv lock.
>> +     * Only signaled while holding the userq mutex.
>> +     */
>> +    struct dma_fence __rcu    *ev_fence;
>> +    struct work_struct    suspend_work;
>> +    bool            shutdown;
>> +};
>>   -void
>> -amdgpu_eviction_fence_detach(struct amdgpu_eviction_fence_mgr *evf_mgr,
>> -                 struct amdgpu_bo *bo);
>> +static inline struct dma_fence *
>> +amdgpu_evf_mgr_get_fence(struct amdgpu_eviction_fence_mgr *evf_mgr)
>> +{
>> +    struct dma_fence *ev_fence;
>>   -int
>> -amdgpu_eviction_fence_init(struct amdgpu_eviction_fence_mgr *evf_mgr);
>> +    rcu_read_lock();
>> +    ev_fence = dma_fence_get_rcu_safe(&evf_mgr->ev_fence);
>> +    rcu_read_unlock();
>> +    return ev_fence;
>> +}
>>   -void
>> -amdgpu_eviction_fence_signal(struct amdgpu_eviction_fence_mgr *evf_mgr,
>> -                 struct amdgpu_eviction_fence *ev_fence);
>> +void amdgpu_evf_mgr_attach_fence(struct amdgpu_eviction_fence_mgr *evf_mgr,
>> +                 struct amdgpu_bo *bo);
>> +int amdgpu_evf_mgr_rearm(struct amdgpu_eviction_fence_mgr *evf_mgr,
>> +             struct drm_exec *exec);
>> +void amdgpu_evf_mgr_detach_fence(struct amdgpu_eviction_fence_mgr *evf_mgr,
>> +                 struct amdgpu_bo *bo);
>> +void amdgpu_evf_mgr_init(struct amdgpu_eviction_fence_mgr *evf_mgr);
>> +void amdgpu_evf_mgr_shutdown(struct amdgpu_eviction_fence_mgr *evf_mgr);
>> +void amdgpu_evf_mgr_fini(struct amdgpu_eviction_fence_mgr *evf_mgr);
>>   -int
>> -amdgpu_eviction_fence_replace_fence(struct amdgpu_eviction_fence_mgr 
>> *evf_mgr,
>> -                    struct drm_exec *exec);
>>   #endif
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> index 5c90de58cc28..e28abfd04867 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> @@ -263,13 +263,7 @@ static int amdgpu_gem_object_open(struct drm_gem_object 
>> *obj,
>>       else
>>           ++bo_va->ref_count;
>>   -    /* attach gfx eviction fence */
>> -    r = amdgpu_eviction_fence_attach(&fpriv->evf_mgr, abo);
>> -    if (r) {
>> -        DRM_DEBUG_DRIVER("Failed to attach eviction fence to BO\n");
>> -        amdgpu_bo_unreserve(abo);
>> -        return r;
>> -    }
>> +    amdgpu_evf_mgr_attach_fence(&fpriv->evf_mgr, abo);
>>       drm_exec_fini(&exec);
>>         /* Validate and add eviction fence to DMABuf imports with dynamic
>> @@ -337,7 +331,7 @@ static void amdgpu_gem_object_close(struct 
>> drm_gem_object *obj,
>>       }
>>         if (!amdgpu_vm_is_bo_always_valid(vm, bo))
>> -        amdgpu_eviction_fence_detach(&fpriv->evf_mgr, bo);
>> +        amdgpu_evf_mgr_detach_fence(&fpriv->evf_mgr, bo);
>>         bo_va = amdgpu_vm_bo_find(vm, bo);
>>       if (!bo_va || --bo_va->ref_count)
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> index f69332eed051..f512b6ec6c53 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> @@ -1522,10 +1522,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, 
>> struct drm_file *file_priv)
>>                "Failed to init usermode queue manager (%d), use legacy 
>> workload submission only\n",
>>                r);
>>   -    r = amdgpu_eviction_fence_init(&fpriv->evf_mgr);
>> -    if (r)
>> -        goto error_vm;
>> -
>> +    amdgpu_evf_mgr_init(&fpriv->evf_mgr);
>>       amdgpu_ctx_mgr_init(&fpriv->ctx_mgr, adev);
>>         file_priv->driver_priv = fpriv;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
>> index 85adc53eb523..67ba46851c2b 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
>> @@ -472,17 +472,16 @@ void
>>   amdgpu_userq_ensure_ev_fence(struct amdgpu_userq_mgr *uq_mgr,
>>                    struct amdgpu_eviction_fence_mgr *evf_mgr)
>>   {
>> -    struct amdgpu_eviction_fence *ev_fence;
>> +    struct dma_fence *ev_fence;
>>     retry:
>>       /* Flush any pending resume work to create ev_fence */
>>       flush_delayed_work(&uq_mgr->resume_work);
>>         mutex_lock(&uq_mgr->userq_mutex);
>> -    spin_lock(&evf_mgr->ev_fence_lock);
>> -    ev_fence = evf_mgr->ev_fence;
>> -    spin_unlock(&evf_mgr->ev_fence_lock);
>> -    if (!ev_fence || dma_fence_is_signaled(&ev_fence->base)) {
>> +    ev_fence = amdgpu_evf_mgr_get_fence(evf_mgr);
>> +    if (dma_fence_is_signaled(ev_fence)) {
>> +        dma_fence_put(ev_fence);
>>           mutex_unlock(&uq_mgr->userq_mutex);
>>           /*
>>            * Looks like there was no pending resume work,
>> @@ -491,6 +490,7 @@ amdgpu_userq_ensure_ev_fence(struct amdgpu_userq_mgr 
>> *uq_mgr,
>>           schedule_delayed_work(&uq_mgr->resume_work, 0);
>>           goto retry;
>>       }
>> +    dma_fence_put(ev_fence);
>>   }
>>     int amdgpu_userq_create_object(struct amdgpu_userq_mgr *uq_mgr,
>> @@ -1196,7 +1196,7 @@ amdgpu_userq_vm_validate(struct amdgpu_userq_mgr 
>> *uq_mgr)
>>           dma_fence_wait(bo_va->last_pt_update, false);
>>       dma_fence_wait(vm->last_update, false);
>>   -    ret = amdgpu_eviction_fence_replace_fence(&fpriv->evf_mgr, &exec);
>> +    ret = amdgpu_evf_mgr_rearm(&fpriv->evf_mgr, &exec);
>>       if (ret)
>>           drm_file_err(uq_mgr->file, "Failed to replace eviction fence\n");
>>   @@ -1216,11 +1216,13 @@ static void amdgpu_userq_restore_worker(struct 
>> work_struct *work)
>>   {
>>       struct amdgpu_userq_mgr *uq_mgr = work_to_uq_mgr(work, 
>> resume_work.work);
>>       struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr);
>> +    struct dma_fence *ev_fence;
>>       int ret;
>>   -    flush_delayed_work(&fpriv->evf_mgr.suspend_work);
>> -
>>       mutex_lock(&uq_mgr->userq_mutex);
>> +    ev_fence = amdgpu_evf_mgr_get_fence(&fpriv->evf_mgr);
>> +    if (!dma_fence_is_signaled(ev_fence))
>> +        goto unlock;
>>         ret = amdgpu_userq_vm_validate(uq_mgr);
>>       if (ret) {
>> @@ -1236,6 +1238,7 @@ static void amdgpu_userq_restore_worker(struct 
>> work_struct *work)
>>     unlock:
>>       mutex_unlock(&uq_mgr->userq_mutex);
>> +    dma_fence_put(ev_fence);
>>   }
>>     static int
>> @@ -1311,11 +1314,8 @@ amdgpu_userq_wait_for_signal(struct amdgpu_userq_mgr 
>> *uq_mgr)
>>   }
>>     void
>> -amdgpu_userq_evict(struct amdgpu_userq_mgr *uq_mgr,
>> -           struct amdgpu_eviction_fence *ev_fence)
>> +amdgpu_userq_evict(struct amdgpu_userq_mgr *uq_mgr, bool schedule_resume)
>>   {
>> -    struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr);
>> -    struct amdgpu_eviction_fence_mgr *evf_mgr = &fpriv->evf_mgr;
>>       struct amdgpu_device *adev = uq_mgr->adev;
>>       int ret;
>>   @@ -1328,10 +1328,7 @@ amdgpu_userq_evict(struct amdgpu_userq_mgr *uq_mgr,
>>       if (ret)
>>           dev_err(adev->dev, "Failed to evict userqueue\n");
>>   -    /* Signal current eviction fence */
>> -    amdgpu_eviction_fence_signal(evf_mgr, ev_fence);
>> -
>> -    if (!evf_mgr->fd_closing)
>> +    if (schedule_resume)
>>           schedule_delayed_work(&uq_mgr->resume_work, 0);
>>   }
>>   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
>> index 54e1997b3cc0..82306d489064 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
>> @@ -133,7 +133,7 @@ void amdgpu_userq_destroy_object(struct amdgpu_userq_mgr 
>> *uq_mgr,
>>                    struct amdgpu_userq_obj *userq_obj);
>>     void amdgpu_userq_evict(struct amdgpu_userq_mgr *uq_mgr,
>> -            struct amdgpu_eviction_fence *ev_fence);
>> +            bool schedule_resume);
>>     void amdgpu_userq_ensure_ev_fence(struct amdgpu_userq_mgr *userq_mgr,
>>                     struct amdgpu_eviction_fence_mgr *evf_mgr);

Reply via email to