On Thu, 2025-07-17 at 16:44 +0800, Lin.Cao wrote:
> When application A submits jobs and application B submits a job with a
> dependency on A's fence, the normal flow wakes up the scheduler after
> processing each job. However, the optimization in
> drm_sched_entity_add_dependency_cb() uses a callback that only clears
> dependencies without waking up the scheduler.
> 
> When application A is killed before its jobs can run, the callback gets
> triggered but only clears the dependency without waking up the scheduler,
> causing the scheduler to enter sleep state and application B to hang.
> 
> Remove the optimization by deleting drm_sched_entity_clear_dep() and its
> usage, ensuring the scheduler is always woken up when dependencies are
> cleared.
> 
> Fixes: 777dbd458c89 ("drm/amdgpu: drop a dummy wakeup scheduler")
> Cc: sta...@vger.kernel.org # v4.6+
> 
> Signed-off-by: Lin.Cao <linca...@amd.com>
> Reviewed-by: Christian König <christian.koe...@amd.com>

Applied to drm-misc-fixes, thank you.

P.

> ---
>  drivers/gpu/drm/scheduler/sched_entity.c | 21 ++-------------------
>  1 file changed, 2 insertions(+), 19 deletions(-)
> 
> diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c
> index e671aa241720..ac678de7fe5e 100644
> --- a/drivers/gpu/drm/scheduler/sched_entity.c
> +++ b/drivers/gpu/drm/scheduler/sched_entity.c
> @@ -355,17 +355,6 @@ void drm_sched_entity_destroy(struct drm_sched_entity *entity)
>  }
>  EXPORT_SYMBOL(drm_sched_entity_destroy);
>  
> -/* drm_sched_entity_clear_dep - callback to clear the entities dependency */
> -static void drm_sched_entity_clear_dep(struct dma_fence *f,
> -                                    struct dma_fence_cb *cb)
> -{
> -     struct drm_sched_entity *entity =
> -             container_of(cb, struct drm_sched_entity, cb);
> -
> -     entity->dependency = NULL;
> -     dma_fence_put(f);
> -}
> -
>  /*
>   * drm_sched_entity_wakeup - callback to clear the entity's dependency and
>   * wake up the scheduler
> @@ -376,7 +365,8 @@ static void drm_sched_entity_wakeup(struct dma_fence *f,
>       struct drm_sched_entity *entity =
>               container_of(cb, struct drm_sched_entity, cb);
>  
> -     drm_sched_entity_clear_dep(f, cb);
> +     entity->dependency = NULL;
> +     dma_fence_put(f);
>       drm_sched_wakeup(entity->rq->sched);
>  }
>  
> @@ -429,13 +419,6 @@ static bool drm_sched_entity_add_dependency_cb(struct drm_sched_entity *entity)
>               fence = dma_fence_get(&s_fence->scheduled);
>               dma_fence_put(entity->dependency);
>               entity->dependency = fence;
> -             if (!dma_fence_add_callback(fence, &entity->cb,
> -                                         drm_sched_entity_clear_dep))
> -                     return true;
> -
> -             /* Ignore it when it is already scheduled */
> -             dma_fence_put(fence);
> -             return false;
>       }
>  
>       if (!dma_fence_add_callback(entity->dependency, &entity->cb,

Reply via email to