On Mon, Jan 20, 2025 at 04:52:39PM +0000, Tvrtko Ursulin wrote:
> The code assumes queue node is the first element in struct
> drm_sched_job.

I'd add that this assumption lies in doing the NULL check after the
container_of(). Without saying that, it might not be that obvious.

> Since this is not documented it can be very fragile so lets
> just remove the internal helper and explicitly check for "nothing
> dequeued", before converting the node to a sched job.
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursu...@igalia.com>
> Cc: Christian König <christian.koe...@amd.com>
> Cc: Danilo Krummrich <d...@kernel.org>
> Cc: Matthew Brost <matthew.br...@intel.com>
> Cc: Philipp Stanner <pha...@kernel.org>
> ---
>  drivers/gpu/drm/scheduler/sched_entity.c | 18 +++++++++---------
>  drivers/gpu/drm/scheduler/sched_main.c   | 10 +++++-----
>  2 files changed, 14 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/gpu/drm/scheduler/sched_entity.c 
> b/drivers/gpu/drm/scheduler/sched_entity.c
> index 7c0d266a89ef..8992bb432ec6 100644
> --- a/drivers/gpu/drm/scheduler/sched_entity.c
> +++ b/drivers/gpu/drm/scheduler/sched_entity.c
> @@ -30,9 +30,6 @@
>  
>  #include "gpu_scheduler_trace.h"
>  
> -#define to_drm_sched_job(sched_job)          \
> -             container_of((sched_job), struct drm_sched_job, queue_node)
> -
>  /**
>   * drm_sched_entity_init - Init a context entity used by scheduler when
>   * submit to HW ring.
> @@ -476,11 +473,14 @@ drm_sched_job_dependency(struct drm_sched_job *job,
>  struct drm_sched_job *drm_sched_entity_pop_job(struct drm_sched_entity 
> *entity)
>  {
>       struct drm_sched_job *sched_job;
> +     struct spsc_node *node;
>  
> -     sched_job = to_drm_sched_job(spsc_queue_peek(&entity->job_queue));
> -     if (!sched_job)
> +     node = spsc_queue_peek(&entity->job_queue);
> +     if (!node)
>               return NULL;
>  
> +     sched_job = container_of(node, typeof(*sched_job), queue_node);

So, here you have this pattern for a valid used case and even twice. I think you
should add drm_sched_entity_peek_job() instead and document what the
preconditions are to be allowed to peek given it's an spsc queue.

> +
>       while ((entity->dependency =
>                       drm_sched_job_dependency(sched_job, entity))) {
>               trace_drm_sched_job_wait_dep(sched_job, entity->dependency);
> @@ -511,10 +511,10 @@ struct drm_sched_job *drm_sched_entity_pop_job(struct 
> drm_sched_entity *entity)
>        * the timestamp of the next job, if any.
>        */
>       if (drm_sched_policy == DRM_SCHED_POLICY_FIFO) {
> -             struct drm_sched_job *next;
> -
> -             next = to_drm_sched_job(spsc_queue_peek(&entity->job_queue));
> -             if (next) {
> +             node = spsc_queue_peek(&entity->job_queue);
> +             if (node) {
> +                     struct drm_sched_job *next =
> +                             container_of(node, typeof(*next), queue_node);
>                       struct drm_sched_rq *rq;
>  
>                       spin_lock(&entity->lock);
> diff --git a/drivers/gpu/drm/scheduler/sched_main.c 
> b/drivers/gpu/drm/scheduler/sched_main.c
> index a48be16ab84f..66eee6372253 100644
> --- a/drivers/gpu/drm/scheduler/sched_main.c
> +++ b/drivers/gpu/drm/scheduler/sched_main.c
> @@ -87,9 +87,6 @@ static struct lockdep_map drm_sched_lockdep_map = {
>  };
>  #endif
>  
> -#define to_drm_sched_job(sched_job)          \
> -             container_of((sched_job), struct drm_sched_job, queue_node)
> -
>  int drm_sched_policy = DRM_SCHED_POLICY_FIFO;
>  
>  /**
> @@ -122,11 +119,14 @@ static bool drm_sched_can_queue(struct 
> drm_gpu_scheduler *sched,
>                               struct drm_sched_entity *entity)
>  {
>       struct drm_sched_job *s_job;
> +     struct spsc_node *node;
>  
> -     s_job = to_drm_sched_job(spsc_queue_peek(&entity->job_queue));
> -     if (!s_job)
> +     node = spsc_queue_peek(&entity->job_queue);
> +     if (!node)
>               return false;
>  
> +     s_job = container_of(node, typeof(*s_job), queue_node);
> +
>       /* If a job exceeds the credit limit, truncate it to the credit limit
>        * itself to guarantee forward progress.
>        */
> -- 
> 2.47.1
> 

Reply via email to