On Thu, Sep 14, 2023 at 12:23:35AM -0400, Luben Tuikov wrote:
> On 2023-09-14 00:18, Luben Tuikov wrote:
> > On 2023-09-11 22:16, Matthew Brost wrote:
> >> Rather than a global modparam for scheduling policy, move the scheduling
> >> policy to scheduler / entity so user can control each scheduler / entity
> >> policy.
> >>
> >> v2:
> >>   - s/DRM_SCHED_POLICY_MAX/DRM_SCHED_POLICY_COUNT (Luben)
> >>   - Only include policy in scheduler (Luben)
> >>
> >> Signed-off-by: Matthew Brost <matthew.br...@intel.com>
> >> ---
> >>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  1 +
> >>  drivers/gpu/drm/etnaviv/etnaviv_sched.c    |  3 ++-
> >>  drivers/gpu/drm/lima/lima_sched.c          |  3 ++-
> >>  drivers/gpu/drm/msm/msm_ringbuffer.c       |  3 ++-
> >>  drivers/gpu/drm/nouveau/nouveau_sched.c    |  3 ++-
> >>  drivers/gpu/drm/panfrost/panfrost_job.c    |  3 ++-
> >>  drivers/gpu/drm/scheduler/sched_entity.c   | 24 ++++++++++++++++++----
> >>  drivers/gpu/drm/scheduler/sched_main.c     | 23 +++++++++++++++------
> >>  drivers/gpu/drm/v3d/v3d_sched.c            | 15 +++++++++-----
> >>  include/drm/gpu_scheduler.h                | 20 ++++++++++++------
> >>  10 files changed, 72 insertions(+), 26 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> >> index c83a76bccc1d..ecb00991dd51 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> >> @@ -2309,6 +2309,7 @@ static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
> >>                               ring->num_hw_submission, 0,
> >>                               timeout, adev->reset_domain->wq,
> >>                               ring->sched_score, ring->name,
> >> +                             DRM_SCHED_POLICY_DEFAULT,
> >>                               adev->dev);
> >>            if (r) {
> >>                    DRM_ERROR("Failed to create scheduler on ring %s.\n",
> >> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
> >> index 618a804ddc34..3646f995ca94 100644
> >> --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c
> >> +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
> >> @@ -137,7 +137,8 @@ int etnaviv_sched_init(struct etnaviv_gpu *gpu)
> >>    ret = drm_sched_init(&gpu->sched, &etnaviv_sched_ops, NULL,
> >>                         etnaviv_hw_jobs_limit, etnaviv_job_hang_limit,
> >>                         msecs_to_jiffies(500), NULL, NULL,
> >> -                       dev_name(gpu->dev), gpu->dev);
> >> +                       dev_name(gpu->dev), DRM_SCHED_POLICY_DEFAULT,
> >> +                       gpu->dev);
> >>    if (ret)
> >>            return ret;
> >>  
> >> diff --git a/drivers/gpu/drm/lima/lima_sched.c b/drivers/gpu/drm/lima/lima_sched.c
> >> index 8d858aed0e56..465d4bf3882b 100644
> >> --- a/drivers/gpu/drm/lima/lima_sched.c
> >> +++ b/drivers/gpu/drm/lima/lima_sched.c
> >> @@ -491,7 +491,8 @@ int lima_sched_pipe_init(struct lima_sched_pipe *pipe, const char *name)
> >>    return drm_sched_init(&pipe->base, &lima_sched_ops, NULL, 1,
> >>                          lima_job_hang_limit,
> >>                          msecs_to_jiffies(timeout), NULL,
> >> -                        NULL, name, pipe->ldev->dev);
> >> +                        NULL, name, DRM_SCHED_POLICY_DEFAULT,
> >> +                        pipe->ldev->dev);
> >>  }
> >>  
> >>  void lima_sched_pipe_fini(struct lima_sched_pipe *pipe)
> >> diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c
> >> index b8865e61b40f..f45e674a0aaf 100644
> >> --- a/drivers/gpu/drm/msm/msm_ringbuffer.c
> >> +++ b/drivers/gpu/drm/msm/msm_ringbuffer.c
> >> @@ -96,7 +96,8 @@ struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id,
> >>  
> >>    ret = drm_sched_init(&ring->sched, &msm_sched_ops, NULL,
> >>                    num_hw_submissions, 0, sched_timeout,
> >> -                  NULL, NULL, to_msm_bo(ring->bo)->name, gpu->dev->dev);
> >> +                  NULL, NULL, to_msm_bo(ring->bo)->name,
> >> +                  DRM_SCHED_POLICY_DEFAULT, gpu->dev->dev);
> >>    if (ret) {
> >>            goto fail;
> >>    }
> >> diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.c b/drivers/gpu/drm/nouveau/nouveau_sched.c
> >> index d458c2227d4f..70e497e40c70 100644
> >> --- a/drivers/gpu/drm/nouveau/nouveau_sched.c
> >> +++ b/drivers/gpu/drm/nouveau/nouveau_sched.c
> >> @@ -431,7 +431,8 @@ int nouveau_sched_init(struct nouveau_drm *drm)
> >>  
> >>    return drm_sched_init(sched, &nouveau_sched_ops, NULL,
> >>                          NOUVEAU_SCHED_HW_SUBMISSIONS, 0, job_hang_limit,
> >> -                        NULL, NULL, "nouveau_sched", drm->dev->dev);
> >> +                        NULL, NULL, "nouveau_sched",
> >> +                        DRM_SCHED_POLICY_DEFAULT, drm->dev->dev);
> >>  }
> >>  
> >>  void nouveau_sched_fini(struct nouveau_drm *drm)
> >> diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
> >> index 326ca1ddf1d7..ad36bf3a4699 100644
> >> --- a/drivers/gpu/drm/panfrost/panfrost_job.c
> >> +++ b/drivers/gpu/drm/panfrost/panfrost_job.c
> >> @@ -835,7 +835,8 @@ int panfrost_job_init(struct panfrost_device *pfdev)
> >>                                 nentries, 0,
> >>                                 msecs_to_jiffies(JOB_TIMEOUT_MS),
> >>                                 pfdev->reset.wq,
> >> -                               NULL, "pan_js", pfdev->dev);
> >> +                               NULL, "pan_js", DRM_SCHED_POLICY_DEFAULT,
> >> +                               pfdev->dev);
> >>            if (ret) {
> >>            dev_err(pfdev->dev, "Failed to create scheduler: %d.", ret);
> >>                    goto err_sched;
> >> diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c
> >> index a42763e1429d..65a972b52eda 100644
> >> --- a/drivers/gpu/drm/scheduler/sched_entity.c
> >> +++ b/drivers/gpu/drm/scheduler/sched_entity.c
> >> @@ -33,6 +33,20 @@
> >>  #define to_drm_sched_job(sched_job)               \
> >>            container_of((sched_job), struct drm_sched_job, queue_node)
> >>  
> >> +static bool bad_policies(struct drm_gpu_scheduler **sched_list,
> >> +                   unsigned int num_sched_list)
> > 
> > Rename the function to the status quo,
> >     drm_sched_policy_mismatch(...
> > 

Will do.
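
For v3 it would look roughly like this (same check as above, just moved to
the status-quo drm_sched_* naming):

static bool drm_sched_policy_mismatch(struct drm_gpu_scheduler **sched_list,
				      unsigned int num_sched_list)
{
	enum drm_sched_policy sched_policy = sched_list[0]->sched_policy;
	unsigned int i;

	/* All scheduling policies in the list must match */
	for (i = 1; i < num_sched_list; ++i)
		if (sched_policy != sched_list[i]->sched_policy)
			return true;

	return false;
}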

> >> +{
> >> +  enum drm_sched_policy sched_policy = sched_list[0]->sched_policy;
> >> +  unsigned int i;
> >> +
> >> +  /* All schedule policies must match */
> >> +  for (i = 1; i < num_sched_list; ++i)
> >> +          if (sched_policy != sched_list[i]->sched_policy)
> >> +                  return true;
> >> +
> >> +  return false;
> >> +}
> >> +
> >>  /**
> >>   * drm_sched_entity_init - Init a context entity used by scheduler when
> >>   * submit to HW ring.
> >> @@ -62,7 +76,8 @@ int drm_sched_entity_init(struct drm_sched_entity *entity,
> >>                      unsigned int num_sched_list,
> >>                      atomic_t *guilty)
> >>  {
> >> -  if (!(entity && sched_list && (num_sched_list == 0 || sched_list[0])))
> >> +  if (!(entity && sched_list && (num_sched_list == 0 || sched_list[0])) ||
> >> +      bad_policies(sched_list, num_sched_list))
> >>            return -EINVAL;
> >>  
> >>    memset(entity, 0, sizeof(struct drm_sched_entity));
> >> @@ -486,7 +501,7 @@ struct drm_sched_job *drm_sched_entity_pop_job(struct drm_sched_entity *entity)
> >>     * Update the entity's location in the min heap according to
> >>     * the timestamp of the next job, if any.
> >>     */
> >> -  if (drm_sched_policy == DRM_SCHED_POLICY_FIFO) {
> >> +  if (entity->rq->sched->sched_policy == DRM_SCHED_POLICY_FIFO) {
> >>            struct drm_sched_job *next;
> >>  
> >>            next = to_drm_sched_job(spsc_queue_peek(&entity->job_queue));
> >> @@ -558,7 +573,8 @@ void drm_sched_entity_select_rq(struct drm_sched_entity *entity)
> >>  void drm_sched_entity_push_job(struct drm_sched_job *sched_job)
> >>  {
> >>    struct drm_sched_entity *entity = sched_job->entity;
> >> -  bool first;
> >> +  bool first, fifo = entity->rq->sched->sched_policy ==
> >> +          DRM_SCHED_POLICY_FIFO;
> >>    ktime_t submit_ts;
> >>  
> >>    trace_drm_sched_job(sched_job, entity);
> >> @@ -587,7 +603,7 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job)
> >>            drm_sched_rq_add_entity(entity->rq, entity);
> >>            spin_unlock(&entity->rq_lock);
> >>  
> >> -          if (drm_sched_policy == DRM_SCHED_POLICY_FIFO)
> >> +          if (fifo)
> >>                    drm_sched_rq_update_fifo(entity, submit_ts);
> >>  
> >>            drm_sched_wakeup_if_can_queue(entity->rq->sched);
> >> diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
> >> index 614e8c97a622..545d5298c086 100644
> >> --- a/drivers/gpu/drm/scheduler/sched_main.c
> >> +++ b/drivers/gpu/drm/scheduler/sched_main.c
> >> @@ -66,14 +66,14 @@
> >>  #define to_drm_sched_job(sched_job)               \
> >>            container_of((sched_job), struct drm_sched_job, queue_node)
> >>  
> >> -int drm_sched_policy = DRM_SCHED_POLICY_FIFO;
> >> +int default_drm_sched_policy = DRM_SCHED_POLICY_FIFO;
> >>  
> >>  /**
> >>   * DOC: sched_policy (int)
> >>   * Used to override default entities scheduling policy in a run queue.
> >>   */
> >> -MODULE_PARM_DESC(sched_policy, "Specify the scheduling policy for entities on a run-queue, " __stringify(DRM_SCHED_POLICY_RR) " = Round Robin, " __stringify(DRM_SCHED_POLICY_FIFO) " = FIFO (default).");
> >> -module_param_named(sched_policy, drm_sched_policy, int, 0444);
> >> +MODULE_PARM_DESC(sched_policy, "Specify the default scheduling policy for entities on a run-queue, " __stringify(DRM_SCHED_POLICY_RR) " = Round Robin, " __stringify(DRM_SCHED_POLICY_FIFO) " = FIFO (default).");
> > 
> > Note that you don't need to add "default" in the text, as it is
> > already there at the very end: "FIFO (default)".
> > Otherwise it gets confusing what is meant by "default". Like this:
> > 
> >     Specify the default scheduling policy for entities on a run-queue, 1 = Round Robin, 2 = FIFO (default).
> > 
> > See "default" appear twice and it creates confusion? We don't need our 
> > internal "default" play to get
> > exported all the way to the casual user reading this. It is much clear, 
> > however,
> > 
> >     Specify the scheduling policy for entities on a run-queue, 1 = Round Robin, 2 = FIFO (default).
> > 
> > To mean, if unset, the default one would be used. But this is all internal code stuff.
> > 
> > So I'd say leave this one alone.
> >

Ok.
 
> >> +module_param_named(sched_policy, default_drm_sched_policy, int, 0444);
> > 
> > Put "default" as a postfix:
> > default_drm_sched_policy --> drm_sched_policy_default
> > 

Sure.
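
So in v3, roughly (parameter description left as in the original, only the
variable renamed):

int drm_sched_policy_default = DRM_SCHED_POLICY_FIFO;

MODULE_PARM_DESC(sched_policy, "Specify the scheduling policy for entities on a run-queue, " __stringify(DRM_SCHED_POLICY_RR) " = Round Robin, " __stringify(DRM_SCHED_POLICY_FIFO) " = FIFO (default).");
module_param_named(sched_policy, drm_sched_policy_default, int, 0444);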

> >>  
> >>  static __always_inline bool drm_sched_entity_compare_before(struct rb_node *a,
> >>                                                        const struct rb_node *b)
> >> @@ -177,7 +177,7 @@ void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
> >>    if (rq->current_entity == entity)
> >>            rq->current_entity = NULL;
> >>  
> >> -  if (drm_sched_policy == DRM_SCHED_POLICY_FIFO)
> >> +  if (rq->sched->sched_policy == DRM_SCHED_POLICY_FIFO)
> >>            drm_sched_rq_remove_fifo_locked(entity);
> >>  
> >>    spin_unlock(&rq->lock);
> >> @@ -898,7 +898,7 @@ drm_sched_select_entity(struct drm_gpu_scheduler *sched)
> >>  
> >>    /* Kernel run queue has higher priority than normal run queue*/
> >>    for (i = DRM_SCHED_PRIORITY_COUNT - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
> >> -          entity = drm_sched_policy == DRM_SCHED_POLICY_FIFO ?
> >> +          entity = sched->sched_policy == DRM_SCHED_POLICY_FIFO ?
> >>                    drm_sched_rq_select_entity_fifo(&sched->sched_rq[i]) :
> >>                    drm_sched_rq_select_entity_rr(&sched->sched_rq[i]);
> >>            if (entity)
> >> @@ -1071,6 +1071,7 @@ static void drm_sched_main(struct work_struct *w)
> >>   *                used
> >>   * @score: optional score atomic shared with other schedulers
> >>   * @name: name used for debugging
> >> + * @sched_policy: schedule policy
> >>   * @dev: target &struct device
> >>   *
> >>   * Return 0 on success, otherwise error code.
> >> @@ -1080,9 +1081,15 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
> >>               struct workqueue_struct *submit_wq,
> >>               unsigned hw_submission, unsigned hang_limit,
> >>               long timeout, struct workqueue_struct *timeout_wq,
> >> -             atomic_t *score, const char *name, struct device *dev)
> >> +             atomic_t *score, const char *name,
> >> +             enum drm_sched_policy sched_policy,
> >> +             struct device *dev)
> >>  {
> >>    int i;
> >> +
> >> +  if (sched_policy >= DRM_SCHED_POLICY_COUNT)
> >> +          return -EINVAL;
> >> +
> >>    sched->ops = ops;
> >>    sched->hw_submission_limit = hw_submission;
> >>    sched->name = name;
> >> @@ -1092,6 +1099,10 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
> >>    sched->hang_limit = hang_limit;
> >>    sched->score = score ? score : &sched->_score;
> >>    sched->dev = dev;
> >> +  if (sched_policy == DRM_SCHED_POLICY_DEFAULT)
> >> +          sched->sched_policy = default_drm_sched_policy;
> >> +  else
> >> +          sched->sched_policy = sched_policy;
> 
> Note also that here you can use a ternary operator as opposed to an 
> if-control.
> 
>       sched->sched_policy = sched_policy == DRM_SCHED_POLICY_UNSET ?
>                               drm_sched_policy_default : sched_policy;

Sure, will fix in next rev.
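
i.e. roughly:

	if (sched_policy >= DRM_SCHED_POLICY_COUNT)
		return -EINVAL;
	...
	sched->sched_policy = sched_policy == DRM_SCHED_POLICY_DEFAULT ?
				drm_sched_policy_default : sched_policy;

(still using DRM_SCHED_POLICY_DEFAULT as it is named in this revision; will
adjust if that enum value gets renamed as well.)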

Matt

> 
> -- 
> Regards,
> Luben
> 
