On Thu, Jun 22, 2017 at 11:55:51AM +0100, Chris Wilson wrote:
> Once a client has requested a waitboost, we keep that waitboost active
> until all clients are no longer waiting. This is because we don't
> distinguish which waiter deserves the boost. However, with the advent of
> fence signaling, the signaler threads appear as waiters to the RPS
> interrupt handler. So instead of using a single boolean to track when to
> keep the waitboost active, use a counter of all outstanding waitboosted
> requests.
> 
> At this point, I have removed all vestiges of the rate limiting on
> clients. Whilst this means that compositors should remain more fluid,
> it also means that boosts are more prevalent.
> 
> A drawback of this implementation is that it requires constant request
> submission to keep the waitboost trimmed (as it is now cancelled when the
> request is completed). This will be fine for a busy system, but near
> idle the boosts may be kept for longer than desired (effectively tens of
> vblanks worst case), and there is a reliance on rc6 instead.

In other words, now we're disabling boost when all requests that required boost
are retired.
We may need to tweak igt pm_rps boost scenarios to account for that extra time.

> 
> Reported-by: Michał Winiarski <michal.winiar...@intel.com>
> Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
> Cc: Michał Winiarski <michal.winiar...@intel.com>

[SNIP]

> @@ -2347,11 +2349,10 @@ static int i915_rps_boost_info(struct seq_file *m, 
> void *data)
>  
>               rcu_read_lock();
>               task = pid_task(file->pid, PIDTYPE_PID);
> -             seq_printf(m, "%s [%d]: %d boosts%s\n",
> +             seq_printf(m, "%s [%d]: %d boosts\n",
>                          task ? task->comm : "<unknown>",
>                          task ? task->pid : -1,
> -                        file_priv->rps.boosts,
> -                        list_empty(&file_priv->rps.link) ? "" : ", active");
> +                        atomic_read(&file_priv->rps.boosts));
>               rcu_read_unlock();
>       }
>       seq_printf(m, "Kernel (anonymous) boosts: %d\n", dev_priv->rps.boosts);

dev_priv->rps.boosts seems to be unused at this point — could you remove it while
you're here?

With that:
Reviewed-by: Michał Winiarski <michal.winiar...@intel.com>

-Michał

> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 30e89456fc61..95e164387363 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -584,8 +584,7 @@ struct drm_i915_file_private {
>       struct idr context_idr;
>  
>       struct intel_rps_client {
> -             struct list_head link;
> -             unsigned boosts;
> +             atomic_t boosts;
>       } rps;
>  
>       unsigned int bsd_engine;
> @@ -1304,8 +1303,7 @@ struct intel_gen6_power_mgmt {
>       enum { LOW_POWER, BETWEEN, HIGH_POWER } power;
>  
>       spinlock_t client_lock;
> -     struct list_head clients;
> -     bool client_boost;
> +     atomic_t num_waiters;
>  
>       bool enabled;
>       struct delayed_work autoenable_work;
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index ae3ce1314bd1..7391e2d36a31 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -388,7 +388,7 @@ i915_gem_object_wait_fence(struct dma_fence *fence,
>        */
>       if (rps) {
>               if (INTEL_GEN(rq->i915) >= 6)
> -                     gen6_rps_boost(rq->i915, rps, rq->emitted_jiffies);
> +                     gen6_rps_boost(rq, rps);
>               else
>                       rps = NULL;
>       }
> @@ -399,22 +399,6 @@ i915_gem_object_wait_fence(struct dma_fence *fence,
>       if (flags & I915_WAIT_LOCKED && i915_gem_request_completed(rq))
>               i915_gem_request_retire_upto(rq);
>  
> -     if (rps && i915_gem_request_global_seqno(rq) == 
> intel_engine_last_submit(rq->engine)) {
> -             /* The GPU is now idle and this client has stalled.
> -              * Since no other client has submitted a request in the
> -              * meantime, assume that this client is the only one
> -              * supplying work to the GPU but is unable to keep that
> -              * work supplied because it is waiting. Since the GPU is
> -              * then never kept fully busy, RPS autoclocking will
> -              * keep the clocks relatively low, causing further delays.
> -              * Compensate by giving the synchronous client credit for
> -              * a waitboost next time.
> -              */
> -             spin_lock(&rq->i915->rps.client_lock);
> -             list_del_init(&rps->link);
> -             spin_unlock(&rq->i915->rps.client_lock);
> -     }
> -
>       return timeout;
>  }
>  
> @@ -5065,12 +5049,6 @@ void i915_gem_release(struct drm_device *dev, struct 
> drm_file *file)
>       list_for_each_entry(request, &file_priv->mm.request_list, client_link)
>               request->file_priv = NULL;
>       spin_unlock(&file_priv->mm.lock);
> -
> -     if (!list_empty(&file_priv->rps.link)) {
> -             spin_lock(&to_i915(dev)->rps.client_lock);
> -             list_del(&file_priv->rps.link);
> -             spin_unlock(&to_i915(dev)->rps.client_lock);
> -     }
>  }
>  
>  int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file)
> @@ -5087,7 +5065,6 @@ int i915_gem_open(struct drm_i915_private *i915, struct 
> drm_file *file)
>       file->driver_priv = file_priv;
>       file_priv->dev_priv = i915;
>       file_priv->file = file;
> -     INIT_LIST_HEAD(&file_priv->rps.link);
>  
>       spin_lock_init(&file_priv->mm.lock);
>       INIT_LIST_HEAD(&file_priv->mm.request_list);
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c 
> b/drivers/gpu/drm/i915/i915_gem_request.c
> index 8c59c79cbd8b..483af8921060 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -384,7 +384,11 @@ static void i915_gem_request_retire(struct 
> drm_i915_gem_request *request)
>               engine->context_unpin(engine, engine->last_retired_context);
>       engine->last_retired_context = request->ctx;
>  
> -     dma_fence_signal(&request->fence);
> +     spin_lock_irq(&request->lock);
> +     if (request->waitboost)
> +             atomic_dec(&request->i915->rps.num_waiters);
> +     dma_fence_signal_locked(&request->fence);
> +     spin_unlock_irq(&request->lock);
>  
>       i915_priotree_fini(request->i915, &request->priotree);
>       i915_gem_request_put(request);
> @@ -639,6 +643,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
>       req->file_priv = NULL;
>       req->batch = NULL;
>       req->capture_list = NULL;
> +     req->waitboost = false;
>  
>       /*
>        * Reserve space in the ring buffer for all the commands required to
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.h 
> b/drivers/gpu/drm/i915/i915_gem_request.h
> index 7b7c84369d78..604e131470a1 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.h
> +++ b/drivers/gpu/drm/i915/i915_gem_request.h
> @@ -184,6 +184,8 @@ struct drm_i915_gem_request {
>       /** Time at which this request was emitted, in jiffies. */
>       unsigned long emitted_jiffies;
>  
> +     bool waitboost;
> +
>       /** engine->request_list entry for this request */
>       struct list_head link;
>  
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index b1c7d1a04612..61047ee2eede 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1091,18 +1091,6 @@ static u32 vlv_wa_c0_ei(struct drm_i915_private 
> *dev_priv, u32 pm_iir)
>       return events;
>  }
>  
> -static bool any_waiters(struct drm_i915_private *dev_priv)
> -{
> -     struct intel_engine_cs *engine;
> -     enum intel_engine_id id;
> -
> -     for_each_engine(engine, dev_priv, id)
> -             if (intel_engine_has_waiter(engine))
> -                     return true;
> -
> -     return false;
> -}
> -
>  static void gen6_pm_rps_work(struct work_struct *work)
>  {
>       struct drm_i915_private *dev_priv =
> @@ -1114,7 +1102,7 @@ static void gen6_pm_rps_work(struct work_struct *work)
>       spin_lock_irq(&dev_priv->irq_lock);
>       if (dev_priv->rps.interrupts_enabled) {
>               pm_iir = fetch_and_zero(&dev_priv->rps.pm_iir);
> -             client_boost = fetch_and_zero(&dev_priv->rps.client_boost);
> +             client_boost = atomic_read(&dev_priv->rps.num_waiters);
>       }
>       spin_unlock_irq(&dev_priv->irq_lock);
>  
> @@ -1131,7 +1119,7 @@ static void gen6_pm_rps_work(struct work_struct *work)
>       new_delay = dev_priv->rps.cur_freq;
>       min = dev_priv->rps.min_freq_softlimit;
>       max = dev_priv->rps.max_freq_softlimit;
> -     if (client_boost || any_waiters(dev_priv))
> +     if (client_boost)
>               max = dev_priv->rps.max_freq;
>       if (client_boost && new_delay < dev_priv->rps.boost_freq) {
>               new_delay = dev_priv->rps.boost_freq;
> @@ -1144,7 +1132,7 @@ static void gen6_pm_rps_work(struct work_struct *work)
>  
>               if (new_delay >= dev_priv->rps.max_freq_softlimit)
>                       adj = 0;
> -     } else if (client_boost || any_waiters(dev_priv)) {
> +     } else if (client_boost) {
>               adj = 0;
>       } else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) {
>               if (dev_priv->rps.cur_freq > dev_priv->rps.efficient_freq)
> diff --git a/drivers/gpu/drm/i915/intel_drv.h 
> b/drivers/gpu/drm/i915/intel_drv.h
> index d93efb49a2e2..d17a32437f07 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -1858,9 +1858,8 @@ void intel_suspend_gt_powersave(struct drm_i915_private 
> *dev_priv);
>  void gen6_rps_busy(struct drm_i915_private *dev_priv);
>  void gen6_rps_reset_ei(struct drm_i915_private *dev_priv);
>  void gen6_rps_idle(struct drm_i915_private *dev_priv);
> -void gen6_rps_boost(struct drm_i915_private *dev_priv,
> -                 struct intel_rps_client *rps,
> -                 unsigned long submitted);
> +void gen6_rps_boost(struct drm_i915_gem_request *rq,
> +                 struct intel_rps_client *rps);
>  void intel_queue_rps_boost_for_request(struct drm_i915_gem_request *req);
>  void g4x_wm_get_hw_state(struct drm_device *dev);
>  void vlv_wm_get_hw_state(struct drm_device *dev);
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index b5b7372fcddc..fabea61ca0b6 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -6125,47 +6125,36 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv)
>                          gen6_sanitize_rps_pm_mask(dev_priv, ~0));
>       }
>       mutex_unlock(&dev_priv->rps.hw_lock);
> -
> -     spin_lock(&dev_priv->rps.client_lock);
> -     while (!list_empty(&dev_priv->rps.clients))
> -             list_del_init(dev_priv->rps.clients.next);
> -     spin_unlock(&dev_priv->rps.client_lock);
>  }
>  
> -void gen6_rps_boost(struct drm_i915_private *dev_priv,
> -                 struct intel_rps_client *rps,
> -                 unsigned long submitted)
> +void gen6_rps_boost(struct drm_i915_gem_request *rq,
> +                 struct intel_rps_client *rps)
>  {
> +     struct drm_i915_private *i915 = rq->i915;
> +     bool boost;
> +
>       /* This is intentionally racy! We peek at the state here, then
>        * validate inside the RPS worker.
>        */
> -     if (!(dev_priv->gt.awake &&
> -           dev_priv->rps.enabled &&
> -           dev_priv->rps.cur_freq < dev_priv->rps.boost_freq))
> +     if (!i915->rps.enabled)
>               return;
>  
> -     /* Force a RPS boost (and don't count it against the client) if
> -      * the GPU is severely congested.
> -      */
> -     if (rps && time_after(jiffies, submitted + DRM_I915_THROTTLE_JIFFIES))
> -             rps = NULL;
> -
> -     spin_lock(&dev_priv->rps.client_lock);
> -     if (rps == NULL || list_empty(&rps->link)) {
> -             spin_lock_irq(&dev_priv->irq_lock);
> -             if (dev_priv->rps.interrupts_enabled) {
> -                     dev_priv->rps.client_boost = true;
> -                     schedule_work(&dev_priv->rps.work);
> -             }
> -             spin_unlock_irq(&dev_priv->irq_lock);
> -
> -             if (rps != NULL) {
> -                     list_add(&rps->link, &dev_priv->rps.clients);
> -                     rps->boosts++;
> -             } else
> -                     dev_priv->rps.boosts++;
> +     boost = false;
> +     spin_lock_irq(&rq->lock);
> +     if (!rq->waitboost && !i915_gem_request_completed(rq)) {
> +             atomic_inc(&i915->rps.num_waiters);
> +             rq->waitboost = true;
> +             boost = true;
>       }
> -     spin_unlock(&dev_priv->rps.client_lock);
> +     spin_unlock_irq(&rq->lock);
> +     if (!boost)
> +             return;
> +
> +     if (READ_ONCE(i915->rps.cur_freq) < i915->rps.boost_freq)
> +             schedule_work(&i915->rps.work);
> +
> +     if (rps != NULL)
> +             atomic_inc(&rps->boosts);
>  }
>  
>  int intel_set_rps(struct drm_i915_private *dev_priv, u8 val)
> @@ -9112,7 +9101,7 @@ static void __intel_rps_boost_work(struct work_struct 
> *work)
>       struct drm_i915_gem_request *req = boost->req;
>  
>       if (!i915_gem_request_completed(req))
> -             gen6_rps_boost(req->i915, NULL, req->emitted_jiffies);
> +             gen6_rps_boost(req, NULL);
>  
>       i915_gem_request_put(req);
>       kfree(boost);
> @@ -9141,11 +9130,10 @@ void intel_queue_rps_boost_for_request(struct 
> drm_i915_gem_request *req)
>  void intel_pm_setup(struct drm_i915_private *dev_priv)
>  {
>       mutex_init(&dev_priv->rps.hw_lock);
> -     spin_lock_init(&dev_priv->rps.client_lock);
>  
>       INIT_DELAYED_WORK(&dev_priv->rps.autoenable_work,
>                         __intel_autoenable_gt_powersave);
> -     INIT_LIST_HEAD(&dev_priv->rps.clients);
> +     atomic_set(&dev_priv->rps.num_waiters, 0);
>  
>       dev_priv->pm.suspended = false;
>       atomic_set(&dev_priv->pm.wakeref_count, 0);
> -- 
> 2.11.0
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to