We have a number of tasks that we would like to run when idle, as we
park the GPU into a powersaving mode. A few of those tasks use the
global idle point as a convenient moment when all previous execution
has been retired (and so we know that the GPU is no longer touching
random user memory). However, on a busy system we are unlikely to see
global idle points, and would prefer a much more incremental scheme
whereby we can run a task as soon as all execution current at its
submission has completed.

Enter the idle barrier and idle tasks.

To determine a point in the future when all current tasks are complete,
we schedule a new low priority request that will be executed after all
current work is complete, and impose a barrier ordering it before all
future work. We therefore know that when we retire that barrier, the
GPU is no longer touching any memory released before the barrier was
submitted, allowing us to run a set of idle tasks clear of any dangling
GPU references.
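
As an illustration only (not part of the patch itself), a caller that
wants to release memory the GPU may still be reading could embed an
i915_gem_active in its object and register it as an idle task. The
hypothetical_cache structure and free_cache_pages() helper below are
invented for the example; only init_request_active() and
i915_gem_add_idle_task() come from this series:

    struct hypothetical_cache {
            struct i915_gem_active idle; /* must outlive the barrier */
            /* ... pages the GPU may still be reading ... */
    };

    static void hypothetical_cache_retire(struct i915_gem_active *active,
                                          struct i915_request *rq)
    {
            /* rq may be NULL when flushed at a global idle point */
            struct hypothetical_cache *cache =
                    container_of(active, struct hypothetical_cache, idle);

            /* All execution preceding the idle barrier has retired */
            free_cache_pages(cache); /* hypothetical release helper */
            kfree(cache);
    }

    static void hypothetical_cache_release(struct drm_i915_private *i915,
                                           struct hypothetical_cache *cache)
    {
            lockdep_assert_held(&i915->drm.struct_mutex);

            init_request_active(&cache->idle, hypothetical_cache_retire);

            /* Runs immediately if idle, else after the next barrier */
            i915_gem_add_idle_task(i915, &cache->idle);
    }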

Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h      |  5 ++
 drivers/gpu/drm/i915/i915_gem.c      | 90 ++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_request.c  |  9 +++
 drivers/gpu/drm/i915/i915_timeline.c |  3 +
 4 files changed, 107 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index d072f3369ee1..5ca77e2e53fb 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2021,6 +2021,9 @@ struct drm_i915_private {
                 */
                struct delayed_work idle_work;
 
+               struct i915_gem_active idle_barrier;
+               struct list_head idle_tasks;
+
                ktime_t last_init_time;
 
                struct i915_vma *scratch;
@@ -3040,6 +3043,8 @@ void i915_gem_fini(struct drm_i915_private *dev_priv);
 void i915_gem_cleanup_engines(struct drm_i915_private *dev_priv);
 int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv,
                           unsigned int flags, long timeout);
+void i915_gem_add_idle_task(struct drm_i915_private *i915,
+                           struct i915_gem_active *task);
 int __must_check i915_gem_suspend(struct drm_i915_private *dev_priv);
 void i915_gem_suspend_late(struct drm_i915_private *dev_priv);
 void i915_gem_resume(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 101a0f644787..0a8bcf6e7098 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -141,6 +141,15 @@ int i915_mutex_lock_interruptible(struct drm_device *dev)
        return 0;
 }
 
+static void call_idle_tasks(struct list_head *tasks)
+{
+       struct i915_gem_active *tsk, *tn;
+
+       list_for_each_entry_safe(tsk, tn, tasks, link)
+               tsk->retire(tsk, NULL);
+       INIT_LIST_HEAD(tasks);
+}
+
 static u32 __i915_gem_park(struct drm_i915_private *i915)
 {
        intel_wakeref_t wakeref;
@@ -169,6 +178,8 @@ static u32 __i915_gem_park(struct drm_i915_private *i915)
         */
        synchronize_irq(i915->drm.irq);
 
+       call_idle_tasks(&i915->gt.idle_tasks);
+
        intel_engines_park(i915);
        i915_timelines_park(i915);
 
@@ -2906,6 +2917,81 @@ i915_gem_find_active_request(struct intel_engine_cs *engine)
        return active;
 }
 
+static void idle_barrier(struct drm_i915_private *i915)
+{
+       struct i915_gt_timelines *gt = &i915->gt.timelines;
+       struct i915_timeline *tl;
+       struct i915_request *rq;
+       int err = 0;
+
+       if (list_empty(&i915->gt.idle_tasks))
+               return;
+
+       if (!i915->gt.active_requests) {
+               call_idle_tasks(&i915->gt.idle_tasks);
+               return;
+       }
+
+       /* Keep just one idle barrier in flight, amalgamating tasks instead */
+       if (i915_gem_active_isset(&i915->gt.idle_barrier))
+               return;
+
+       GEM_TRACE("adding idle barrier\n");
+
+       rq = i915_request_alloc(i915->engine[RCS], i915->kernel_context);
+       if (IS_ERR(rq))
+               return;
+
+       /* run after all current requests have executed, but before any new */
+       mutex_lock(&gt->mutex);
+       list_for_each_entry(tl, &gt->active_list, link) {
+               struct i915_request *last;
+
+               if (tl == rq->timeline)
+                       continue;
+
+               err = i915_timeline_set_barrier(tl, rq);
+               if (err == -EEXIST)
+                       continue;
+               if (err)
+                       break;
+
+               last = i915_gem_active_raw(&tl->last_request,
+                                          &i915->drm.struct_mutex);
+               if (!last)
+                       continue;
+
+               mutex_unlock(&gt->mutex); /* allocation ahead! */
+               err = i915_request_await_dma_fence(rq, &last->fence);
+               mutex_lock(&gt->mutex);
+               if (err)
+                       break;
+
+               /* restart after reacquiring the lock */
+               tl = list_entry(&gt->active_list, typeof(*tl), link);
+       }
+       mutex_unlock(&gt->mutex);
+
+       if (err == 0) {
+               list_splice_init(&i915->gt.idle_tasks, &rq->active_list);
+               i915_gem_active_set(&i915->gt.idle_barrier, rq);
+       }
+
+       i915_request_add(rq);
+}
+
+void i915_gem_add_idle_task(struct drm_i915_private *i915,
+                           struct i915_gem_active *task)
+{
+       lockdep_assert_held(&i915->drm.struct_mutex);
+       GEM_TRACE("adding idle task hint:%pS\n", task->retire);
+
+       if (i915->gt.active_requests)
+               list_add(&task->link, &i915->gt.idle_tasks);
+       else
+               task->retire(task, NULL);
+}
+
 static void
 i915_gem_retire_work_handler(struct work_struct *work)
 {
@@ -2916,6 +3002,7 @@ i915_gem_retire_work_handler(struct work_struct *work)
        /* Come back later if the device is busy... */
        if (mutex_trylock(&dev->struct_mutex)) {
                i915_retire_requests(dev_priv);
+               idle_barrier(dev_priv);
                mutex_unlock(&dev->struct_mutex);
        }
 
@@ -5182,6 +5269,7 @@ void i915_gem_fini(struct drm_i915_private *dev_priv)
 
        /* Flush any outstanding unpin_work. */
        i915_gem_drain_workqueue(dev_priv);
+       GEM_BUG_ON(!list_empty(&dev_priv->gt.idle_tasks));
 
        mutex_lock(&dev_priv->drm.struct_mutex);
        intel_uc_fini_hw(dev_priv);
@@ -5302,6 +5390,8 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv)
 
        INIT_LIST_HEAD(&dev_priv->gt.active_rings);
        INIT_LIST_HEAD(&dev_priv->gt.closed_vma);
+       INIT_LIST_HEAD(&dev_priv->gt.idle_tasks);
+       init_request_active(&dev_priv->gt.idle_barrier, NULL);
 
        i915_gem_init__mm(dev_priv);
 
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index c09a6644a2ab..b397155fe8a7 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -532,6 +532,11 @@ static int add_barrier(struct i915_request *rq, struct i915_gem_active *active)
        return barrier ? i915_request_await_dma_fence(rq, &barrier->fence) : 0;
 }
 
+static int add_idle_barrier(struct i915_request *rq)
+{
+       return add_barrier(rq, &rq->i915->gt.idle_barrier);
+}
+
 static int add_timeline_barrier(struct i915_request *rq)
 {
        return add_barrier(rq, &rq->timeline->barrier);
@@ -679,6 +684,10 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
         */
        rq->head = rq->ring->emit;
 
+       ret = add_idle_barrier(rq);
+       if (ret)
+               goto err_unwind;
+
        ret = add_timeline_barrier(rq);
        if (ret)
                goto err_unwind;
diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c
index 8f5c57304064..60b2e1c3abf4 100644
--- a/drivers/gpu/drm/i915/i915_timeline.c
+++ b/drivers/gpu/drm/i915/i915_timeline.c
@@ -270,6 +270,9 @@ int i915_timeline_set_barrier(struct i915_timeline *tl, struct i915_request *rq)
        /* Must maintain ordering wrt existing barriers */
        old = i915_gem_active_raw(&tl->barrier, &rq->i915->drm.struct_mutex);
        if (old) {
+               if (old == rq)
+                       return -EEXIST;
+
                err = i915_request_await_dma_fence(rq, &old->fence);
                if (err)
                        return err;
-- 
2.20.1
