Convert b->signaled_requests to a lockless list (llist) so that it can
be manipulated outside of b->irq_lock.

Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
---
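For reference, the lockless pattern relied on here is the usual llist one:
producers push a single node with a cmpxchg loop (llist_add() reports whether
the list was previously empty, which is what gates irq_work_queue() in
insert_breadcrumb()), and the consumer detaches the whole chain with a single
xchg (llist_del_all()) and then walks it outside any lock. Below is a minimal
userspace sketch of that pattern using C11 atomics; the names (demo_llist,
demo_node, demo_llist_add, demo_llist_del_all) are made up for illustration
and are not the kernel API.

/* Userspace analogue of the llist push-one / pop-all pattern. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct demo_node {
        struct demo_node *next;
        int seqno;
};

struct demo_llist {
        _Atomic(struct demo_node *) first;
};

/*
 * Push one node; returns true if the list was previously empty,
 * mirroring llist_add()'s return value.
 */
static bool demo_llist_add(struct demo_node *node, struct demo_llist *head)
{
        struct demo_node *first = atomic_load(&head->first);

        do {
                node->next = first;
        } while (!atomic_compare_exchange_weak(&head->first, &first, node));

        return first == NULL;
}

/* Detach every queued node in one atomic exchange, like llist_del_all(). */
static struct demo_node *demo_llist_del_all(struct demo_llist *head)
{
        return atomic_exchange(&head->first, NULL);
}

int main(void)
{
        struct demo_llist signaled = { .first = NULL };
        struct demo_node a = { .seqno = 1 }, b = { .seqno = 2 };

        if (demo_llist_add(&a, &signaled))
                printf("list was empty -> kick the irq_work\n");
        demo_llist_add(&b, &signaled);

        /* Consumer: pop everything, then walk lock-free (LIFO order). */
        for (struct demo_node *n = demo_llist_del_all(&signaled), *next;
             n; n = next) {
                next = n->next;
                printf("signaling seqno %d\n", n->seqno);
        }

        return 0;
}

Note that signal_irq_work() additionally chains locally completed requests
onto the detached list while still under b->irq_lock (rq->signal_node.next =
signal; signal = &rq->signal_node); that is safe because those nodes are not
reachable through b->signaled_requests at that point.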
 drivers/gpu/drm/i915/gt/intel_breadcrumbs.c   | 53 ++++++++++++++-----
 .../gpu/drm/i915/gt/intel_breadcrumbs_types.h |  2 +-
 drivers/gpu/drm/i915/i915_request.h           |  6 ++-
 3 files changed, 46 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
index d8b206e53660..9e7ac612fabb 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
@@ -174,16 +174,13 @@ static void add_retire(struct intel_breadcrumbs *b, struct intel_timeline *tl)
                intel_engine_add_retire(b->irq_engine, tl);
 }
 
-static bool __signal_request(struct i915_request *rq, struct list_head *signals)
+static bool __signal_request(struct i915_request *rq)
 {
-       clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
-
        if (!__dma_fence_signal(&rq->fence)) {
                i915_request_put(rq);
                return false;
        }
 
-       list_add_tail(&rq->signal_link, signals);
        return true;
 }
 
@@ -191,17 +188,42 @@ static void signal_irq_work(struct irq_work *work)
 {
        struct intel_breadcrumbs *b = container_of(work, typeof(*b), irq_work);
        const ktime_t timestamp = ktime_get();
+       struct llist_node *signal, *sn;
        struct intel_context *ce, *cn;
        struct list_head *pos, *next;
-       LIST_HEAD(signal);
+
+       signal = NULL;
+       if (unlikely(!llist_empty(&b->signaled_requests)))
+               signal = llist_del_all(&b->signaled_requests);
 
        spin_lock(&b->irq_lock);
 
-       if (list_empty(&b->signalers))
+       /*
+        * Keep the irq armed until the interrupt after all listeners are gone.
+        *
+        * Enabling/disabling the interrupt is rather costly, roughly a couple
+        * of hundred microseconds. If we are proactive and enable/disable
+        * the interrupt around every request that wants a breadcrumb, we
+        * quickly drown in the extra orders of magnitude of latency imposed
+        * on request submission.
+        *
+        * So we try to be lazy, and keep the interrupts enabled until no
+        * more listeners appear within a breadcrumb interrupt interval (that
+        * is until a request completes that no one cares about). The
+        * observation is that listeners come in batches, and will often
+        * listen to a bunch of requests in succession.
+        *
+        * We also try to avoid raising too many interrupts, as they may
+        * be generated by userspace batches and it is unfortunately rather
+        * too easy to drown the CPU under a flood of GPU interrupts. Thus
+        * whenever no one appears to be listening, we turn off the interrupts.
+        * Fewer interrupts should conserve power -- at the very least, fewer
+        * interrupts draw less ire from other users of the system and tools
+        * like powertop.
+        */
+       if (!signal && list_empty(&b->signalers))
                __intel_breadcrumbs_disarm_irq(b);
 
-       list_splice_init(&b->signaled_requests, &signal);
-
        list_for_each_entry_safe(ce, cn, &b->signalers, signal_link) {
                GEM_BUG_ON(list_empty(&ce->signals));
 
@@ -218,7 +240,11 @@ static void signal_irq_work(struct irq_work *work)
                         * spinlock as the callback chain may end up adding
                         * more signalers to the same context or engine.
                         */
-                       __signal_request(rq, &signal);
+                       clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
+                       if (__signal_request(rq)) {
+                               rq->signal_node.next = signal;
+                               signal = &rq->signal_node;
+                       }
                }
 
                /*
@@ -238,9 +264,9 @@ static void signal_irq_work(struct irq_work *work)
 
        spin_unlock(&b->irq_lock);
 
-       list_for_each_safe(pos, next, &signal) {
+       llist_for_each_safe(signal, sn, signal) {
                struct i915_request *rq =
-                       list_entry(pos, typeof(*rq), signal_link);
+                       llist_entry(signal, typeof(*rq), signal_node);
                struct list_head cb_list;
 
                spin_lock(&rq->lock);
@@ -264,7 +290,7 @@ intel_breadcrumbs_create(struct intel_engine_cs *irq_engine)
 
        spin_lock_init(&b->irq_lock);
        INIT_LIST_HEAD(&b->signalers);
-       INIT_LIST_HEAD(&b->signaled_requests);
+       init_llist_head(&b->signaled_requests);
 
        init_irq_work(&b->irq_work, signal_irq_work);
 
@@ -327,7 +353,8 @@ static void insert_breadcrumb(struct i915_request *rq,
         * its signal completion.
         */
        if (__request_completed(rq)) {
-               if (__signal_request(rq, &b->signaled_requests))
+               if (__signal_request(rq) &&
+                   llist_add(&rq->signal_node, &b->signaled_requests))
                        irq_work_queue(&b->irq_work);
                return;
        }
diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h b/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h
index 8e53b9942695..3fa19820b37a 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h
@@ -35,7 +35,7 @@ struct intel_breadcrumbs {
        struct intel_engine_cs *irq_engine;
 
        struct list_head signalers;
-       struct list_head signaled_requests;
+       struct llist_head signaled_requests;
 
        struct irq_work irq_work; /* for use from inside irq_lock */
 
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 16b721080195..874af6db6103 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -176,7 +176,11 @@ struct i915_request {
        struct intel_context *context;
        struct intel_ring *ring;
        struct intel_timeline __rcu *timeline;
-       struct list_head signal_link;
+
+       union {
+               struct list_head signal_link;
+               struct llist_node signal_node;
+       };
 
        /*
         * The rcu epoch of when this request was allocated. Used to judiciously
-- 
2.20.1
