Only declare a missed interrupt if we find that the GPU is idle with
waiters and a hangcheck interval has passed in which no new user
interrupts have been raised.

Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuopp...@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c     | 11 +++++++----
 drivers/gpu/drm/i915/i915_irq.c         |  7 ++++++-
 drivers/gpu/drm/i915/intel_ringbuffer.h |  2 ++
 3 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index b421b53ca128..966fc022418c 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -730,10 +730,10 @@ static int i915_gem_request_info(struct seq_file *m, void *data)
 static void i915_ring_seqno_info(struct seq_file *m,
                                 struct intel_engine_cs *ring)
 {
-       if (ring->get_seqno) {
-               seq_printf(m, "Current sequence (%s): %x\n",
-                          ring->name, ring->get_seqno(ring));
-       }
+       seq_printf(m, "Current sequence (%s): %x\n",
+                  ring->name, ring->get_seqno(ring));
+       seq_printf(m, "Current user interrupts (%s): %x\n",
+                  ring->name, READ_ONCE(ring->user_interrupts));
 }
 
 static int i915_gem_seqno_info(struct seq_file *m, void *data)
@@ -1361,6 +1361,9 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
                seq_printf(m, "%s:\n", ring->name);
                seq_printf(m, "\tseqno = %x [current %x]\n",
                           ring->hangcheck.seqno, seqno[i]);
+               seq_printf(m, "\tuser interrupts = %x [current %x]\n",
+                          ring->hangcheck.user_interrupts,
+                          ring->user_interrupts);
                seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n",
                           (long long)ring->hangcheck.acthd,
                           (long long)acthd[i]);
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 07bc2cdd6252..978eebcf4594 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1000,6 +1000,7 @@ static void notify_ring(struct intel_engine_cs *ring)
                return;
 
        trace_i915_gem_request_notify(ring);
+       ring->user_interrupts++;
 
        wake_up_all(&ring->irq_queue);
 }
@@ -3097,6 +3098,7 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
        for_each_ring(ring, dev_priv, i) {
                u64 acthd;
                u32 seqno;
+               unsigned user_interrupts;
                bool busy = true;
 
                semaphore_clear_deadlocks(dev_priv);
@@ -3113,6 +3115,7 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
 
                acthd = intel_ring_get_active_head(ring);
                seqno = ring->get_seqno(ring);
+               user_interrupts = READ_ONCE(ring->user_interrupts);
 
                if (ring->hangcheck.seqno == seqno) {
                        if (ring_idle(ring, seqno)) {
@@ -3120,7 +3123,8 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
 
                                if (waitqueue_active(&ring->irq_queue)) {
                                        /* Issue a wake-up to catch stuck h/w. */
-                                       if (!test_and_set_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings)) {
+                                       if (ring->hangcheck.user_interrupts == user_interrupts &&
+                                           !test_and_set_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings)) {
                                                if (!(dev_priv->gpu_error.test_irq_rings & intel_ring_flag(ring)))
                                                        DRM_ERROR("Hangcheck timer elapsed... %s idle\n",
                                                                  ring->name);
@@ -3187,6 +3191,7 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
 
                ring->hangcheck.seqno = seqno;
                ring->hangcheck.acthd = acthd;
+               ring->hangcheck.user_interrupts = user_interrupts;
                busy_count += busy;
        }
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 8fb02b21e75d..b22573561669 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -90,6 +90,7 @@ struct intel_ring_hangcheck {
        u64 acthd;
        u64 max_acthd;
        u32 seqno;
+       unsigned user_interrupts;
        int score;
        enum intel_ring_hangcheck_action action;
        int deadlock;
@@ -301,6 +302,7 @@ struct  intel_engine_cs {
         * inspecting request list.
         */
        u32 last_submitted_seqno;
+       unsigned user_interrupts;
 
        bool gpu_caches_dirty;
 
-- 
2.7.0.rc3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to