We can defer queuing the hangcheck from the start of every request
until we wait upon a request. This reduces the overhead of every
request, but may increase the latency of detecting a hang. However, if
nothing ever waits upon a hang, did it ever hang? It also improves the
robustness of the wait-request by ensuring that the hangchecker is
indeed running before we sleep indefinitely (and thereby ensuring that
we never actually sleep forever waiting for a dead GPU).

v2: Also queue the hangcheck from retire work in case the GPU becomes
stuck when no one is watching.

Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h |  2 +-
 drivers/gpu/drm/i915/i915_gem.c | 13 ++++++++-----
 drivers/gpu/drm/i915/i915_irq.c |  9 ++++-----
 3 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index bbdb056d2a8e..d9d411919779 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2710,7 +2710,7 @@ void intel_hpd_cancel_work(struct drm_i915_private 
*dev_priv);
 bool intel_hpd_pin_to_port(enum hpd_pin pin, enum port *port);
 
 /* i915_irq.c */
-void i915_queue_hangcheck(struct drm_device *dev);
+void i915_queue_hangcheck(struct drm_i915_private *dev_priv);
 __printf(3, 4)
 void i915_handle_error(struct drm_device *dev, bool wedged,
                       const char *fmt, ...);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index f570990f03e0..b4da8b354a3b 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1306,6 +1306,9 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
                        break;
                }
 
+               /* Ensure that even if the GPU hangs, we get woken up. */
+               i915_queue_hangcheck(dev_priv);
+
                timer.function = NULL;
                if (timeout || missed_irq(dev_priv, ring)) {
                        unsigned long expire;
@@ -2592,8 +2595,6 @@ void __i915_add_request(struct drm_i915_gem_request 
*request,
 
        trace_i915_gem_request_add(request);
 
-       i915_queue_hangcheck(ring->dev);
-
        queue_delayed_work(dev_priv->wq,
                           &dev_priv->mm.retire_work,
                           round_jiffies_up_relative(HZ));
@@ -2947,8 +2948,8 @@ i915_gem_retire_requests(struct drm_device *dev)
 
        if (idle)
                mod_delayed_work(dev_priv->wq,
-                                  &dev_priv->mm.idle_work,
-                                  msecs_to_jiffies(100));
+                                &dev_priv->mm.idle_work,
+                                msecs_to_jiffies(100));
 
        return idle;
 }
@@ -2967,9 +2968,11 @@ i915_gem_retire_work_handler(struct work_struct *work)
                idle = i915_gem_retire_requests(dev);
                mutex_unlock(&dev->struct_mutex);
        }
-       if (!idle)
+       if (!idle) {
+               i915_queue_hangcheck(dev_priv);
                queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work,
                                   round_jiffies_up_relative(HZ));
+       }
 }
 
 static void
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 15973e917566..94f5f4e99446 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -3165,18 +3165,17 @@ static void i915_hangcheck_elapsed(struct work_struct 
*work)
                goto out;
        }
 
+       /* Reset timer in case GPU hangs without another request being added */
        if (busy_count)
-               /* Reset timer case chip hangs without another request
-                * being added */
-               i915_queue_hangcheck(dev);
+               i915_queue_hangcheck(dev_priv);
 
 out:
        ENABLE_RPM_WAKEREF_ASSERTS(dev_priv);
 }
 
-void i915_queue_hangcheck(struct drm_device *dev)
+void i915_queue_hangcheck(struct drm_i915_private *dev_priv)
 {
-       struct i915_gpu_error *e = &to_i915(dev)->gpu_error;
+       struct i915_gpu_error *e = &dev_priv->gpu_error;
 
        if (!i915.enable_hangcheck)
                return;
-- 
2.7.0.rc3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to