If we can't recover the GPU after the reset, mark it as wedged to cancel
the outstanding tasks and to prevent new users from trying to use the
broken GPU.

v2: Check the same ring is hung again before declaring the reset broken.

Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuopp...@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursu...@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.c |  7 ++++++-
 drivers/gpu/drm/i915/i915_drv.h |  2 +-
 drivers/gpu/drm/i915/i915_gem.c | 16 ++++++++++++++--
 3 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index bb747aeb29aa..5ee62976ec98 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1761,7 +1761,12 @@ void i915_reset(struct drm_i915_private *dev_priv)
 
        pr_notice("drm/i915: Resetting chip after gpu hang\n");
        disable_irq(dev_priv->drm.irq);
-       i915_gem_reset_prepare(dev_priv);
+       ret = i915_gem_reset_prepare(dev_priv);
+       if (ret) {
+               DRM_ERROR("GPU recovery failed\n");
+               intel_gpu_reset(dev_priv, ALL_ENGINES);
+               goto error;
+       }
 
        ret = intel_gpu_reset(dev_priv, ALL_ENGINES);
        if (ret) {
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index f861418122ef..38509505424d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3328,7 +3328,7 @@ static inline u32 i915_reset_count(struct i915_gpu_error 
*error)
        return READ_ONCE(error->reset_count);
 }
 
-void i915_gem_reset_prepare(struct drm_i915_private *dev_priv);
+int i915_gem_reset_prepare(struct drm_i915_private *dev_priv);
 void i915_gem_reset_finish(struct drm_i915_private *dev_priv);
 void i915_gem_set_wedged(struct drm_i915_private *dev_priv);
 void i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 021d1e97cc15..e9dfd320a0e3 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2629,16 +2629,28 @@ static void reset_request(struct drm_i915_gem_request 
*request)
        dma_fence_set_error(&request->fence, -EIO);
 }
 
-void i915_gem_reset_prepare(struct drm_i915_private *dev_priv)
+int i915_gem_reset_prepare(struct drm_i915_private *dev_priv)
 {
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
+       int err = 0;
 
        /* Ensure irq handler finishes, and not run again. */
-       for_each_engine(engine, dev_priv, id)
+       for_each_engine(engine, dev_priv, id) {
+               struct drm_i915_gem_request *request;
+
                tasklet_kill(&engine->irq_tasklet);
 
+               if (engine->hangcheck.stalled) {
+                       request = i915_gem_find_active_request(engine);
+                       if (request && request->fence.error == -EIO)
+                               err = -EIO; /* Previous reset failed! */
+               }
+       }
+
        i915_gem_revoke_fences(dev_priv);
+
+       return err;
 }
 
 static void i915_gem_reset_engine(struct intel_engine_cs *engine)
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to