Chris Wilson <ch...@chris-wilson.co.uk> writes:

> Some machines, *cough* snb *cough*, fail catastrophically if asked to
> reset the GPU under certain conditions. The initial guess is that this
> is when the rings are still busy at the time of the reset request
> (because that's a pattern we've seen elsewhere, hence why we do try
> gen3_stop_engines() before reset) so abandon the reset and leave the
> device wedged, if gen3_stop_engines() fails.
>
> v2: Only give up if not idle after emptying the ring.
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103240
> Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuopp...@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/intel_uncore.c | 43 
> ++++++++++++++++++++++++++++---------
>  1 file changed, 33 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_uncore.c 
> b/drivers/gpu/drm/i915/intel_uncore.c
> index 96ee6b2754be..f80dbff3595f 100644
> --- a/drivers/gpu/drm/i915/intel_uncore.c
> +++ b/drivers/gpu/drm/i915/intel_uncore.c
> @@ -1372,7 +1372,7 @@ int i915_reg_read_ioctl(struct drm_device *dev,
>       return ret;
>  }
>  
> -static void gen3_stop_engine(struct intel_engine_cs *engine)
> +static bool gen3_stop_engine(struct intel_engine_cs *engine)
>  {
>       struct drm_i915_private *dev_priv = engine->i915;
>       const u32 base = engine->mmio_base;
> @@ -1392,26 +1392,46 @@ static void gen3_stop_engine(struct intel_engine_cs 
> *engine)
>       I915_WRITE_FW(RING_HEAD(base), 0);
>       I915_WRITE_FW(RING_TAIL(base), 0);
>  
> +     /* Check acts as a post */
> +     if (intel_wait_for_register_fw(dev_priv,
> +                                    mode,
> +                                    MODE_IDLE,
> +                                    MODE_IDLE,
> +                                    1000)) {
> +             DRM_DEBUG_DRIVER("%s: timed out after clearing ring\n",
> +                              engine->name);
> +             return false;
> +     }
> +

I recall that this bailout was the reason I didn't want to
use the stop_ring in intel_ringbuffer.c

Now if you choose to reintroduce the bailout, I think you can
make a generic stop_engine and get rid of the copy.

-Mika

>       /* The ring must be empty before it is disabled */
>       I915_WRITE_FW(RING_CTL(base), 0);
> +     POSTING_READ_FW(RING_CTL(base));
>  
> -     /* Check acts as a post */
> -     if (I915_READ_FW(RING_HEAD(base)) != 0)
> -             DRM_DEBUG_DRIVER("%s: ring head not parked\n",
> -                              engine->name);
> +     return true;
>  }
>  
> -static void i915_stop_engines(struct drm_i915_private *dev_priv,
> -                           unsigned engine_mask)
> +static int i915_stop_engines(struct drm_i915_private *dev_priv,
> +                          unsigned engine_mask)
>  {
>       struct intel_engine_cs *engine;
>       enum intel_engine_id id;
> +     bool idle;
>  
>       if (INTEL_GEN(dev_priv) < 3)
> -             return;
> +             return true;
>  
> +     idle = true;
>       for_each_engine_masked(engine, dev_priv, engine_mask, id)
> -             gen3_stop_engine(engine);
> +             idle &= gen3_stop_engine(engine);
> +     if (idle)
> +             return true;
> +
> +     dev_err(dev_priv->drm.dev, "Failed to stop all engines\n");
> +     for_each_engine_masked(engine, dev_priv, engine_mask, id) {
> +             struct drm_printer p = drm_debug_printer(__func__);
> +             intel_engine_dump(engine, &p);
> +     }
> +     return false;
>  }
>  
>  static bool i915_reset_complete(struct pci_dev *pdev)
> @@ -1772,7 +1792,10 @@ int intel_gpu_reset(struct drm_i915_private *dev_priv, 
> unsigned engine_mask)
>                *
>                * FIXME: Wa for more modern gens needs to be validated
>                */
> -             i915_stop_engines(dev_priv, engine_mask);
> +             if (!i915_stop_engines(dev_priv, engine_mask)) {
> +                     ret = -EIO;
> +                     break;
> +             }
>  
>               ret = -ENODEV;
>               if (reset)
> -- 
> 2.15.0.rc2
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to