Chris Wilson <ch...@chris-wilson.co.uk> writes:

> When we restart the engines, and we have active requests, a request on
> the first engine may complete and queue a request to the second engine
> before we try to restart the second engine. That queueing of the
> request may itself cause the engine to restart, and so the subsequent
> handling by engine->init_hw() will corrupt the current state.
>
> If we restart all the engines under stop_machine(), we will not process
> any interrupts as we restart the engines, and all the engines will
> appear to start simultaneously from the snapshot of state.
>
> Fixes: 821ed7df6e2a ("drm/i915: Update reset path to fix incomplete requests")
> Testcase: igt/gem_exec_fence/await-hang
> Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursu...@intel.com>
> Cc: Mika Kuoppala <mika.kuopp...@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_gem.c | 34 +++++++++++++++++++++++++++-------
>  1 file changed, 27 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 8a510c7f6828..65651a889813 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -4369,11 +4369,30 @@ static void init_unused_rings(struct drm_i915_private 
> *dev_priv)
>       }
>  }
>  
> -int
> -i915_gem_init_hw(struct drm_i915_private *dev_priv)
> +static int __i915_gem_restart_engines(void *data)
>  {
> +     struct drm_i915_private *i915 = data;
>       struct intel_engine_cs *engine;
>       enum intel_engine_id id;
> +     int err;
> +
> +     /* We want all the engines to restart from the same snapshot, the
> +      * restart has to be appear simultaneous (i.e. atomic). If one
> +      * request on the first engine completes and queues a request for
> +      * a secodn engine, *before* we call init_hw on that second engine,
s/secodn/second.

> +      * we may corrupt state.
> +      */
> +     for_each_engine(engine, i915, id) {
> +             err = engine->init_hw(engine);
> +             if (err)
> +                     return err;
> +     }
> +
> +     return 0;
> +}
> +
> +int i915_gem_init_hw(struct drm_i915_private *dev_priv)
> +{
>       int ret;
>  
>       dev_priv->gt.last_init_time = ktime_get();
> @@ -4419,11 +4438,12 @@ i915_gem_init_hw(struct drm_i915_private *dev_priv)
>       }
>  
>       /* Need to do basic initialisation of all rings first: */
> -     for_each_engine(engine, dev_priv, id) {
> -             ret = engine->init_hw(engine);
> -             if (ret)
> -                     goto out;
> -     }
> +     if (dev_priv->gt.active_requests)

This should only increment while mutex is held, so

Reviewed-by: Mika Kuoppala <mika.kuopp...@intel.com>

> +             ret = stop_machine(__i915_gem_restart_engines, dev_priv, NULL);
> +     else
> +             ret = __i915_gem_restart_engines(dev_priv);
> +     if (ret)
> +             goto out;
>  
>       intel_mocs_init_l3cc_table(dev_priv);
>  
> -- 
> 2.11.0
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to