On Wed, Apr 11, 2018 at 01:03:46PM +0100, Chris Wilson wrote:
> Even though we weren't injecting guilty requests to be reset, we could
> still fall over the issue of resetting the same request too fast -- where
> the GPU refuses to start again. (Although it is interesting to note that
> reloading the driver is sufficient, suggesting that we could recover if
> we delayed the setup after reset?) Continue to paper over the problem by
> adding a small delay by waiting for the engine to idle between tests,
> and ensure that the engines are idle before starting the idle tests.
> 
> References: 028666793a02 ("drm/i915/selftests: Avoid repeatedly harming the 
> same innocent context")
> Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuopp...@linux.intel.com>
> Cc: Michał Winiarski <michal.winiar...@intel.com>
> Cc: Michel Thierry <michel.thie...@intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursu...@intel.com>
> ---
>  .../gpu/drm/i915/selftests/intel_hangcheck.c  | 48 ++++++++++++++++++-
>  1 file changed, 47 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c 
> b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
> index 24f913f26a7b..7e23e6a6719c 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
> +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
> @@ -454,6 +454,11 @@ static int igt_global_reset(void *arg)
>       return err;
>  }
>  

#define IGT_IDLE_TIMEOUT 50 ?
It should even fit within a line.

With or without that:

Reviewed-by: Michał Winiarski <michal.winiar...@intel.com>

-Michał

> +static bool wait_for_idle(struct intel_engine_cs *engine)
> +{
> +     return wait_for(intel_engine_is_idle(engine), 50) == 0;
> +}
> +
>  static int __igt_reset_engine(struct drm_i915_private *i915, bool active)
>  {
>       struct intel_engine_cs *engine;
> @@ -481,6 +486,13 @@ static int __igt_reset_engine(struct drm_i915_private 
> *i915, bool active)
>               if (active && !intel_engine_can_store_dword(engine))
>                       continue;
>  
> +             if (!wait_for_idle(engine)) {
> +                     pr_err("%s failed to idle before reset\n",
> +                            engine->name);
> +                     err = -EIO;
> +                     break;
> +             }
> +
>               reset_count = i915_reset_count(&i915->gpu_error);
>               reset_engine_count = i915_reset_engine_count(&i915->gpu_error,
>                                                            engine);
> @@ -542,6 +554,19 @@ static int __igt_reset_engine(struct drm_i915_private 
> *i915, bool active)
>                               err = -EINVAL;
>                               break;
>                       }
> +
> +                     if (!wait_for_idle(engine)) {
> +                             struct drm_printer p =
> +                                     drm_info_printer(i915->drm.dev);
> +
> +                             pr_err("%s failed to idle after reset\n",
> +                                    engine->name);
> +                             intel_engine_dump(engine, &p,
> +                                               "%s\n", engine->name);
> +
> +                             err = -EIO;
> +                             break;
> +                     }
>               } while (time_before(jiffies, end_time));
>               clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
>  
> @@ -696,6 +721,13 @@ static int __igt_reset_engines(struct drm_i915_private 
> *i915,
>                   !intel_engine_can_store_dword(engine))
>                       continue;
>  
> +             if (!wait_for_idle(engine)) {
> +                     pr_err("i915_reset_engine(%s:%s): failed to idle before 
> reset\n",
> +                            engine->name, test_name);
> +                     err = -EIO;
> +                     break;
> +             }
> +
>               memset(threads, 0, sizeof(threads));
>               for_each_engine(other, i915, tmp) {
>                       struct task_struct *tsk;
> @@ -772,6 +804,20 @@ static int __igt_reset_engines(struct drm_i915_private 
> *i915,
>                               i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
>                               i915_request_put(rq);
>                       }
> +
> +                     if (!(flags & TEST_SELF) && !wait_for_idle(engine)) {
> +                             struct drm_printer p =
> +                                     drm_info_printer(i915->drm.dev);
> +
> +                             pr_err("i915_reset_engine(%s:%s):"
> +                                    " failed to idle after reset\n",
> +                                    engine->name, test_name);
> +                             intel_engine_dump(engine, &p,
> +                                               "%s\n", engine->name);
> +
> +                             err = -EIO;
> +                             break;
> +                     }
>               } while (time_before(jiffies, end_time));
>               clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
>               pr_info("i915_reset_engine(%s:%s): %lu resets\n",
> @@ -981,7 +1027,7 @@ static int wait_for_others(struct drm_i915_private *i915,
>               if (engine == exclude)
>                       continue;
>  
> -             if (wait_for(intel_engine_is_idle(engine), 10))
> +             if (!wait_for_idle(engine))
>                       return -EIO;
>       }
>  
> -- 
> 2.17.0
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to