On Fri, Jul 16, 2021 at 01:17:18PM -0700, Matthew Brost wrote:
> From: Rahul Kumar Singh <rahul.kumar.si...@intel.com>
> 
> When GuC submission is enabled, the GuC controls engine resets. Rather
> than explicitly triggering a reset, the driver must submit a hanging
> context to GuC and wait for the reset to occur.
> 
> Signed-off-by: Rahul Kumar Singh <rahul.kumar.si...@intel.com>
> Signed-off-by: John Harrison <john.c.harri...@intel.com>
> Signed-off-by: Matthew Brost <matthew.br...@intel.com>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospu...@intel.com>
> Cc: Matthew Brost <matthew.br...@intel.com>

Reviewed-by: Matthew Brost <matthew.br...@intel.com>

> ---
>  drivers/gpu/drm/i915/Makefile                 |   1 +
>  .../gpu/drm/i915/gt/selftest_workarounds.c    | 130 +++++++++++++-----
>  .../i915/selftests/intel_scheduler_helpers.c  |  76 ++++++++++
>  .../i915/selftests/intel_scheduler_helpers.h  |  28 ++++
>  4 files changed, 201 insertions(+), 34 deletions(-)
>  create mode 100644 drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c
>  create mode 100644 drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.h
> 
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index 10b3bb6207ba..ab7679957623 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -280,6 +280,7 @@ i915-$(CONFIG_DRM_I915_CAPTURE_ERROR) += i915_gpu_error.o
>  i915-$(CONFIG_DRM_I915_SELFTEST) += \
>       gem/selftests/i915_gem_client_blt.o \
>       gem/selftests/igt_gem_utils.o \
> +     selftests/intel_scheduler_helpers.o \
>       selftests/i915_random.o \
>       selftests/i915_selftest.o \
>       selftests/igt_atomic.o \
> diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
> index 7ebc4edb8ecf..7727bc531ea9 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
> @@ -12,6 +12,7 @@
>  #include "selftests/igt_flush_test.h"
>  #include "selftests/igt_reset.h"
>  #include "selftests/igt_spinner.h"
> +#include "selftests/intel_scheduler_helpers.h"
>  #include "selftests/mock_drm.h"
>  
>  #include "gem/selftests/igt_gem_utils.h"
> @@ -261,28 +262,34 @@ static int do_engine_reset(struct intel_engine_cs 
> *engine)
>       return intel_engine_reset(engine, "live_workarounds");
>  }
>  
> +static int do_guc_reset(struct intel_engine_cs *engine)
> +{
> +     /* Currently a no-op as the reset is handled by GuC */
> +     return 0;
> +}
> +
>  static int
>  switch_to_scratch_context(struct intel_engine_cs *engine,
> -                       struct igt_spinner *spin)
> +                       struct igt_spinner *spin,
> +                       struct i915_request **rq)
>  {
>       struct intel_context *ce;
> -     struct i915_request *rq;
>       int err = 0;
>  
>       ce = intel_context_create(engine);
>       if (IS_ERR(ce))
>               return PTR_ERR(ce);
>  
> -     rq = igt_spinner_create_request(spin, ce, MI_NOOP);
> +     *rq = igt_spinner_create_request(spin, ce, MI_NOOP);
>       intel_context_put(ce);
>  
> -     if (IS_ERR(rq)) {
> +     if (IS_ERR(*rq)) {
>               spin = NULL;
> -             err = PTR_ERR(rq);
> +             err = PTR_ERR(*rq);
>               goto err;
>       }
>  
> -     err = request_add_spin(rq, spin);
> +     err = request_add_spin(*rq, spin);
>  err:
>       if (err && spin)
>               igt_spinner_end(spin);
> @@ -296,6 +303,7 @@ static int check_whitelist_across_reset(struct intel_engine_cs *engine,
>  {
>       struct intel_context *ce, *tmp;
>       struct igt_spinner spin;
> +     struct i915_request *rq;
>       intel_wakeref_t wakeref;
>       int err;
>  
> @@ -316,13 +324,24 @@ static int check_whitelist_across_reset(struct intel_engine_cs *engine,
>               goto out_spin;
>       }
>  
> -     err = switch_to_scratch_context(engine, &spin);
> +     err = switch_to_scratch_context(engine, &spin, &rq);
>       if (err)
>               goto out_spin;
>  
> +     /* Ensure the spinner hasn't aborted */
> +     if (i915_request_completed(rq)) {
> +             pr_err("%s spinner failed to start\n", name);
> +             err = -ETIMEDOUT;
> +             goto out_spin;
> +     }
> +
>       with_intel_runtime_pm(engine->uncore->rpm, wakeref)
>               err = reset(engine);
>  
> +     /* Ensure the reset happens and kills the engine */
> +     if (err == 0)
> +             err = intel_selftest_wait_for_rq(rq);
> +
>       igt_spinner_end(&spin);
>  
>       if (err) {
> @@ -787,9 +806,26 @@ static int live_reset_whitelist(void *arg)
>                       continue;
>  
>               if (intel_has_reset_engine(gt)) {
> -                     err = check_whitelist_across_reset(engine,
> -                                                        do_engine_reset,
> -                                                        "engine");
> +                     if (intel_engine_uses_guc(engine)) {
> +                             struct intel_selftest_saved_policy saved;
> +                             int err2;
> +
> +                             err = intel_selftest_modify_policy(engine, &saved);
> +                             if(err)
> +                                     goto out;
> +
> +                             err = check_whitelist_across_reset(engine,
> +                                                                do_guc_reset,
> +                                                                "guc");
> +
> +                             err2 = intel_selftest_restore_policy(engine, &saved);
> +                             if (err == 0)
> +                                     err = err2;
> +                     } else
> +                             err = check_whitelist_across_reset(engine,
> +                                                                do_engine_reset,
> +                                                                "engine");
> +
>                       if (err)
>                               goto out;
>               }
> @@ -1226,31 +1262,41 @@ live_engine_reset_workarounds(void *arg)
>       reference_lists_init(gt, &lists);
>  
>       for_each_engine(engine, gt, id) {
> +             struct intel_selftest_saved_policy saved;
> +             bool using_guc = intel_engine_uses_guc(engine);
>               bool ok;
> +             int ret2;
>  
>               pr_info("Verifying after %s reset...\n", engine->name);
> +             ret = intel_selftest_modify_policy(engine, &saved);
> +             if (ret)
> +                     break;
> +
> +
>               ce = intel_context_create(engine);
>               if (IS_ERR(ce)) {
>                       ret = PTR_ERR(ce);
> -                     break;
> +                     goto restore;
>               }
>  
> -             ok = verify_wa_lists(gt, &lists, "before reset");
> -             if (!ok) {
> -                     ret = -ESRCH;
> -                     goto err;
> -             }
> +             if (!using_guc) {
> +                     ok = verify_wa_lists(gt, &lists, "before reset");
> +                     if (!ok) {
> +                             ret = -ESRCH;
> +                             goto err;
> +                     }
>  
> -             ret = intel_engine_reset(engine, "live_workarounds:idle");
> -             if (ret) {
> -                     pr_err("%s: Reset failed while idle\n", engine->name);
> -                     goto err;
> -             }
> +                     ret = intel_engine_reset(engine, "live_workarounds:idle");
> +                     if (ret) {
> +                             pr_err("%s: Reset failed while idle\n", engine->name);
> +                             goto err;
> +                     }
>  
> -             ok = verify_wa_lists(gt, &lists, "after idle reset");
> -             if (!ok) {
> -                     ret = -ESRCH;
> -                     goto err;
> +                     ok = verify_wa_lists(gt, &lists, "after idle reset");
> +                     if (!ok) {
> +                             ret = -ESRCH;
> +                             goto err;
> +                     }
>               }
>  
>               ret = igt_spinner_init(&spin, engine->gt);
> @@ -1271,25 +1317,41 @@ live_engine_reset_workarounds(void *arg)
>                       goto err;
>               }
>  
> -             ret = intel_engine_reset(engine, "live_workarounds:active");
> -             if (ret) {
> -                     pr_err("%s: Reset failed on an active spinner\n",
> -                            engine->name);
> -                     igt_spinner_fini(&spin);
> -                     goto err;
> +             /* Ensure the spinner hasn't aborted */
> +             if (i915_request_completed(rq)) {
> +                     ret = -ETIMEDOUT;
> +                     goto skip;
> +             }
> +
> +             if (!using_guc) {
> +                     ret = intel_engine_reset(engine, "live_workarounds:active");
> +                     if (ret) {
> +                             pr_err("%s: Reset failed on an active spinner\n",
> +                                    engine->name);
> +                             igt_spinner_fini(&spin);
> +                             goto err;
> +                     }
>               }
>  
> +             /* Ensure the reset happens and kills the engine */
> +             if (ret == 0)
> +                     ret = intel_selftest_wait_for_rq(rq);
> +
> +skip:
>               igt_spinner_end(&spin);
>               igt_spinner_fini(&spin);
>  
>               ok = verify_wa_lists(gt, &lists, "after busy reset");
> -             if (!ok) {
> +             if (!ok)
>                       ret = -ESRCH;
> -                     goto err;
> -             }
>  
>  err:
>               intel_context_put(ce);
> +
> +restore:
> +             ret2 = intel_selftest_restore_policy(engine, &saved);
> +             if (ret == 0)
> +                     ret = ret2;
>               if (ret)
>                       break;
>       }
> diff --git a/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c b/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c
> new file mode 100644
> index 000000000000..91ecd8a1bd21
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c
> @@ -0,0 +1,76 @@
> +/*
> + * SPDX-License-Identifier: MIT
> + *
> + * Copyright © 2018 Intel Corporation
> + */
> +
> +//#include "gt/intel_engine_user.h"
> +#include "gt/intel_gt.h"
> +#include "i915_drv.h"
> +#include "i915_selftest.h"
> +
> +#include "selftests/intel_scheduler_helpers.h"
> +
> +#define REDUCED_TIMESLICE    5
> +#define REDUCED_PREEMPT              10
> +#define WAIT_FOR_RESET_TIME  1000
> +
> +int intel_selftest_modify_policy(struct intel_engine_cs *engine,
> +                              struct intel_selftest_saved_policy *saved)
> +
> +{
> +     int err;
> +
> +     saved->reset = engine->i915->params.reset;
> +     saved->flags = engine->flags;
> +     saved->timeslice = engine->props.timeslice_duration_ms;
> +     saved->preempt_timeout = engine->props.preempt_timeout_ms;
> +
> +     /*
> +      * Enable force pre-emption on time slice expiration
> +      * together with engine reset on pre-emption timeout.
> +      * This is required to make the GuC notice and reset
> +      * the single hanging context.
> +      * Also, reduce the preemption timeout to something
> +      * small to speed the test up.
> +      */
> +     engine->i915->params.reset = 2;
> +     engine->flags |= I915_ENGINE_WANT_FORCED_PREEMPTION;
> +     engine->props.timeslice_duration_ms = REDUCED_TIMESLICE;
> +     engine->props.preempt_timeout_ms = REDUCED_PREEMPT;
> +
> +     if (!intel_engine_uses_guc(engine))
> +             return 0;
> +
> +     err = intel_guc_global_policies_update(&engine->gt->uc.guc);
> +     if (err)
> +             intel_selftest_restore_policy(engine, saved);
> +
> +     return err;
> +}
> +
> +int intel_selftest_restore_policy(struct intel_engine_cs *engine,
> +                               struct intel_selftest_saved_policy *saved)
> +{
> +     /* Restore the original policies */
> +     engine->i915->params.reset = saved->reset;
> +     engine->flags = saved->flags;
> +     engine->props.timeslice_duration_ms = saved->timeslice;
> +     engine->props.preempt_timeout_ms = saved->preempt_timeout;
> +
> +     if (!intel_engine_uses_guc(engine))
> +             return 0;
> +
> +     return intel_guc_global_policies_update(&engine->gt->uc.guc);
> +}
> +
> +int intel_selftest_wait_for_rq(struct i915_request *rq)
> +{
> +     long ret;
> +
> +     ret = i915_request_wait(rq, 0, WAIT_FOR_RESET_TIME);
> +     if (ret < 0)
> +             return ret;
> +
> +     return 0;
> +}
> diff --git a/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.h b/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.h
> new file mode 100644
> index 000000000000..f30e96f0ba95
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.h
> @@ -0,0 +1,28 @@
> +/* SPDX-License-Identifier: MIT */
> +/*
> + * Copyright © 2014-2019 Intel Corporation
> + */
> +
> +#ifndef _INTEL_SELFTEST_SCHEDULER_HELPERS_H_
> +#define _INTEL_SELFTEST_SCHEDULER_HELPERS_H_
> +
> +#include <linux/types.h>
> +
> +struct i915_request;
> +struct intel_engine_cs;
> +
> +struct intel_selftest_saved_policy
> +{
> +     u32 flags;
> +     u32 reset;
> +     u64 timeslice;
> +     u64 preempt_timeout;
> +};
> +
> +int intel_selftest_modify_policy(struct intel_engine_cs *engine,
> +                              struct intel_selftest_saved_policy *saved);
> +int intel_selftest_restore_policy(struct intel_engine_cs *engine,
> +                               struct intel_selftest_saved_policy *saved);
> +int intel_selftest_wait_for_rq( struct i915_request *rq);
> +
> +#endif
> -- 
> 2.28.0
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to