Inject probe errors into intel_uc_init_hw to make sure we
correctly handle any uC initialization failure.

To avoid complaints from CI about injected errors, use
i915_probe_error to lower the message level.

v2: use _sanitize instead of _reset to correctly handle Gen9 retries
v3: reorder fw status codes, as a failed fw is still available;
    add more failure points

Signed-off-by: Michal Wajdeczko <michal.wajdec...@intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospu...@intel.com>
Cc: Chris Wilson <ch...@chris-wilson.co.uk>
Reviewed-by: Chris Wilson <ch...@chris-wilson.co.uk> #v1
---
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c |  4 ++++
 drivers/gpu/drm/i915/gt/uc/intel_huc.c        |  8 +++++---
 drivers/gpu/drm/i915/gt/uc/intel_uc.c         | 20 +++++++++++++++----
 drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c      | 18 +++++++++++------
 drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h      | 14 ++++++-------
 drivers/gpu/drm/i915/i915_gem.c               |  2 +-
 6 files changed, 45 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index b4b508f19a1c..412892096daa 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -1123,6 +1123,10 @@ int intel_guc_submission_enable(struct intel_guc *guc)
        enum intel_engine_id id;
        int err;
 
+       err = i915_inject_load_error(gt->i915, -ENXIO);
+       if (err)
+               return err;
+
        /*
         * We're using GuC work items for submitting work through GuC. Since
         * we're coalescing multiple requests from a single context into a
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
index d642b167a389..ef54053c5ef9 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
@@ -139,6 +139,10 @@ int intel_huc_auth(struct intel_huc *huc)
        GEM_BUG_ON(!intel_uc_fw_is_loaded(&huc->fw));
        GEM_BUG_ON(intel_huc_is_authenticated(huc));
 
+       ret = i915_inject_load_error(gt->i915, -ENXIO);
+       if (ret)
+               goto fail;
+
        ret = intel_guc_auth_huc(guc,
                                 intel_guc_ggtt_offset(guc, huc->rsa_data));
        if (ret) {
@@ -158,13 +162,11 @@ int intel_huc_auth(struct intel_huc *huc)
        }
 
        huc->fw.status = INTEL_UC_FIRMWARE_RUNNING;
-
        return 0;
 
 fail:
+       i915_probe_error(gt->i915, "HuC: Authentication failed %d\n", ret);
        huc->fw.status = INTEL_UC_FIRMWARE_FAIL;
-
-       DRM_ERROR("HuC: Authentication failed %d\n", ret);
        return ret;
 }
 
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
index d1b08b28b1ad..1d21c2646831 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
@@ -41,6 +41,10 @@ static int __intel_uc_reset_hw(struct intel_uc *uc)
        int ret;
        u32 guc_status;
 
+       ret = i915_inject_load_error(gt->i915, -ENXIO);
+       if (ret)
+               return ret;
+
        ret = intel_reset_guc(gt);
        if (ret) {
                DRM_ERROR("Failed to reset GuC, ret = %d\n", ret);
@@ -209,6 +213,10 @@ static int guc_enable_communication(struct intel_guc *guc)
 
        GEM_BUG_ON(guc_communication_enabled(guc));
 
+       ret = i915_inject_load_error(i915, -ENXIO);
+       if (ret)
+               return ret;
+
        ret = intel_guc_ct_enable(&guc->ct);
        if (ret)
                return ret;
@@ -340,7 +348,7 @@ void intel_uc_fini(struct intel_uc *uc)
        intel_guc_fini(guc);
 }
 
-static void __uc_sanitize(struct intel_uc *uc)
+static int __uc_sanitize(struct intel_uc *uc)
 {
        struct intel_guc *guc = &uc->guc;
        struct intel_huc *huc = &uc->huc;
@@ -350,7 +358,7 @@ static void __uc_sanitize(struct intel_uc *uc)
        intel_huc_sanitize(huc);
        intel_guc_sanitize(guc);
 
-       __intel_uc_reset_hw(uc);
+       return __intel_uc_reset_hw(uc);
 }
 
 void intel_uc_sanitize(struct intel_uc *uc)
@@ -378,6 +386,10 @@ static int uc_init_wopcm(struct intel_uc *uc)
        GEM_BUG_ON(!(size & GUC_WOPCM_SIZE_MASK));
        GEM_BUG_ON(size & ~GUC_WOPCM_SIZE_MASK);
 
+       err = i915_inject_load_error(gt->i915, -ENXIO);
+       if (err)
+               return err;
+
        mask = GUC_WOPCM_SIZE_MASK | GUC_WOPCM_SIZE_LOCKED;
        err = intel_uncore_write_and_verify(uncore, GUC_WOPCM_SIZE, size, mask,
                                            size | GUC_WOPCM_SIZE_LOCKED);
@@ -434,7 +446,7 @@ int intel_uc_init_hw(struct intel_uc *uc)
                 * Always reset the GuC just before (re)loading, so
                 * that the state and timing are fairly predictable
                 */
-               ret = __intel_uc_reset_hw(uc);
+               ret = __uc_sanitize(uc);
                if (ret)
                        goto err_out;
 
@@ -504,7 +516,7 @@ int intel_uc_init_hw(struct intel_uc *uc)
        if (GEM_WARN_ON(ret == -EIO))
                ret = -EINVAL;
 
-       dev_err(i915->drm.dev, "GuC initialization failed %d\n", ret);
+       i915_probe_error(i915, "GuC initialization failed %d\n", ret);
        return ret;
 }
 
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
index 650ad6037b74..a3a22a26016c 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
@@ -383,6 +383,10 @@ static int uc_fw_xfer(struct intel_uc_fw *uc_fw, struct intel_gt *gt,
        u64 offset;
        int ret;
 
+       ret = i915_inject_load_error(gt->i915, -ETIMEDOUT);
+       if (ret)
+               return ret;
+
        intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
 
        /* Set the source address for the uCode */
@@ -443,8 +447,13 @@ int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, struct intel_gt *gt,
        /* make sure the status was cleared the last time we reset the uc */
        GEM_BUG_ON(intel_uc_fw_is_loaded(uc_fw));
 
+       err = i915_inject_load_error(gt->i915, -ENOEXEC);
+       if (err)
+               return err;
+
        if (!intel_uc_fw_is_available(uc_fw))
                return -ENOEXEC;
+
        /* Call custom loader */
        intel_uc_fw_ggtt_bind(uc_fw, gt);
        err = uc_fw_xfer(uc_fw, gt, wopcm_offset, dma_flags);
@@ -464,13 +473,10 @@ int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, struct intel_gt *gt,
        return 0;
 
 fail:
+       i915_probe_error(gt->i915, "Failed to load %s firmware %s (%d)\n",
+                        intel_uc_fw_type_repr(uc_fw->type), uc_fw->path,
+                        err);
        uc_fw->status = INTEL_UC_FIRMWARE_FAIL;
-       DRM_DEBUG_DRIVER("%s fw load failed\n",
-                        intel_uc_fw_type_repr(uc_fw->type));
-
-       DRM_WARN("%s: Failed to load firmware %s (error %d)\n",
-                intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, err);
-
        return err;
 }
 
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
index 6b64b8073703..bfe3614613b7 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
@@ -37,12 +37,12 @@ struct intel_gt;
 #define INTEL_UC_FIRMWARE_URL 
"https://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git/tree/i915";
 
 enum intel_uc_fw_status {
-       INTEL_UC_FIRMWARE_FAIL = -3, /* failed to xfer or init/auth the fw */
-       INTEL_UC_FIRMWARE_MISSING = -2, /* blob not found on the system */
-       INTEL_UC_FIRMWARE_NOT_SUPPORTED = -1, /* no uc HW */
+       INTEL_UC_FIRMWARE_NOT_SUPPORTED = -1, /* no uc HW or disabled */
        INTEL_UC_FIRMWARE_UNINITIALIZED = 0, /* used to catch checks done too 
early */
        INTEL_UC_FIRMWARE_SELECTED, /* selected the blob we want to load */
+       INTEL_UC_FIRMWARE_MISSING, /* blob not found on the system */
        INTEL_UC_FIRMWARE_AVAILABLE, /* blob found and copied in mem */
+       INTEL_UC_FIRMWARE_FAIL, /* failed to xfer or init/auth the fw */
        INTEL_UC_FIRMWARE_TRANSFERRED, /* dma xfer done */
        INTEL_UC_FIRMWARE_RUNNING /* init/auth done */
 };
@@ -83,18 +83,18 @@ static inline
 const char *intel_uc_fw_status_repr(enum intel_uc_fw_status status)
 {
        switch (status) {
-       case INTEL_UC_FIRMWARE_FAIL:
-               return "FAIL";
-       case INTEL_UC_FIRMWARE_MISSING:
-               return "MISSING";
        case INTEL_UC_FIRMWARE_NOT_SUPPORTED:
                return "N/A";
        case INTEL_UC_FIRMWARE_UNINITIALIZED:
                return "UNINITIALIZED";
        case INTEL_UC_FIRMWARE_SELECTED:
                return "SELECTED";
+       case INTEL_UC_FIRMWARE_MISSING:
+               return "MISSING";
        case INTEL_UC_FIRMWARE_AVAILABLE:
                return "AVAILABLE";
+       case INTEL_UC_FIRMWARE_FAIL:
+               return "FAIL";
        case INTEL_UC_FIRMWARE_TRANSFERRED:
                return "TRANSFERRED";
        case INTEL_UC_FIRMWARE_RUNNING:
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 5e87acc4b770..2436cd598e6e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1243,7 +1243,7 @@ int i915_gem_init_hw(struct drm_i915_private *i915)
        /* We can't enable contexts until all firmware is loaded */
        ret = intel_uc_init_hw(&gt->uc);
        if (ret) {
-               DRM_ERROR("Enabling uc failed (%d)\n", ret);
+               i915_probe_error(i915, "Enabling uc failed (%d)\n", ret);
                goto out;
        }
 
-- 
2.19.2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to