xe_hw_engine_print is called by debugfs to do an immediate raw
dump of the engine registers. It depends on hw_engine_snapshot_capture,
which assumes that a prior capture with a matching job is ready for
printing. However, in the debugfs case there is no prior job, so ensure
hw_engine_snapshot_capture can also invoke GuC-Err-Capture for
an immediate jobless snapshot.

Additionally, because there are valid cases where raw jobless
register dumps and prints are done, such as gt-reset events,
differentiate manual captures that were attached to a job from
late manual raw captures that are jobless.

   v7:- Fix mismatch between function name and comment (kernel test robot)
      - Differentiate between early manual captures that have a
        job association vs raw manual captures that may not have
        a job association like in gt-reset events. (John Harrison).

Signed-off-by: Alan Previn <alan.previn.teres.ale...@intel.com>
---
 drivers/gpu/drm/xe/xe_guc_capture.c           | 37 +++++++++++++++++--
 drivers/gpu/drm/xe/xe_guc_capture.h           |  2 +
 .../drm/xe/xe_guc_capture_snapshot_types.h    |  6 ++-
 drivers/gpu/drm/xe/xe_guc_submit.c            |  2 +-
 drivers/gpu/drm/xe/xe_hw_engine.c             | 17 +++++++--
 5 files changed, 55 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc_capture.c 
b/drivers/gpu/drm/xe/xe_guc_capture.c
index ff16bed86b77..746d3b21b18b 100644
--- a/drivers/gpu/drm/xe/xe_guc_capture.c
+++ b/drivers/gpu/drm/xe/xe_guc_capture.c
@@ -1584,6 +1584,32 @@ guc_capture_get_manual_snapshot(struct xe_guc *guc, 
struct xe_hw_engine *hwe)
        return new;
 }
 
+/**
+ * xe_guc_capture_snapshot_manual_hwe - Generate and get manual engine 
register dump
+ * @guc: Target GuC for manual capture
+ * @hwe: The engine instance to capture from
+ *
+ * Generate a manual GuC-Error-Capture snapshot of engine instance + engine 
class registers
+ * without any queue association. This capture node is not stored in outlist 
or cachelist.
+ * Returns: New capture node (caller must "put" it), or NULL on failure
+ */
+struct xe_guc_capture_snapshot *
+xe_guc_capture_snapshot_manual_hwe(struct xe_guc *guc, struct xe_hw_engine 
*hwe)
+{
+       struct xe_guc_capture_snapshot *new;
+
+       new = guc_capture_get_manual_snapshot(guc, hwe);
+       if (!new)
+               return NULL;
+
+       new->guc_id = 0;
+       new->lrca = 0;
+       new->is_partial = 0;
+       new->source = XE_ENGINE_CAPTURE_SOURCE_MANUAL_RAW;
+
+       return new;
+}
+
 /**
  * xe_guc_capture_snapshot_store_manual_job - Generate and store a manual 
engine register dump
  * @guc: Target GuC for manual capture
@@ -1631,7 +1657,7 @@ xe_guc_capture_snapshot_store_manual_job(struct xe_guc 
*guc, struct xe_exec_queu
        new->lrca = xe_lrc_ggtt_addr(q->lrc[0]);
        new->is_partial = 0;
        new->locked = 1;
-       new->source = XE_ENGINE_CAPTURE_SOURCE_MANUAL;
+       new->source = XE_ENGINE_CAPTURE_SOURCE_MANUAL_JOB;
 
        guc_capture_add_node_to_outlist(guc->capture, new);
 
@@ -1772,6 +1798,11 @@ void xe_guc_capture_snapshot_print(struct xe_guc *guc, 
struct xe_guc_capture_sna
                "full-capture",
                "partial-capture"
        };
+       const char *srctype[XE_ENGINE_CAPTURE_SOURCE_GUC + 1] = {
+               "Manual-Job",
+               "Manual-Raw",
+               "GuC"
+       };
        int type;
        const struct __guc_mmio_reg_descr_group *list;
        struct xe_gt *gt;
@@ -1788,9 +1819,7 @@ void xe_guc_capture_snapshot_print(struct xe_guc *guc, 
struct xe_guc_capture_sna
                return;
        }
 
-       drm_printf(p, "\tCapture_source: %s\n",
-                  node->source == XE_ENGINE_CAPTURE_SOURCE_GUC ?
-                  "GuC" : "Manual");
+       drm_printf(p, "\tCapture_source: %s\n", srctype[node->source]);
        drm_printf(p, "\tCoverage: %s\n", grptype[node->is_partial]);
 
        for (type = GUC_STATE_CAPTURE_TYPE_GLOBAL; type < 
GUC_STATE_CAPTURE_TYPE_MAX; type++) {
diff --git a/drivers/gpu/drm/xe/xe_guc_capture.h 
b/drivers/gpu/drm/xe/xe_guc_capture.h
index 77ee35a3f205..5df4b5579d2b 100644
--- a/drivers/gpu/drm/xe/xe_guc_capture.h
+++ b/drivers/gpu/drm/xe/xe_guc_capture.h
@@ -54,6 +54,8 @@ struct xe_guc_capture_snapshot *
 xe_guc_capture_get_matching_and_lock(struct xe_exec_queue *q,
                                     enum xe_guc_capture_snapshot_source 
srctype);
 void xe_guc_capture_snapshot_store_manual_job(struct xe_guc *guc, struct 
xe_exec_queue *q);
+struct xe_guc_capture_snapshot *
+xe_guc_capture_snapshot_manual_hwe(struct xe_guc *guc, struct xe_hw_engine 
*hwe);
 void xe_guc_capture_snapshot_print(struct xe_guc *guc, struct 
xe_guc_capture_snapshot *node,
                                   struct drm_printer *p);
 void xe_guc_capture_steered_list_init(struct xe_guc *guc);
diff --git a/drivers/gpu/drm/xe/xe_guc_capture_snapshot_types.h 
b/drivers/gpu/drm/xe/xe_guc_capture_snapshot_types.h
index a5579e69da2e..43f1cf046732 100644
--- a/drivers/gpu/drm/xe/xe_guc_capture_snapshot_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_capture_snapshot_types.h
@@ -12,7 +12,11 @@
 struct guc_mmio_reg;
 
 enum xe_guc_capture_snapshot_source {
-       XE_ENGINE_CAPTURE_SOURCE_MANUAL,
+       /* KMD captured engine registers when job timeout is detected */
+       XE_ENGINE_CAPTURE_SOURCE_MANUAL_JOB,
+       /* KMD captured raw engine registers without any job association */
+       XE_ENGINE_CAPTURE_SOURCE_MANUAL_RAW,
+       /* GUC-FW captured engine registers before workload was killed */
        XE_ENGINE_CAPTURE_SOURCE_GUC
 };
 
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c 
b/drivers/gpu/drm/xe/xe_guc_submit.c
index 6e33081dd7b8..4d7530e8bf63 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -1079,7 +1079,7 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
                /*
                 * Generate a manual capture. Below function will store it
                 * in GuC Error Capture's internal link-list as if it came from 
GuC
-                * but with a source-type == XE_ENGINE_CAPTURE_SOURCE_MANUAL
+                * but with a source-type == XE_ENGINE_CAPTURE_SOURCE_MANUAL_JOB
                 */
                xe_guc_capture_snapshot_store_manual_job(guc, q);
                xe_force_wake_put(gt_to_fw(q->gt), fw_ref);
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c 
b/drivers/gpu/drm/xe/xe_hw_engine.c
index fef01d2086a8..d0ed0639ae08 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -832,7 +832,7 @@ void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 
intr_vec)
 /**
  * hw_engine_snapshot_capture - Take a quick snapshot of the HW Engine.
  * @hwe: Xe HW Engine.
- * @q: The exec queue object.
+ * @q: The exec queue object. (can be NULL for debugfs engine-register dump)
  *
  * This can be printed out in a later stage like during dev_coredump
  * analysis.
@@ -845,9 +845,11 @@ hw_engine_snapshot_capture(struct xe_hw_engine *hwe, 
struct xe_exec_queue *q)
 {
        struct xe_hw_engine_snapshot *snapshot;
        struct xe_guc_capture_snapshot *node;
+       struct xe_guc *guc;
 
        if (!xe_hw_engine_is_valid(hwe))
                return NULL;
+       guc = &hwe->gt->uc.guc;
 
        snapshot = kzalloc(sizeof(*snapshot), GFP_ATOMIC);
 
@@ -869,7 +871,7 @@ hw_engine_snapshot_capture(struct xe_hw_engine *hwe, struct 
xe_exec_queue *q)
 
        if (q) {
                /* First, retrieve the manual GuC-Error-Capture node if it 
exists */
-               node = xe_guc_capture_get_matching_and_lock(q, 
XE_ENGINE_CAPTURE_SOURCE_MANUAL);
+               node = xe_guc_capture_get_matching_and_lock(q, 
XE_ENGINE_CAPTURE_SOURCE_MANUAL_JOB);
                /* Find preferred node type sourced from firmware if available 
*/
                snapshot->matched_node = 
xe_guc_capture_get_matching_and_lock(q, XE_ENGINE_CAPTURE_SOURCE_GUC);
                if (!snapshot->matched_node) {
@@ -877,13 +879,22 @@ hw_engine_snapshot_capture(struct xe_hw_engine *hwe, 
struct xe_exec_queue *q)
                        snapshot->matched_node = node;
                } else if (node) {
                        xe_gt_dbg(hwe->gt, "Found manual GuC-Err-Capture for 
queue %s", q->name);
-                       xe_guc_capture_put_matched_nodes(&hwe->gt->uc.guc, 
node);
+                       xe_guc_capture_put_matched_nodes(guc, node);
                }
                if (!snapshot->matched_node)
                        xe_gt_dbg(hwe->gt, "Can't retrieve any GuC-Err-Capture 
node for queue %s",
                                  q->name);
        }
 
+       if (!snapshot->matched_node) {
+               /*
+                * Fallback path - do an immediate jobless manual engine 
capture.
+                * This will happen when debugfs is triggered to force an 
engine dump.
+                */
+               snapshot->matched_node = 
xe_guc_capture_snapshot_manual_hwe(guc, hwe);
+               xe_gt_dbg(hwe->gt, "Fallback to jobless-manual-err-capture 
node");
+       }
+
        return snapshot;
 }
 
-- 
2.34.1

Reply via email to