I was recently dealing with a bug where the system would hard hang
somewhere between hangcheck and reset. There was enough time after error
collection to get the error state out, but I couldn't get reads of the
error state file (debugfs/sysfs) to work.

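When built with PUSH_TO_WIRE defined, this patch prints the error state
to the kernel log via printk as soon as the first error state is
recorded. It is also useful when the reset itself kills the machine and
you want to keep reset enabled but still get the error state.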

Since I found the patch pretty useful, I decided to clean it up and
submit it. It was originally meant mostly as a one-off hack, though.

If a maintainer decides it's useful, then here it is.

Signed-off-by: Ben Widawsky <b...@bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_debugfs.c   |  2 +-
 drivers/gpu/drm/i915/i915_drv.h       |  3 ++-
 drivers/gpu/drm/i915/i915_gpu_error.c | 31 +++++++++++++++++++++++++------
 drivers/gpu/drm/i915/i915_sysfs.c     |  2 +-
 4 files changed, 29 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 6b7b32b..2daad46 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -929,7 +929,7 @@ static ssize_t i915_error_state_read(struct file *file, 
char __user *userbuf,
        if (ret)
                return ret;
 
-       ret = i915_error_state_to_str(&error_str, error_priv);
+       ret = i915_error_state_to_str(&error_str, error_priv->dev, 
error_priv->error);
        if (ret)
                goto out;
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 1045006..b6a4f1e 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2544,7 +2544,8 @@ static inline void intel_display_crc_init(struct 
drm_device *dev) {}
 __printf(2, 3)
 void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...);
 int i915_error_state_to_str(struct drm_i915_error_state_buf *estr,
-                           const struct i915_error_state_file_priv *error);
+                           struct drm_device *dev,
+                           const struct drm_i915_error_state *error);
 int i915_error_state_buf_init(struct drm_i915_error_state_buf *eb,
                              size_t count, loff_t pos);
 static inline void i915_error_state_buf_release(
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c 
b/drivers/gpu/drm/i915/i915_gpu_error.c
index e82e590..1540bf6 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -184,8 +184,22 @@ static void i915_error_puts(struct 
drm_i915_error_state_buf *e,
        __i915_error_advance(e, len);
 }
 
-#define err_printf(e, ...) i915_error_printf(e, __VA_ARGS__)
-#define err_puts(e, s) i915_error_puts(e, s)
+
+static bool wire = false;
+#define err_printf(e, ...) do {                                \
+       if (wire) {                                     \
+               printk(__VA_ARGS__);                    \
+       } else {                                        \
+               i915_error_printf(e, __VA_ARGS__);      \
+       }                                               \
+} while (0)
+#define err_puts(e, s) do {                            \
+       if (wire) {                                     \
+               printk(s);                              \
+       } else {                                        \
+               i915_error_puts(e, s);                  \
+       }                                               \
+} while (0)
 
 static void print_error_buffers(struct drm_i915_error_state_buf *m,
                                const char *name,
@@ -240,7 +254,7 @@ static const char *hangcheck_action_to_str(enum 
intel_ring_hangcheck_action a)
 
 static void i915_ring_error_state(struct drm_i915_error_state_buf *m,
                                  struct drm_device *dev,
-                                 struct drm_i915_error_ring *ring)
+                                 const struct drm_i915_error_ring *ring)
 {
        if (!ring->valid)
                return;
@@ -322,11 +336,10 @@ static void print_error_obj(struct 
drm_i915_error_state_buf *m,
 }
 
 int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
-                           const struct i915_error_state_file_priv *error_priv)
+                           struct drm_device *dev,
+                           const struct drm_i915_error_state *error)
 {
-       struct drm_device *dev = error_priv->dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
-       struct drm_i915_error_state *error = error_priv->error;
        int i, j, offset, elt;
        int max_hangcheck_score;
 
@@ -1197,6 +1210,12 @@ void i915_capture_error_state(struct drm_device *dev, 
bool wedged,
        spin_lock_irqsave(&dev_priv->gpu_error.lock, flags);
        if (dev_priv->gpu_error.first_error == NULL) {
                dev_priv->gpu_error.first_error = error;
+#ifdef PUSH_TO_WIRE
+               /* Probably racy, but this is emergency debug */
+               wire = true;
+               i915_error_state_to_str(NULL, dev, error);
+               wire = false;
+#endif
                error = NULL;
        }
        spin_unlock_irqrestore(&dev_priv->gpu_error.lock, flags);
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c 
b/drivers/gpu/drm/i915/i915_sysfs.c
index 86ce39a..6f4be9d 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -512,7 +512,7 @@ static ssize_t error_state_read(struct file *filp, struct 
kobject *kobj,
        error_priv.dev = dev;
        i915_error_state_get(dev, &error_priv);
 
-       ret = i915_error_state_to_str(&error_str, &error_priv);
+       ret = i915_error_state_to_str(&error_str, dev, error_priv.error);
        if (ret)
                goto out;
 
-- 
2.0.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to