i915: Move error state capture code out from i915_irq.c

Mika Kuoppala Tue, 01 Nov 2016 09:04:18 -0700

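i915_gpu_error.c is already the home for error handling and error
state capture, so move i915_handle_error() and its helpers
(i915_clear_error_registers(), i915_error_wake_up() and
i915_reset_and_wakeup()) there from i915_irq.c.

i915_capture_error_state() becomes static and its prototype is dropped
from i915_drv.h, as i915_handle_error(), which calls it, now lives in
the same file.

No functional changes.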

Cc: Chris Wilson <ch...@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahti...@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursu...@intel.com>
Signed-off-by: Mika Kuoppala <mika.kuopp...@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h       |   4 +-
 drivers/gpu/drm/i915/i915_gpu_error.c | 157 +++++++++++++++++++++++++++++++++-
 drivers/gpu/drm/i915/i915_irq.c       | 151 --------------------------------
 3 files changed, 155 insertions(+), 157 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 6c0b0a6..88301fa 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3686,9 +3686,7 @@ static inline void i915_error_state_buf_release(
 {
        kfree(eb->buf);
 }
-void i915_capture_error_state(struct drm_i915_private *dev_priv,
-                             u32 engine_mask,
-                             const char *error_msg);
+
 void i915_error_state_get(struct drm_device *dev,
                          struct i915_error_state_file_priv *error_priv);
 void i915_error_state_put(struct i915_error_state_file_priv *error_priv);
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 204093f..e307841 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1585,9 +1585,9 @@ static int capture(void *data)
  * out a structure which becomes available in debugfs for user level tools
  * to pick up.
  */
-void i915_capture_error_state(struct drm_i915_private *dev_priv,
-                             u32 engine_mask,
-                             const char *error_msg)
+static void i915_capture_error_state(struct drm_i915_private *dev_priv,
+                                    u32 engine_mask,
+                                    const char *error_msg)
 {
        static bool warned;
        struct drm_i915_error_state *error;
@@ -1640,6 +1640,108 @@ void i915_capture_error_state(struct drm_i915_private *dev_priv,
        }
 }
 
+static void i915_clear_error_registers(struct drm_i915_private *dev_priv)
+{
+       u32 eir;
+
+       if (!IS_GEN2(dev_priv))
+               I915_WRITE(PGTBL_ER, I915_READ(PGTBL_ER));
+
+       if (INTEL_GEN(dev_priv) < 4)
+               I915_WRITE(IPEIR, I915_READ(IPEIR));
+       else
+               I915_WRITE(IPEIR_I965, I915_READ(IPEIR_I965));
+
+       I915_WRITE(EIR, I915_READ(EIR));
+       eir = I915_READ(EIR);
+       if (eir) {
+               /*
+                * some errors might have become stuck,
+                * mask them.
+                */
+               DRM_DEBUG_DRIVER("EIR stuck: 0x%08x, masking\n", eir);
+               I915_WRITE(EMR, I915_READ(EMR) | eir);
+               I915_WRITE(IIR, I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT);
+       }
+}
+
+static void i915_error_wake_up(struct drm_i915_private *dev_priv)
+{
+       /*
+        * Notify all waiters for GPU completion events that reset state has
+        * been changed, and that they need to restart their wait after
+        * checking for potential errors (and bail out to drop locks if there is
+        * a gpu reset pending so that i915_error_work_func can acquire them).
+        */
+
+       /* Wake up __wait_seqno, potentially holding dev->struct_mutex. */
+       wake_up_all(&dev_priv->gpu_error.wait_queue);
+
+       /* Wake up intel_crtc_wait_for_pending_flips, holding crtc->mutex. */
+       wake_up_all(&dev_priv->pending_flip_queue);
+}
+
+/**
+ * i915_reset_and_wakeup - do process context error handling work
+ * @dev_priv: i915 device private
+ *
+ * Fire an error uevent so userspace can see that a hang or error
+ * was detected.
+ */
+static void i915_reset_and_wakeup(struct drm_i915_private *dev_priv)
+{
+       struct kobject *kobj = &dev_priv->drm.primary->kdev->kobj;
+       char *error_event[] = { I915_ERROR_UEVENT "=1", NULL };
+       char *reset_event[] = { I915_RESET_UEVENT "=1", NULL };
+       char *reset_done_event[] = { I915_ERROR_UEVENT "=0", NULL };
+
+       kobject_uevent_env(kobj, KOBJ_CHANGE, error_event);
+
+       DRM_DEBUG_DRIVER("resetting chip\n");
+       kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event);
+
+       /*
+        * In most cases it's guaranteed that we get here with an RPM
+        * reference held, for example because there is a pending GPU
+        * request that won't finish until the reset is done. This
+        * isn't the case at least when we get here by doing a
+        * simulated reset via debugfs, so get an RPM reference.
+        */
+       intel_runtime_pm_get(dev_priv);
+       intel_prepare_reset(dev_priv);
+
+       do {
+               /*
+                * All state reset _must_ be completed before we update the
+                * reset counter, for otherwise waiters might miss the reset
+                * pending state and not properly drop locks, resulting in
+                * deadlocks with the reset work.
+                */
+               if (mutex_trylock(&dev_priv->drm.struct_mutex)) {
+                       i915_reset(dev_priv);
+                       mutex_unlock(&dev_priv->drm.struct_mutex);
+               }
+
+               /* We need to wait for anyone holding the lock to wakeup */
+       } while (wait_on_bit_timeout(&dev_priv->gpu_error.flags,
+                                    I915_RESET_IN_PROGRESS,
+                                    TASK_UNINTERRUPTIBLE,
+                                    HZ));
+
+       intel_finish_reset(dev_priv);
+       intel_runtime_pm_put(dev_priv);
+
+       if (!test_bit(I915_WEDGED, &dev_priv->gpu_error.flags))
+               kobject_uevent_env(kobj,
+                                  KOBJ_CHANGE, reset_done_event);
+
+       /*
+        * Note: The wake_up also serves as a memory barrier so that
+        * waiters see the updated value of the dev_priv->gpu_error.
+        */
+       wake_up_all(&dev_priv->gpu_error.reset_queue);
+}
+
 void i915_error_state_get(struct drm_device *dev,
                          struct i915_error_state_file_priv *error_priv)
 {
@@ -1671,3 +1773,52 @@ void i915_destroy_error_state(struct drm_device *dev)
        if (error)
                kref_put(&error->ref, i915_error_state_free);
 }
+
+/**
+ * i915_handle_error - handle a gpu error
+ * @dev_priv: i915 device private
+ * @engine_mask: mask representing engines that are hung
+ * Do some basic checking of register state at error time and
+ * dump it to the syslog.  Also call i915_capture_error_state() to make
+ * sure we get a record and make it available in debugfs.  Fire a uevent
+ * so userspace knows something bad happened (should trigger collection
+ * of a ring dump etc.).
+ * @fmt: Error message format string
+ */
+void i915_handle_error(struct drm_i915_private *dev_priv,
+                      u32 engine_mask,
+                      const char *fmt, ...)
+{
+       va_list args;
+       char error_msg[80];
+
+       va_start(args, fmt);
+       vscnprintf(error_msg, sizeof(error_msg), fmt, args);
+       va_end(args);
+
+       i915_capture_error_state(dev_priv, engine_mask, error_msg);
+       i915_clear_error_registers(dev_priv);
+
+       if (!engine_mask)
+               return;
+
+       if (test_and_set_bit(I915_RESET_IN_PROGRESS,
+                            &dev_priv->gpu_error.flags))
+               return;
+
+       /*
+        * Wakeup waiting processes so that the reset function
+        * i915_reset_and_wakeup doesn't deadlock trying to grab
+        * various locks. By bumping the reset counter first, the woken
+        * processes will see a reset in progress and back off,
+        * releasing their locks and then wait for the reset completion.
+        * We must do this for _all_ gpu waiters that might hold locks
+        * that the reset work needs to acquire.
+        *
+        * Note: The wake_up also provides a memory barrier to ensure that the
+        * waiters see the updated value of the reset flags.
+        */
+       i915_error_wake_up(dev_priv);
+
+       i915_reset_and_wakeup(dev_priv);
+}
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 6d7505b..f6869f0 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2571,83 +2571,6 @@ static irqreturn_t gen8_irq_handler(int irq, void *arg)
        return ret;
 }
 
-static void i915_error_wake_up(struct drm_i915_private *dev_priv)
-{
-       /*
-        * Notify all waiters for GPU completion events that reset state has
-        * been changed, and that they need to restart their wait after
-        * checking for potential errors (and bail out to drop locks if there is
-        * a gpu reset pending so that i915_error_work_func can acquire them).
-        */
-
-       /* Wake up __wait_seqno, potentially holding dev->struct_mutex. */
-       wake_up_all(&dev_priv->gpu_error.wait_queue);
-
-       /* Wake up intel_crtc_wait_for_pending_flips, holding crtc->mutex. */
-       wake_up_all(&dev_priv->pending_flip_queue);
-}
-
-/**
- * i915_reset_and_wakeup - do process context error handling work
- * @dev_priv: i915 device private
- *
- * Fire an error uevent so userspace can see that a hang or error
- * was detected.
- */
-static void i915_reset_and_wakeup(struct drm_i915_private *dev_priv)
-{
-       struct kobject *kobj = &dev_priv->drm.primary->kdev->kobj;
-       char *error_event[] = { I915_ERROR_UEVENT "=1", NULL };
-       char *reset_event[] = { I915_RESET_UEVENT "=1", NULL };
-       char *reset_done_event[] = { I915_ERROR_UEVENT "=0", NULL };
-
-       kobject_uevent_env(kobj, KOBJ_CHANGE, error_event);
-
-       DRM_DEBUG_DRIVER("resetting chip\n");
-       kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event);
-
-       /*
-        * In most cases it's guaranteed that we get here with an RPM
-        * reference held, for example because there is a pending GPU
-        * request that won't finish until the reset is done. This
-        * isn't the case at least when we get here by doing a
-        * simulated reset via debugs, so get an RPM reference.
-        */
-       intel_runtime_pm_get(dev_priv);
-       intel_prepare_reset(dev_priv);
-
-       do {
-               /*
-                * All state reset _must_ be completed before we update the
-                * reset counter, for otherwise waiters might miss the reset
-                * pending state and not properly drop locks, resulting in
-                * deadlocks with the reset work.
-                */
-               if (mutex_trylock(&dev_priv->drm.struct_mutex)) {
-                       i915_reset(dev_priv);
-                       mutex_unlock(&dev_priv->drm.struct_mutex);
-               }
-
-               /* We need to wait for anyone holding the lock to wakeup */
-       } while (wait_on_bit_timeout(&dev_priv->gpu_error.flags,
-                                    I915_RESET_IN_PROGRESS,
-                                    TASK_UNINTERRUPTIBLE,
-                                    HZ));
-
-       intel_finish_reset(dev_priv);
-       intel_runtime_pm_put(dev_priv);
-
-       if (!test_bit(I915_WEDGED, &dev_priv->gpu_error.flags))
-               kobject_uevent_env(kobj,
-                                  KOBJ_CHANGE, reset_done_event);
-
-       /*
-        * Note: The wake_up also serves as a memory barrier so that
-        * waiters see the updated value of the dev_priv->gpu_error.
-        */
-       wake_up_all(&dev_priv->gpu_error.reset_queue);
-}
-
 static inline void
 i915_err_print_instdone(struct drm_i915_private *dev_priv,
                        struct intel_instdone *instdone)
@@ -2674,80 +2597,6 @@ i915_err_print_instdone(struct drm_i915_private *dev_priv,
                       slice, subslice, instdone->row[slice][subslice]);
 }
 
-static void i915_clear_error_registers(struct drm_i915_private *dev_priv)
-{
-       u32 eir;
-
-       if (!IS_GEN2(dev_priv))
-               I915_WRITE(PGTBL_ER, I915_READ(PGTBL_ER));
-
-       if (INTEL_GEN(dev_priv) < 4)
-               I915_WRITE(IPEIR, I915_READ(IPEIR));
-       else
-               I915_WRITE(IPEIR_I965, I915_READ(IPEIR_I965));
-
-       I915_WRITE(EIR, I915_READ(EIR));
-       eir = I915_READ(EIR);
-       if (eir) {
-               /*
-                * some errors might have become stuck,
-                * mask them.
-                */
-               DRM_DEBUG_DRIVER("EIR stuck: 0x%08x, masking\n", eir);
-               I915_WRITE(EMR, I915_READ(EMR) | eir);
-               I915_WRITE(IIR, I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT);
-       }
-}
-
-/**
- * i915_handle_error - handle a gpu error
- * @dev_priv: i915 device private
- * @engine_mask: mask representing engines that are hung
- * Do some basic checking of register state at error time and
- * dump it to the syslog.  Also call i915_capture_error_state() to make
- * sure we get a record and make it available in debugfs.  Fire a uevent
- * so userspace knows something bad happened (should trigger collection
- * of a ring dump etc.).
- * @fmt: Error message format string
- */
-void i915_handle_error(struct drm_i915_private *dev_priv,
-                      u32 engine_mask,
-                      const char *fmt, ...)
-{
-       va_list args;
-       char error_msg[80];
-
-       va_start(args, fmt);
-       vscnprintf(error_msg, sizeof(error_msg), fmt, args);
-       va_end(args);
-
-       i915_capture_error_state(dev_priv, engine_mask, error_msg);
-       i915_clear_error_registers(dev_priv);
-
-       if (!engine_mask)
-               return;
-
-       if (test_and_set_bit(I915_RESET_IN_PROGRESS,
-                            &dev_priv->gpu_error.flags))
-               return;
-
-       /*
-        * Wakeup waiting processes so that the reset function
-        * i915_reset_and_wakeup doesn't deadlock trying to grab
-        * various locks. By bumping the reset counter first, the woken
-        * processes will see a reset in progress and back off,
-        * releasing their locks and then wait for the reset completion.
-        * We must do this for _all_ gpu waiters that might hold locks
-        * that the reset work needs to acquire.
-        *
-        * Note: The wake_up also provides a memory barrier to ensure that the
-        * waiters see the updated value of the reset flags.
-        */
-       i915_error_wake_up(dev_priv);
-
-       i915_reset_and_wakeup(dev_priv);
-}
-
 /* Called from drm generic code, passed 'crtc' which
  * we use as a pipe index
  */
-- 
2.7.4

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [PATCH 2/2] drm/i915: Move error state capture code out from i915_irq.c

Reply via email to