GPU reset is a very important piece of our infrastructure.
Unfortunately we only really test it by actually hanging the gpu,
which often has bad side-effects for the entire system. And the gpu
hang handling code is one of the rather complicated pieces of code we
have, consisting of
- hang detection
- error capture
- actual gpu reset
- reset of all the gem bookkeeping
- reinitialition of the entire gpu

This patch adds a debugfs to selectively stopping rings by ceasing to
update the hw tail pointer. This way we can exercise the gpu hang code
under controlled conditions without a dying gpu taking down the entire
systems.

Patch motivated by me forgetting to properly reinitialize ppgtt after
a gpu reset.

Signed-Off-by: Daniel Vetter <daniel.vet...@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_debugfs.c     |   63 +++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_drv.c         |    3 +
 drivers/gpu/drm/i915/i915_drv.h         |    2 +
 drivers/gpu/drm/i915/intel_ringbuffer.c |    4 ++
 4 files changed, 72 insertions(+), 0 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 8476441..9821a3b 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1392,6 +1392,64 @@ static const struct file_operations i915_wedged_fops = {
 };
 
 static ssize_t
+i915_ring_stop_read(struct file *filp,
+                   char __user *ubuf,
+                   size_t max,
+                   loff_t *ppos)
+{
+       struct drm_device *dev = filp->private_data;
+       drm_i915_private_t *dev_priv = dev->dev_private;
+       char buf[80];
+       int len;
+
+       len = snprintf(buf, sizeof(buf),
+                      "%d\n", dev_priv->stop_rings);
+
+       if (len > sizeof(buf))
+               len = sizeof(buf);
+
+       return simple_read_from_buffer(ubuf, max, ppos, buf, len);
+}
+
+static ssize_t
+i915_ring_stop_write(struct file *filp,
+                    const char __user *ubuf,
+                    size_t cnt,
+                    loff_t *ppos)
+{
+       struct drm_device *dev = filp->private_data;
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       char buf[20];
+       int val = 0;
+
+       if (cnt > 0) {
+               if (cnt > sizeof(buf) - 1)
+                       return -EINVAL;
+
+               if (copy_from_user(buf, ubuf, cnt))
+                       return -EFAULT;
+               buf[cnt] = 0;
+
+               val = simple_strtoul(buf, NULL, 0);
+       }
+
+       DRM_DEBUG_DRIVER("Stopping rings %u\n", val);
+
+       mutex_lock(&dev->struct_mutex);
+       dev_priv->stop_rings = val;
+       mutex_unlock(&dev->struct_mutex);
+
+       return cnt;
+}
+
+static const struct file_operations i915_ring_stop_fops = {
+       .owner = THIS_MODULE,
+       .open = i915_debugfs_common_open,
+       .read = i915_ring_stop_read,
+       .write = i915_ring_stop_write,
+       .llseek = default_llseek,
+};
+static ssize_t
 i915_max_freq_read(struct file *filp,
                   char __user *ubuf,
                   size_t max,
@@ -1691,6 +1749,11 @@ int i915_debugfs_init(struct drm_minor *minor)
                                  &i915_cache_sharing_fops);
        if (ret)
                return ret;
+       ret = i915_debugfs_create(minor->debugfs_root, minor,
+                                 "i915_ring_stop",
+                                 &i915_ring_stop_fops);
+       if (ret)
+               return ret;
 
        return drm_debugfs_create_files(i915_debugfs_list,
                                        I915_DEBUGFS_ENTRIES,
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 548e04b..566cc1e 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -611,6 +611,9 @@ int i915_reset(struct drm_device *dev, u8 flags)
        if (!mutex_trylock(&dev->struct_mutex))
                return -EBUSY;
 
+       printk("reenabling rings\n");
+       dev_priv->stop_rings = 0;
+
        i915_gem_reset(dev);
 
        ret = -ENODEV;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index bd98fb3..503ae8c 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -330,6 +330,8 @@ typedef struct drm_i915_private {
        uint32_t last_instdone;
        uint32_t last_instdone1;
 
+       unsigned int stop_rings;
+
        unsigned long cfb_size;
        unsigned int cfb_fb;
        enum plane cfb_plane;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 3c30dba..ef7a1ca 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1179,7 +1179,11 @@ int intel_ring_begin(struct intel_ring_buffer *ring,
 
 void intel_ring_advance(struct intel_ring_buffer *ring)
 {
+       struct drm_i915_private *dev_priv = ring->dev->dev_private;
+
        ring->tail &= ring->size - 1;
+       if (dev_priv->stop_rings & intel_ring_flag(ring))
+               return;
        ring->write_tail(ring, ring->tail);
 }
 
-- 
1.7.6.4

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to