Periodically check the scratch page to see if it changes. Scratch page
changes almost always indicate something is wrong.

The scratch page is expected to stay zero-filled, so we could simply
hard-code the md5 of a zeroed page and compare against that. I think
hashing the page at init time, as the code does now, is a bit more robust.

This is very much an RFC, since I've only compiled it and run it for a few
seconds.

Recommended-by: Stephane Marchesin <marc...@chromium.org>
Signed-off-by: Ben Widawsky <b...@bwidawsk.net>
---
 drivers/gpu/drm/Kconfig             |  2 +
 drivers/gpu/drm/i915/i915_drv.c     |  5 +++
 drivers/gpu/drm/i915/i915_drv.h     | 14 +++++++
 drivers/gpu/drm/i915/i915_gem_gtt.c | 73 +++++++++++++++++++++++++++++++++++++
 4 files changed, 94 insertions(+)

diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 19b8e0d..44efe74 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -125,6 +125,8 @@ config DRM_I915
        depends on DRM
        depends on AGP
        depends on AGP_INTEL
+       select CRYPTO
+       select CRYPTO_MD5
        # we need shmfs for the swappable backing store, and in particular
        # the shmem_readpage() which depends upon tmpfs
        select SHMEM
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 9ebe895..a1f6142 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -128,6 +128,11 @@ module_param_named(disable_power_well, i915_disable_power_well, int, 0600);
 MODULE_PARM_DESC(disable_power_well,
                 "Disable the power well when possible (default: false)");
 
+int i915_enable_scratch_checker __read_mostly = 0;
+module_param_named(enable_scratch_checker, i915_enable_scratch_checker, int, 0600);
+MODULE_PARM_DESC(enable_scratch_checker,
+                "Enable periodic timer to find stray writes to the scratch page (default: false)");
+
 static struct drm_driver driver;
 extern int intel_agp_enabled;
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index d5dcf7f..c1f4ef7 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -43,6 +43,8 @@
 #include <linux/intel-iommu.h>
 #include <linux/kref.h>
 #include <linux/pm_qos.h>
+#include <linux/crypto.h>
+#include <crypto/md5.h>
 
 /* General customization:
  */
@@ -869,6 +871,16 @@ struct i915_gpu_error {
        unsigned int stop_rings;
 };
 
+struct i915_scratch_checker {
+       struct scatterlist sg; /* sg for scratch page */
+       struct hash_desc hash; /* descriptor whose tfm is the "md5" hash */
+       char last_hash[MD5_DIGEST_SIZE]; /* most recently stored page digest */
+
+#define I915_SCRATCH_DIRT_PERIOD 1000 /* in ms */
+#define I915_SCRATCH_DIRT_JIFFIES msecs_to_jiffies(I915_SCRATCH_DIRT_PERIOD)
+       struct timer_list timer; /* runs scratch_checker() periodically */
+};
+
 enum modeset_restore {
        MODESET_ON_LID_OPEN,
        MODESET_DONE,
@@ -1058,6 +1070,7 @@ typedef struct drm_i915_private {
        struct drm_mm_node *compressed_llb;
 
        struct i915_gpu_error gpu_error;
+       struct i915_scratch_checker scratch_checker;
 
        /* list of fbdev register on this device */
        struct intel_fbdev *fbdev;
@@ -1435,6 +1448,7 @@ extern bool i915_enable_hangcheck __read_mostly;
 extern int i915_enable_ppgtt __read_mostly;
 extern unsigned int i915_preliminary_hw_support __read_mostly;
 extern int i915_disable_power_well __read_mostly;
+extern int i915_enable_scratch_checker __read_mostly;
 
 extern int i915_suspend(struct drm_device *dev, pm_message_t state);
 extern int i915_resume(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 50df194..df7a3a8 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -660,6 +660,72 @@ void i915_gem_init_global_gtt(struct drm_device *dev)
        i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size);
 }
 
+/*
+ * Timer callback: hash the scratch page again and compare with the stored
+ * digest. A mismatch is logged and becomes the new reference, so each
+ * change is reported once. The timer is re-armed on every path.
+ */
+static void scratch_checker(unsigned long data)
+{
+       struct drm_i915_private *i915 = (struct drm_i915_private *)data;
+       struct i915_scratch_checker *chk = &i915->scratch_checker;
+       char digest[MD5_DIGEST_SIZE];
+       unsigned long next;
+
+       /* A hashing failure is only logged; the stored digest is kept. */
+       if (crypto_hash_digest(&chk->hash, &chk->sg, PAGE_SIZE, digest)) {
+               DRM_DEBUG_DRIVER("Couldn't hash scratch\n");
+       } else if (memcmp(digest, chk->last_hash, MD5_DIGEST_SIZE)) {
+               DRM_DEBUG("Scratch page contents changed\n");
+               memcpy(chk->last_hash, digest, MD5_DIGEST_SIZE);
+       }
+
+       /* Next run: I915_SCRATCH_DIRT_PERIOD ms ahead, rounded up. */
+       next = round_jiffies_up(jiffies + I915_SCRATCH_DIRT_JIFFIES);
+       mod_timer(&chk->timer, next);
+}
+
+/*
+ * Build the sg entry for the scratch page, allocate an md5 transform, record
+ * the page's initial digest and set up (without arming) the check timer.
+ */
+static void scratch_checker_init(struct drm_i915_private *dev_priv)
+{
+       struct i915_scratch_checker *sc = &dev_priv->scratch_checker;
+
+       /* Cover the whole page: the digests below hash PAGE_SIZE bytes. */
+       sg_init_table(&sc->sg, 1);
+       sg_set_page(&sc->sg, dev_priv->gtt.scratch_page, PAGE_SIZE, 0);
+
+       /* On failure hash.tfm keeps the failed value; no timer is set up. */
+       sc->hash.tfm = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
+       if (IS_ERR_OR_NULL(sc->hash.tfm))
+               return;
+
+       if (crypto_hash_init(&sc->hash))
+               goto err;
+       if (crypto_hash_digest(&sc->hash, &sc->sg, PAGE_SIZE, sc->last_hash))
+               goto err;
+
+       /* Prepare the periodic check; arming it is left to the caller. */
+       setup_timer(&sc->timer, scratch_checker, (unsigned long)dev_priv);
+       return;
+
+err:
+       /* Drop the transform and clear the pointer so it is not reused. */
+       crypto_free_hash(sc->hash.tfm);
+       sc->hash.tfm = NULL;
+}
+
+/* Stop the check timer and free the md5 transform, if init obtained one. */
+static void scratch_checker_fini(struct drm_i915_private *dev_priv)
+{
+       if (IS_ERR_OR_NULL(dev_priv->scratch_checker.hash.tfm))
+               return;
+       del_timer_sync(&dev_priv->scratch_checker.timer);
+       crypto_free_hash(dev_priv->scratch_checker.hash.tfm);
+}
+
 static int setup_scratch_page(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
@@ -682,6 +748,7 @@ static int setup_scratch_page(struct drm_device *dev)
 #endif
        dev_priv->gtt.scratch_page = page;
        dev_priv->gtt.scratch_page_dma = dma_addr;
+       scratch_checker_init(dev_priv);
 
        return 0;
 }
@@ -689,6 +756,7 @@ static int setup_scratch_page(struct drm_device *dev)
 static void teardown_scratch_page(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
+       scratch_checker_fini(dev_priv);
        set_pages_wb(dev_priv->gtt.scratch_page, 1);
        pci_unmap_page(dev->pdev, dev_priv->gtt.scratch_page_dma,
                       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
@@ -811,6 +879,7 @@ static void i915_gmch_remove(struct drm_device *dev)
        intel_gmch_remove();
 }
 
+
 int i915_gem_gtt_init(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
@@ -840,5 +909,9 @@ int i915_gem_gtt_init(struct drm_device *dev)
        DRM_DEBUG_DRIVER("GTT stolen size = %zdM\n",
                         dev_priv->gtt.stolen_size >> 20);
 
+       if (i915_enable_scratch_checker)
+               mod_timer(&dev_priv->scratch_checker.timer,
+                         jiffies + I915_SCRATCH_DIRT_JIFFIES);
+
        return 0;
 }
-- 
1.8.2.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to