Hi guys,

I thought I'd attach this, as it is now gone 2AM and I doubt I'm going
to finish it "tonight". I was hoping to elicit some initial review to
suggest whether the design was sane or not.

I'd originally imagined tying the profiling lifetime to the execution /
completion of individual batch-buffers, but for now I'd like to get it
partly working like this, and perhaps develop some user-space program to
view the results and see if they make sense.

The basic (kernel side) functionality is there, albeit limited by some
hard-coded timing parameters and buffer sizes. I might look at whether
it makes sense to do some in-kernel over-sampling and percentage
generation, or just spit raw register dumps out to the debugfs interface
for userspace to do that. (Buffer size might play a part in that
decision).

The locking is a little rough, and I probably need to double-buffer the
sample buffers so I can always be sure the debugfs interface gets a
complete "frame" / "buffer" / whatever... trace's worth of data. Perhaps
I should put a semaphore around the debug data output which sleeps until
the profiling has been stopped. (E.g. at the end of a frame).


Currently the debugfs routine just takes a spinlock and copies out
whatever samples have been gathered, meaning the only reliable way to
see a full frame's worth of data is for it to be the last frame before
the instrumented client quit. Part of me did wonder about doing a
constant stream of data to userspace.. but then I quickly realised I had
no idea how to do that, and what to do if userspace lets us overrun our
buffers ;)

I've got a libdrm patch to expose the new IOCTL (also attached), but I
don't have a very good solution for hooking that into mesa and
synchronising with frames. I applied a VERY dirty kludge for testing.

Does anyone know if you can pass userdata parameters to a hrtimer? From
the API, it looked not - although in that case, how do you avoid needing
horrid global state variables?

Regards,

-- 
Peter Clifton

Electrical Engineering Division,
Engineering Department,
University of Cambridge,
9, JJ Thomson Avenue,
Cambridge
CB3 0FA

Tel: +44 (0)7729 980173 - (No signal in the lab!)
Tel: +44 (0)1223 748328 - (Shared lab phone, ask for me)
>From 3a5b5950624e88bcbd44073847d27e11c8199218 Mon Sep 17 00:00:00 2001
From: Peter Clifton <pc...@cam.ac.uk>
Date: Sun, 31 Oct 2010 01:27:58 +0000
Subject: [PATCH] Hacky little instdone and instdone1 profiler

---
 drivers/gpu/drm/i915/Makefile          |    1 +
 drivers/gpu/drm/i915/i915_debugfs.c    |    1 +
 drivers/gpu/drm/i915/i915_dma.c        |    7 +
 drivers/gpu/drm/i915/i915_drv.h        |   12 ++
 drivers/gpu/drm/i915/i915_gem.c        |    4 +
 drivers/gpu/drm/i915/i915_trace_idle.c |  309 ++++++++++++++++++++++++++++++++
 include/drm/i915_drm.h                 |    8 +
 7 files changed, 342 insertions(+), 0 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_trace_idle.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index fdc833d..45aacf8 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -10,6 +10,7 @@ i915-y := i915_drv.o i915_dma.o i915_irq.o i915_mem.o \
 	  i915_gem_debug.o \
 	  i915_gem_evict.o \
 	  i915_gem_tiling.o \
+	  i915_trace_idle.o \
 	  i915_trace_points.o \
 	  intel_display.o \
 	  intel_crt.o \
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 7698983..82d331a 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1043,6 +1043,7 @@ static struct drm_info_list i915_debugfs_list[] = {
 	{"i915_sr_status", i915_sr_status, 0},
 	{"i915_opregion", i915_opregion, 0},
 	{"i915_gem_framebuffer", i915_gem_framebuffer_info, 0},
+	{"i915_trace_idle", i915_trace_idle_debugfs_info, 0},
 };
 #define I915_DEBUGFS_ENTRIES ARRAY_SIZE(i915_debugfs_list)
 
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 785ee11..a41da27 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -2057,6 +2057,9 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
 	dev_priv->mchdev_lock = &mchdev_lock;
 	spin_unlock(&mchdev_lock);
 
+	/* XXX: Not sure if this belongs here or not */
+	i915_trace_idle_init (dev);
+
 	return 0;
 
 out_workqueue_free:
@@ -2077,6 +2080,9 @@ int i915_driver_unload(struct drm_device *dev)
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int ret;
 
+	/* XXX: Not sure if this belongs here or not */
+	i915_trace_idle_finish (dev);
+
 	spin_lock(&mchdev_lock);
 	i915_mch_dev = NULL;
 	spin_unlock(&mchdev_lock);
@@ -2263,6 +2269,7 @@ struct drm_ioctl_desc i915_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(I915_GEM_MADVISE, i915_gem_madvise_ioctl, DRM_UNLOCKED),
 	DRM_IOCTL_DEF_DRV(I915_OVERLAY_PUT_IMAGE, intel_overlay_put_image, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
 	DRM_IOCTL_DEF_DRV(I915_OVERLAY_ATTRS, intel_overlay_attrs, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I915_TRACE_IDLE, i915_trace_idle_ioctl, DRM_AUTH|DRM_UNLOCKED),
 };
 
 int i915_max_ioctl = DRM_ARRAY_SIZE(i915_ioctls);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 2c2c19b..274af4b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -707,6 +707,9 @@ typedef struct drm_i915_private {
 
 	/* list of fbdev register on this device */
 	struct intel_fbdev *fbdev;
+
+	/* Idle tracing data */
+	struct trace_idle_data *trace_idle_data;
 } drm_i915_private_t;
 
 /** driver private structure attached to each drm_gem_object */
@@ -1015,6 +1018,8 @@ int i915_gem_get_tiling(struct drm_device *dev, void *data,
 			struct drm_file *file_priv);
 int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
 				struct drm_file *file_priv);
+int i915_trace_idle_ioctl(struct drm_device *dev, void *data,
+			  struct drm_file *file_priv);
 void i915_gem_load(struct drm_device *dev);
 int i915_gem_init_object(struct drm_gem_object *obj);
 struct drm_gem_object * i915_gem_alloc_object(struct drm_device *dev,
@@ -1114,6 +1119,13 @@ extern int i915_restore_state(struct drm_device *dev);
 extern int i915_save_state(struct drm_device *dev);
 extern int i915_restore_state(struct drm_device *dev);
 
+/* i915_trace_idle.c */
+int i915_trace_idle_init(struct drm_device *dev);
+void i915_trace_idle_finish(struct drm_device *dev);
+int i915_trace_idle_start(struct drm_device *dev);
+int i915_trace_idle_stop(struct drm_device *dev);
+int i915_trace_idle_debugfs_info(struct seq_file *m, void *data);
+
 /* intel_i2c.c */
 extern int intel_setup_gmbus(struct drm_device *dev);
 extern void intel_teardown_gmbus(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 6c2618d..392b575 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3651,6 +3651,10 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 		return -EINVAL;
 	}
 
+	if (args->flags & I915_EXEC_TRACE_IDLE) {
+		DRM_INFO("Batchbuffer with I915_EXEC_TRACE_IDLE\n");
+	}
+
 	if (args->buffer_count < 1) {
 		DRM_ERROR("execbuf with %d buffers\n", args->buffer_count);
 		return -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_trace_idle.c b/drivers/gpu/drm/i915/i915_trace_idle.c
new file mode 100644
index 0000000..cbb640c
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_trace_idle.c
@@ -0,0 +1,309 @@
+/*
+ * Copyright © 2010 Peter Clifton
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *	Peter Clifton <pc...@cam.ac.uk>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/hrtimer.h>
+#include <linux/ktime.h>
+#include <linux/seq_file.h>
+
+#include <linux/input.h>
+#include <linux/slab.h>
+#include "drmP.h"
+#include "intel_drv.h"
+#include "i915_drm.h"
+#include "i915_drv.h"
+
+#define SAMPLE_INTERVAL_US 10
+#define US_TO_NS(x)	((x) * NSEC_PER_USEC)	/* integer constants: no floating point in kernel code */
+#define MS_TO_NS(x)	((x) * NSEC_PER_MSEC)
+
+/* Number of sample buffer entries */
+#define SAMPLE_BUFFER_LENGTH 8192
+
+/* XXX: OH DEAR GOODNESS DO I HATE HAVING TO MAKE THIS NASTY HACK! */
+static struct drm_device *global_dev = NULL;
+
+struct idle_sample {
+	u32 instdone;	/* INSTDONE on gen < 4, INSTDONE_I965 otherwise */
+	u32 instdone1;	/* INSTDONE1 on gen >= 4, recorded as 0 on older parts */
+};
+
+struct trace_idle_data {
+	bool tracing;			/* set by _start(), cleared by _stop() */
+	bool warned_overflow;		/* buffer-full message already printed for this trace */
+	int max_samples;		/* capacity of samples[], in entries */
+	int num_samples;		/* entries of samples[] filled so far */
+	struct idle_sample *samples;	/* allocated in _init(), freed in _finish() */
+	struct hrtimer timer;		/* runs i915_trace_idle_timer_callback() */
+	spinlock_t samples_lock;	/* guards the sample state against concurrent timer/readout */
+};
+
+
+/* hrtimer tick: store one INSTDONE/INSTDONE1 sample in the next free slot. Returns
+ * HRTIMER_NORESTART (sampling ends) once the buffer is full, else re-arms itself. */
+static enum hrtimer_restart
+i915_trace_idle_timer_callback(struct hrtimer *timer)
+{
+	struct drm_device *dev = global_dev; /* XXX: container_of(timer, ...) + a dev back-pointer would avoid this */
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct trace_idle_data *idle_data = dev_priv->trace_idle_data;
+	u32 instdone, instdone1 = 0;
+	unsigned long irqflags;
+	bool full;
+
+	if (INTEL_INFO(dev)->gen < 4) {
+		instdone = I915_READ(INSTDONE);
+	} else {
+		instdone = I915_READ(INSTDONE_I965);
+		instdone1 = I915_READ(INSTDONE1);
+	}
+
+	/* Obtain a lock to ensure we don't collide with data readout */
+	spin_lock_irqsave(&idle_data->samples_lock, irqflags);
+	full = idle_data->num_samples >= idle_data->max_samples;
+	if (full) {
+		if (!idle_data->warned_overflow)
+			printk(KERN_ERR "Overflow in trace idle buffer\n");
+		idle_data->warned_overflow = true;
+	} else {
+		struct idle_sample *sample = &idle_data->samples[idle_data->num_samples++];
+		sample->instdone = instdone;
+		sample->instdone1 = instdone1;
+	}
+	/* Single unlock for both outcomes: no path may return with the lock held */
+	spin_unlock_irqrestore(&idle_data->samples_lock, irqflags);
+
+	if (full)
+		return HRTIMER_NORESTART;
+	hrtimer_forward_now(timer, ns_to_ktime(US_TO_NS(SAMPLE_INTERVAL_US)));
+	return HRTIMER_RESTART;
+}
+
+/* Allocate the idle-tracing state and sample buffer for @dev; returns 0 or -ENOMEM. */
+int
+i915_trace_idle_init(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct trace_idle_data *idle_data;
+	int ret;
+
+	/* Allocate our book-keeping structure */
+	idle_data = kzalloc(sizeof(*idle_data), GFP_KERNEL);
+	if (!idle_data) {
+		printk(KERN_ERR "Failed to allocate idle tracing sample buffer\n");
+		return -ENOMEM;
+	}
+
+	idle_data->max_samples = SAMPLE_BUFFER_LENGTH;
+	/* SPIN_LOCK_UNLOCKED is deprecated and defeats lockdep; initialise at run time */
+	spin_lock_init(&idle_data->samples_lock);
+
+	/* Allocate zeroed memory for the recorded samples (kcalloc checks n * size) */
+	idle_data->samples = kcalloc(idle_data->max_samples,
+				     sizeof(*idle_data->samples), GFP_KERNEL);
+	if (!idle_data->samples) {
+		printk(KERN_ERR "Failed to allocate idle tracing sample buffer\n");
+		ret = -ENOMEM;
+		goto cleanup_idle_data;
+	}
+
+	dev_priv->trace_idle_data = idle_data;
+	/* XXX: THIS NEXT LINE IS MURDERING KITTENS */
+	global_dev = dev;
+
+	printk(KERN_INFO "Initialised support for tracing GPU idle data\n");
+	return 0;
+
+cleanup_idle_data:
+	kfree(idle_data);
+	return ret;
+}
+
+/* Stop any running trace and free the idle-tracing state; no-op if none was set up. */
+void
+i915_trace_idle_finish(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct trace_idle_data *idle_data = dev_priv->trace_idle_data;
+	if (!idle_data)	/* init failed or never ran: nothing to stop or free */
+		return;
+	if (idle_data->tracing)
+		i915_trace_idle_stop(dev);
+	kfree(idle_data->samples);
+	kfree(idle_data);
+	dev_priv->trace_idle_data = NULL;
+	printk(KERN_INFO "Cleaned up support for tracing GPU idle data\n");
+}
+
+/* Begin sampling: resets the buffer and arms the timer. Returns 0, -EINVAL if
+ * idle tracing was never initialised, or -EBUSY if a trace is already running. */
+int
+i915_trace_idle_start(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct trace_idle_data *idle_data = dev_priv->trace_idle_data;
+	unsigned long irqflags;
+	ktime_t ktime;
+
+	if (!idle_data) {
+		printk(KERN_ERR "called with no initialization\n");
+		return -EINVAL;
+	}
+
+	/* Obtain a lock to ensure we don't collide with data readout */
+	spin_lock_irqsave(&idle_data->samples_lock, irqflags);
+
+	if (idle_data->tracing) {
+		/* XXX: A race between two clients doing idle tracing? */
+		spin_unlock_irqrestore(&idle_data->samples_lock, irqflags);
+		printk(KERN_INFO "Already tracing GPU idle performance\n");
+		return -EBUSY;
+	}
+
+	/* Zero any previous samples recorded */
+	idle_data->num_samples = 0;
+	idle_data->tracing = true;
+	idle_data->warned_overflow = false;
+
+	/* Arm the timer before dropping the lock, so a concurrent _stop() cannot run
+	 * between publishing tracing == true and the timer actually being started. */
+	hrtimer_init(&idle_data->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	idle_data->timer.function = &i915_trace_idle_timer_callback;
+	ktime = ktime_set(0, US_TO_NS(SAMPLE_INTERVAL_US));
+	hrtimer_start(&idle_data->timer, ktime, HRTIMER_MODE_REL);
+
+	spin_unlock_irqrestore(&idle_data->samples_lock, irqflags);
+	return 0;
+}
+
+/* Stop sampling. Returns 0, or -EINVAL when tracing is uninitialised or not running. */
+int
+i915_trace_idle_stop(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct trace_idle_data *idle_data = dev_priv->trace_idle_data;
+	unsigned long irqflags;
+
+	if (!idle_data) {
+		printk(KERN_ERR "called with no initialization\n");
+		return -EINVAL;
+	}
+
+	/* Obtain a lock to ensure we don't collide with data readout */
+	spin_lock_irqsave(&idle_data->samples_lock, irqflags);
+	if (!idle_data->tracing) {
+		/* XXX: racing clients? Either way, drop the lock before bailing out */
+		spin_unlock_irqrestore(&idle_data->samples_lock, irqflags);
+		printk(KERN_INFO "Not currently tracing GPU idle performance\n");
+		return -EINVAL;
+	}
+
+	idle_data->tracing = false;
+	spin_unlock_irqrestore(&idle_data->samples_lock, irqflags);
+
+	/* Cancel outside the lock: hrtimer_cancel() waits for a running callback, which takes it */
+	hrtimer_cancel(&idle_data->timer);
+	return 0;
+}
+
+int
+i915_trace_idle_debugfs_info(struct seq_file *m, void *data)
+{
+	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct trace_idle_data *idle_data = dev_priv->trace_idle_data;
+	unsigned long irqflags;
+	struct idle_sample *samples;	/* private snapshot, printed after unlocking */
+	int num_samples;
+	bool warned_overflow;
+	bool tracing;			/* NOTE(review): copied below but never printed */
+	int i;
+
+	if (!idle_data) {
+		seq_printf(m, "Idle tracing not inisialized\n");
+		return 0;
+	}
+
+	/* debugfs dump: snapshot the samples under the lock, then print the copy. */
+	/* We can't allocate (GFP_KERNEL may sleep) whilst holding the spinlock.
+	 * Since I don't know if seq_printf and friends are safe to call whilst
+	 * I hold a spinlock, I'm copying the data here and printing it afterwards.
+	 */
+	samples = kmalloc (SAMPLE_BUFFER_LENGTH * sizeof (struct idle_sample),
+			   GFP_KERNEL);
+	if (!samples) {
+		printk(KERN_ERR "Failed to allocate temporary sample buffer for output\n");
+		return -ENOMEM;
+	}
+
+	/* Obtain a lock to ensure we don't collide with the sampling timer */
+	spin_lock_irqsave(&idle_data->samples_lock, irqflags);
+
+	/* Copy the counters and the filled part of the sample buffer */
+	num_samples = idle_data->num_samples;
+	warned_overflow = idle_data->warned_overflow;
+	tracing = idle_data->tracing;
+
+	if (num_samples > 0)
+		memcpy (samples, idle_data->samples,
+			num_samples * sizeof (struct idle_sample));
+
+	/* Release the lock; everything below works on the private copy */
+	spin_unlock_irqrestore(&idle_data->samples_lock, irqflags);
+
+	seq_printf(m, "The sample buffer has %d samples out of a max possible of %d\n",
+		      num_samples, SAMPLE_BUFFER_LENGTH);
+	if (warned_overflow)
+		seq_printf(m, "The sample buffer overflowed, so later samples were lost\n");
+
+	seq_printf(m, "SAMPLE:  INSTDONE    INSTDONE1\n");
+	for (i = 0; i < num_samples; i++) {
+		seq_printf(m, "%06d:  0x%08x  0x%08x\n",
+		               i, samples[i].instdone, samples[i].instdone1);
+	}
+	seq_printf(m, "END\n");
+
+	kfree (samples);
+	return 0;
+}
+
+
+/* IOCTL controlling idle tracing: a non-zero args->start_trace starts a trace,
+ * zero stops the current one. The start/stop result is returned to the caller,
+ * so userspace sees -EBUSY / -EINVAL instead of an unconditional 0. */
+int
+i915_trace_idle_ioctl(struct drm_device *dev, void *data,
+		      struct drm_file *file_priv)
+{
+	struct drm_i915_trace_idle *args = data;
+
+	if (args->start_trace)
+		return i915_trace_idle_start(dev);
+
+	return i915_trace_idle_stop(dev);
+}
diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
index 8c641be..32eb48f 100644
--- a/include/drm/i915_drm.h
+++ b/include/drm/i915_drm.h
@@ -198,6 +198,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_I915_OVERLAY_PUT_IMAGE	0x27
 #define DRM_I915_OVERLAY_ATTRS	0x28
 #define DRM_I915_GEM_EXECBUFFER2	0x29
+#define DRM_I915_TRACE_IDLE	0x30
 
 #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
 #define DRM_IOCTL_I915_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
@@ -239,6 +240,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_GEM_MADVISE	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MADVISE, struct drm_i915_gem_madvise)
 #define DRM_IOCTL_I915_OVERLAY_PUT_IMAGE	DRM_IOW(DRM_COMMAND_BASE + DRM_IOCTL_I915_OVERLAY_ATTRS, struct drm_intel_overlay_put_image)
 #define DRM_IOCTL_I915_OVERLAY_ATTRS	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_OVERLAY_ATTRS, struct drm_intel_overlay_attrs)
+#define DRM_IOCTL_I915_TRACE_IDLE	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_TRACE_IDLE, struct drm_i915_trace_idle)
 
 /* Allow drivers to submit batchbuffers directly to hardware, relying
  * on the security mechanisms provided by hardware.
@@ -633,11 +635,17 @@ struct drm_i915_gem_execbuffer2 {
 #define I915_EXEC_RENDER                 (1<<0)
 #define I915_EXEC_BSD                    (2<<0)
 #define I915_EXEC_BLT                    (3<<0)
+
+#define I915_EXEC_TRACE_IDLE             (1<<3)
 	__u64 flags;
 	__u64 rsvd1;
 	__u64 rsvd2;
 };
 
+struct drm_i915_trace_idle {
+	__u32 start_trace;	/* non-zero: start tracing; zero: stop tracing */
+};
+
 struct drm_i915_gem_pin {
 	/** Handle of the buffer to be pinned. */
 	__u32 handle;
-- 
1.7.1

>From 9178e7600a31b5dcf52ae216125da9d4ef8703f2 Mon Sep 17 00:00:00 2001
From: Peter Clifton <pc...@cam.ac.uk>
Date: Sun, 31 Oct 2010 02:17:25 +0000
Subject: [PATCH] Expose the IOCTL for tracing the GPU's idle status

---
 include/drm/i915_drm.h    |    8 ++++++++
 intel/intel_bufmgr.c      |    5 +++++
 intel/intel_bufmgr.h      |    1 +
 intel/intel_bufmgr_gem.c  |   21 +++++++++++++++++++++
 intel/intel_bufmgr_priv.h |    2 ++
 5 files changed, 37 insertions(+), 0 deletions(-)

diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
index 19da2c0..874f721 100644
--- a/include/drm/i915_drm.h
+++ b/include/drm/i915_drm.h
@@ -189,6 +189,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_I915_OVERLAY_PUT_IMAGE	0x27
 #define DRM_I915_OVERLAY_ATTRS	0x28
 #define DRM_I915_GEM_EXECBUFFER2	0x29
+#define DRM_I915_TRACE_IDLE	0x30
 
 #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
 #define DRM_IOCTL_I915_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
@@ -230,6 +231,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_GEM_MADVISE	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MADVISE, struct drm_i915_gem_madvise)
 #define DRM_IOCTL_I915_OVERLAY_PUT_IMAGE	DRM_IOW(DRM_COMMAND_BASE + DRM_IOCTL_I915_OVERLAY_ATTRS, struct drm_intel_overlay_put_image)
 #define DRM_IOCTL_I915_OVERLAY_ATTRS	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_OVERLAY_ATTRS, struct drm_intel_overlay_attrs)
+#define DRM_IOCTL_I915_TRACE_IDLE	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_TRACE_IDLE, struct drm_i915_trace_idle)
 
 /* Allow drivers to submit batchbuffers directly to hardware, relying
  * on the security mechanisms provided by hardware.
@@ -625,11 +627,17 @@ struct drm_i915_gem_execbuffer2 {
 #define I915_EXEC_RENDER                 (1<<0)
 #define I915_EXEC_BSD                    (2<<0)
 #define I915_EXEC_BLT                    (3<<0)
+
+#define I915_EXEC_TRACE_IDLE             (1<<3)
 	__u64 flags;
 	__u64 rsvd1;
 	__u64 rsvd2;
 };
 
+struct drm_i915_trace_idle {
+	__u32 start_trace;	/* non-zero: start tracing; zero: stop tracing */
+};
+
 struct drm_i915_gem_pin {
 	/** Handle of the buffer to be pinned. */
 	__u32 handle;
diff --git a/intel/intel_bufmgr.c b/intel/intel_bufmgr.c
index 2b4e888..184365c 100644
--- a/intel/intel_bufmgr.c
+++ b/intel/intel_bufmgr.c
@@ -274,3 +274,8 @@ int drm_intel_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, int crtc_id)
 		return bufmgr->get_pipe_from_crtc_id(bufmgr, crtc_id);
 	return -1;
 }
+
+int drm_intel_trace_idle(drm_intel_bufmgr *bufmgr, int start_trace)
+{
+	return bufmgr->trace_idle ? bufmgr->trace_idle(bufmgr, start_trace) : -1; /* hook is optional */
+}
diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h
index 9df5168..c390686 100644
--- a/intel/intel_bufmgr.h
+++ b/intel/intel_bufmgr.h
@@ -135,6 +135,7 @@ int drm_intel_bo_madvise(drm_intel_bo *bo, int madv);
 int drm_intel_bo_disable_reuse(drm_intel_bo *bo);
 int drm_intel_bo_is_reusable(drm_intel_bo *bo);
 int drm_intel_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo);
+int drm_intel_trace_idle(drm_intel_bufmgr *bufmgr, int start_trace);
 
 /* drm_intel_bufmgr_gem.c */
 drm_intel_bufmgr *drm_intel_bufmgr_gem_init(int fd, int batch_size);
diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
index 37a3691..ba840cd 100644
--- a/intel/intel_bufmgr_gem.c
+++ b/intel/intel_bufmgr_gem.c
@@ -2022,6 +2022,26 @@ drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo)
 	return 0;
 }
 
+/* GEM backend of drm_intel_trace_idle(): issues DRM_IOCTL_I915_TRACE_IDLE.
+ * Returns 0 on success, or the negated errno of the failed ioctl. */
+static int
+drm_intel_gem_trace_idle(drm_intel_bufmgr *bufmgr, int start_trace)
+{
+	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
+	struct drm_i915_trace_idle trace_idle;
+	int ret;
+
+	memset(&trace_idle, 0, sizeof(trace_idle));
+	trace_idle.start_trace = start_trace;
+	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_TRACE_IDLE, &trace_idle);
+	if (ret == 0)
+		return 0;
+
+	ret = -errno;	/* capture before fprintf() gets a chance to overwrite errno */
+	fprintf(stderr, "DRM_IOCTL_I915_TRACE_IDLE failed: %s\n", strerror(-ret));
+	return ret;
+}
+
 static void
 add_bucket(drm_intel_bufmgr_gem *bufmgr_gem, int size)
 {
@@ -2207,6 +2227,7 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size)
 	bufmgr_gem->bufmgr.get_pipe_from_crtc_id =
 	    drm_intel_gem_get_pipe_from_crtc_id;
 	bufmgr_gem->bufmgr.bo_references = drm_intel_gem_bo_references;
+	bufmgr_gem->bufmgr.trace_idle = drm_intel_gem_trace_idle;
 
 	init_cache_buckets(bufmgr_gem);
 
diff --git a/intel/intel_bufmgr_priv.h b/intel/intel_bufmgr_priv.h
index 87e91e7..7a2594f 100644
--- a/intel/intel_bufmgr_priv.h
+++ b/intel/intel_bufmgr_priv.h
@@ -276,6 +276,8 @@ struct _drm_intel_bufmgr {
 	/** Returns true if target_bo is in the relocation tree rooted at bo. */
 	int (*bo_references) (drm_intel_bo *bo, drm_intel_bo *target_bo);
 
+	int (*trace_idle) (drm_intel_bufmgr *bufmgr, int start_trace);
+
 	/**< Enables verbose debugging printouts */
 	int debug;
 };
-- 
1.7.1

diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
index 9b39823..005794b 100644
--- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
@@ -125,6 +125,8 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file,
    if (intel->first_post_swapbuffers_batch == NULL) {
       intel->first_post_swapbuffers_batch = intel->batch->buf;
       drm_intel_bo_reference(intel->first_post_swapbuffers_batch);
+      /* XXX: HACK PUTTING THIS HERE */
+      drm_intel_trace_idle (intel->bufmgr, 1);
    }
 
    if (used == 0)
diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c
index 7ace50b..577f7f8 100644
--- a/src/mesa/drivers/dri/intel/intel_context.c
+++ b/src/mesa/drivers/dri/intel/intel_context.c
@@ -566,6 +566,8 @@ intel_glFlush(struct gl_context *ctx)
    intel_flush(ctx);
    intel_flush_front(ctx);
    intel->need_throttle = GL_TRUE;
+   /* XXX: HACK PUTTING THIS HERE */
+   drm_intel_trace_idle (intel->bufmgr, 0);
 }
 
 void
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to