Hi guys, I thought I'd attach this, as it is now gone 2AM and I doubt I'm going to finish it "tonight". I was hoping to elicit some initial review to suggest whether the design was sane or not.
I'd originally imagined tying the profiling lifetime to the execution / completion of individual batch-buffers, but for now I'd like to get it partly working like this, and perhaps develop some user-space program to view the results and see if they make sense. The basic (kernel side) functionality is there, albeit limited by some hard-coded timing parameters and buffer sizes. I might look at whether it makes sense to do some in-kernel over-sampling and percentage generation, or just spit raw register dumps out to the debugfs interface for userspace to do that. (Buffer size might play a part in that decision). The locking is a little rough, and I probably need to double-buffer the sample buffers so I can always be sure the debugfs inteface gets a complete "frame" / "buffer" / whatever.. trace's worth of data. Perhaps I should put a semaphore around the debug data output which sleeps until the profiling has been stopped. (E.g. at the end of a frame). Currently the debugfs routine just takes a spinlock and copies out whatever samples have been gathered, meaning the only reliable way to see a full frame's worth of data is for it to be the last frame before the instrumented client quit. Part of me did wonder about doing a constant stream of data to userspace.. but then I quickly realised I had no idea how to do that, and what to do if userspace lets us overrun our buffers ;) I've got a libdrm patch to expose the new IOCTL (also attached), but I don't have a very good solution for hooking that into mesa and synchronising with frames. I applied a VERY dirty kludge for testing. Does anyone know if you can pass userdata parameters to a hrtimer? From the API, it looked not - although in that case, how do you avoid needing horrid global state variables? Regards, -- Peter Clifton Electrical Engineering Division, Engineering Department, University of Cambridge, 9, JJ Thomson Avenue, Cambridge CB3 0FA Tel: +44 (0)7729 980173 - (No signal in the lab!) Tel: +44 (0)1223 748328 - (Shared lab phone, ask for me)
>From 3a5b5950624e88bcbd44073847d27e11c8199218 Mon Sep 17 00:00:00 2001 From: Peter Clifton <pc...@cam.ac.uk> Date: Sun, 31 Oct 2010 01:27:58 +0000 Subject: [PATCH] Hacky little instdone and instdone1 profiler --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/i915_debugfs.c | 1 + drivers/gpu/drm/i915/i915_dma.c | 7 + drivers/gpu/drm/i915/i915_drv.h | 12 ++ drivers/gpu/drm/i915/i915_gem.c | 4 + drivers/gpu/drm/i915/i915_trace_idle.c | 309 ++++++++++++++++++++++++++++++++ include/drm/i915_drm.h | 8 + 7 files changed, 342 insertions(+), 0 deletions(-) create mode 100644 drivers/gpu/drm/i915/i915_trace_idle.c diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index fdc833d..45aacf8 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -10,6 +10,7 @@ i915-y := i915_drv.o i915_dma.o i915_irq.o i915_mem.o \ i915_gem_debug.o \ i915_gem_evict.o \ i915_gem_tiling.o \ + i915_trace_idle.o \ i915_trace_points.o \ intel_display.o \ intel_crt.o \ diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 7698983..82d331a 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1043,6 +1043,7 @@ static struct drm_info_list i915_debugfs_list[] = { {"i915_sr_status", i915_sr_status, 0}, {"i915_opregion", i915_opregion, 0}, {"i915_gem_framebuffer", i915_gem_framebuffer_info, 0}, + {"i915_trace_idle", i915_trace_idle_debugfs_info, 0}, }; #define I915_DEBUGFS_ENTRIES ARRAY_SIZE(i915_debugfs_list) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 785ee11..a41da27 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -2057,6 +2057,9 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) dev_priv->mchdev_lock = &mchdev_lock; spin_unlock(&mchdev_lock); + /* XXX: Not sure if this belongs here or not */ + i915_trace_idle_init (dev); + return 0; out_workqueue_free: @@ -2077,6 +2080,9 @@ int i915_driver_unload(struct drm_device *dev) struct drm_i915_private *dev_priv = dev->dev_private; int ret; + /* XXX: Not sure if this belongs here or not */ + i915_trace_idle_finish (dev); + spin_lock(&mchdev_lock); i915_mch_dev = NULL; spin_unlock(&mchdev_lock); @@ -2263,6 +2269,7 @@ struct drm_ioctl_desc i915_ioctls[] = { DRM_IOCTL_DEF_DRV(I915_GEM_MADVISE, i915_gem_madvise_ioctl, DRM_UNLOCKED), DRM_IOCTL_DEF_DRV(I915_OVERLAY_PUT_IMAGE, intel_overlay_put_image, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), DRM_IOCTL_DEF_DRV(I915_OVERLAY_ATTRS, intel_overlay_attrs, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I915_TRACE_IDLE, i915_trace_idle_ioctl, DRM_AUTH|DRM_UNLOCKED), }; int i915_max_ioctl = DRM_ARRAY_SIZE(i915_ioctls); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 2c2c19b..274af4b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -707,6 +707,9 @@ typedef struct drm_i915_private { /* list of fbdev register on this device */ struct intel_fbdev *fbdev; + + /* Idle tracing data */ + struct trace_idle_data *trace_idle_data; } drm_i915_private_t; /** driver private structure attached to each drm_gem_object */ @@ -1015,6 +1018,8 @@ int i915_gem_get_tiling(struct drm_device *dev, void *data, struct drm_file *file_priv); int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); +int i915_trace_idle_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); void i915_gem_load(struct drm_device *dev); int i915_gem_init_object(struct drm_gem_object *obj); struct drm_gem_object * i915_gem_alloc_object(struct drm_device *dev, @@ -1114,6 +1119,13 @@ extern int i915_restore_state(struct drm_device *dev); extern int i915_save_state(struct drm_device *dev); extern int i915_restore_state(struct drm_device *dev); +/* i915_trace_idle.c */ +int i915_trace_idle_init(struct drm_device *dev); +void i915_trace_idle_finish(struct drm_device *dev); +int i915_trace_idle_start(struct drm_device *dev); +int i915_trace_idle_stop(struct drm_device *dev); +int i915_trace_idle_debugfs_info(struct seq_file *m, void *data); + /* intel_i2c.c */ extern int intel_setup_gmbus(struct drm_device *dev); extern void intel_teardown_gmbus(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 6c2618d..392b575 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3651,6 +3651,10 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, return -EINVAL; } + if (args->flags & I915_EXEC_TRACE_IDLE) { + DRM_INFO("Batchbuffer with I915_EXEC_TRACE_IDLE\n"); + } + if (args->buffer_count < 1) { DRM_ERROR("execbuf with %d buffers\n", args->buffer_count); return -EINVAL; diff --git a/drivers/gpu/drm/i915/i915_trace_idle.c b/drivers/gpu/drm/i915/i915_trace_idle.c new file mode 100644 index 0000000..cbb640c --- /dev/null +++ b/drivers/gpu/drm/i915/i915_trace_idle.c @@ -0,0 +1,309 @@ +/* + * Copyright © 2010 Peter Clifton + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Peter Clifton <pc...@cam.ac.uk> + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/hrtimer.h> +#include <linux/ktime.h> +#include <linux/seq_file.h> + +#include <linux/input.h> +#include <linux/slab.h> +#include "drmP.h" +#include "intel_drv.h" +#include "i915_drm.h" +#include "i915_drv.h" + +#define SAMPLE_INTERVAL_US 10 +#define US_TO_NS(x) (x * 1E3L) +#define MS_TO_NS(x) (x * 1E6L) + +/* Number of sample buffer entries */ +#define SAMPLE_BUFFER_LENGTH 8192 + +/* XXX: OH DEAR GOODNESS DO I HATE HAVING TO MAKE THIS NASTY HACK! */ +static struct drm_device *global_dev = NULL; + +struct idle_sample { + u32 instdone; + u32 instdone1; +}; + +struct trace_idle_data { + bool tracing; + bool warned_overflow; + int max_samples; + int num_samples; + struct idle_sample *samples; + struct hrtimer timer; + spinlock_t samples_lock; +}; + + +static enum hrtimer_restart +i915_trace_idle_timer_callback(struct hrtimer *timer) +{ + struct drm_device *dev = global_dev; /* XXX: SHOULD BE PASSED TO THE TIMER SOMEHOW? */ + struct drm_i915_private *dev_priv = dev->dev_private; + struct trace_idle_data *idle_data = dev_priv->trace_idle_data; + struct idle_sample *sample; + u32 instdone; + u32 instdone1; + unsigned long irqflags; + + if (INTEL_INFO(dev)->gen < 4) { + instdone = I915_READ(INSTDONE); + instdone1 = 0; + } else { + instdone = I915_READ(INSTDONE_I965); + instdone1 = I915_READ(INSTDONE1); + } + + /* Obtain a lock to ensure we don't colide with data readout */ + spin_lock_irqsave(&idle_data->samples_lock, irqflags); + + if (idle_data->num_samples == idle_data->max_samples) { + if (!idle_data->warned_overflow) + printk(KERN_ERR "Overflow in trace idle buffer\n"); + idle_data->warned_overflow = true; + return HRTIMER_NORESTART; + } + + sample = &idle_data->samples[idle_data->num_samples++]; + sample->instdone = instdone; + sample->instdone1 = instdone1; + + /* Release the lock */ + spin_unlock_irqrestore(&idle_data->samples_lock, irqflags); + + hrtimer_forward_now(timer, ns_to_ktime(US_TO_NS(SAMPLE_INTERVAL_US))); + return HRTIMER_RESTART; +} + +int +i915_trace_idle_init(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct trace_idle_data *idle_data; + int ret; + + /* Allocate our book-keeping structure */ + idle_data = kzalloc (sizeof (*idle_data), GFP_KERNEL); + if (!idle_data) { + printk(KERN_ERR "Failed to allocate idle tracing sample buffer\n"); + return -ENOMEM; + } + + idle_data->max_samples = SAMPLE_BUFFER_LENGTH; + idle_data->samples_lock = SPIN_LOCK_UNLOCKED; + + /* Allocate memory for the recorded samples */ + idle_data->samples = kmalloc (idle_data->max_samples * + sizeof (struct idle_sample), + GFP_KERNEL); + if (!idle_data->samples) { + printk(KERN_ERR "Failed to allocate idle tracing sample buffer\n"); + ret = -ENOMEM; + goto cleanup_idle_data; + } + + dev_priv->trace_idle_data = idle_data; + + /* XXX: THIS NEXT LINE IS MURDERING KITTENS */ + global_dev = dev; + + printk(KERN_INFO "Initialised support for tracing GPU idle data\n"); + return 0; + +cleanup_idle_data: + kfree (idle_data); + return ret; +} + + +void +i915_trace_idle_finish(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct trace_idle_data *idle_data = dev_priv->trace_idle_data; + if (idle_data->tracing) + i915_trace_idle_stop(dev); + + kfree (idle_data->samples); + kfree (idle_data); + dev_priv->trace_idle_data = NULL; + + printk(KERN_INFO "Cleaned up support for tracing GPU idle data\n"); +} + +int +i915_trace_idle_start(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct trace_idle_data *idle_data = dev_priv->trace_idle_data; + unsigned long irqflags; + ktime_t ktime; + + if (!idle_data) { + printk(KERN_ERR "called with no initialization\n"); + return -EINVAL; + } + + /* Obtain a lock to ensure we don't colide with data readout */ + spin_lock_irqsave(&idle_data->samples_lock, irqflags); + + if (idle_data->tracing) { + /* XXX: A race between two clients doing idle tracing? */ + /* Release the lock */ + spin_unlock_irqrestore(&idle_data->samples_lock, irqflags); + printk(KERN_INFO "Already tracing GPU idle performance\n"); + return -EBUSY; + } + + /* Zero any previous samples recorded */ + idle_data->num_samples = 0; + idle_data->tracing = true; + idle_data->warned_overflow = false; + + /* Release the lock */ + spin_unlock_irqrestore(&idle_data->samples_lock, irqflags); + + hrtimer_init(&idle_data->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + idle_data->timer.function = &i915_trace_idle_timer_callback; + /* XXX: Wouldn't it be nice if we could pass data to the timer callback? */ + + ktime = ktime_set(0, US_TO_NS(SAMPLE_INTERVAL_US)); + hrtimer_start(&idle_data->timer, ktime, HRTIMER_MODE_REL); + return 0; +} + +int +i915_trace_idle_stop(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct trace_idle_data *idle_data = dev_priv->trace_idle_data; + unsigned long irqflags; + + if (!idle_data) { + printk(KERN_ERR "called with no initialization\n"); + return -EINVAL; + } + + /* Obtain a lock to ensure we don't colide with data readout */ + spin_lock_irqsave(&idle_data->samples_lock, irqflags); + + if (!idle_data->tracing) { + /* XXX: A race between two clients doing idle tracing? */ + printk(KERN_INFO "Not currently tracing GPU idle performance\n"); + return -EINVAL; + } + + idle_data->tracing = false; + + /* Release the lock */ + spin_unlock_irqrestore(&idle_data->samples_lock, irqflags); + + hrtimer_cancel(&idle_data->timer); + return 0; +} + +int +i915_trace_idle_debugfs_info(struct seq_file *m, void *data) +{ + struct drm_info_node *node = (struct drm_info_node *) m->private; + struct drm_device *dev = node->minor->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + struct trace_idle_data *idle_data = dev_priv->trace_idle_data; + unsigned long irqflags; + struct idle_sample *samples; + int num_samples; + bool warned_overflow; + bool tracing; + int i; + + if (!idle_data) { + seq_printf(m, "Idle tracing not inisialized\n"); + return 0; + } + + /* Allocate some space to copy the data to. */ + /* We can't do this whilst holding the spinlock. Since I don't + * know if seq_printf and friends are safe to call whilst I hold + * a spinlock, I'm copying the data here. + */ + samples = kmalloc (SAMPLE_BUFFER_LENGTH * sizeof (struct idle_sample), + GFP_KERNEL); + if (!samples) { + printk(KERN_ERR "Failed to allocate temporary sample buffer for output\n"); + return -ENOMEM; + } + + /* Obtain a lock to ensure we don't colide with the sampling timer */ + spin_lock_irqsave(&idle_data->samples_lock, irqflags); + + /* Copy the samples */ + num_samples = idle_data->num_samples; + warned_overflow = idle_data->warned_overflow; + tracing = idle_data->tracing; + + if (num_samples > 0) + memcpy (samples, idle_data->samples, + num_samples * sizeof (struct idle_sample)); + + /* Release the lock */ + spin_unlock_irqrestore(&idle_data->samples_lock, irqflags); + + seq_printf(m, "The sample buffer has %d samples out of a max possible of %d\n", + num_samples, SAMPLE_BUFFER_LENGTH); + if (warned_overflow) + seq_printf(m, "The sample buffer overflowed, so later samples were lost\n"); + + seq_printf(m, "SAMPLE: INSTDONE INSTDONE1\n"); + for (i = 0; i < num_samples; i++) { + seq_printf(m, "%06d: 0x%08x 0x%08x\n", + i, samples[i].instdone, samples[i].instdone1); + } + seq_printf(m, "END\n"); + + kfree (samples); + return 0; +} + + +/* IOCTL controlling idle tracing */ +int +i915_trace_idle_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_i915_trace_idle *args = data; + + if (args->start_trace) + i915_trace_idle_start (dev); + else + i915_trace_idle_stop (dev); + + return 0; +}; diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h index 8c641be..32eb48f 100644 --- a/include/drm/i915_drm.h +++ b/include/drm/i915_drm.h @@ -198,6 +198,7 @@ typedef struct _drm_i915_sarea { #define DRM_I915_OVERLAY_PUT_IMAGE 0x27 #define DRM_I915_OVERLAY_ATTRS 0x28 #define DRM_I915_GEM_EXECBUFFER2 0x29 +#define DRM_I915_TRACE_IDLE 0x30 #define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t) #define DRM_IOCTL_I915_FLUSH DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH) @@ -239,6 +240,7 @@ typedef struct _drm_i915_sarea { #define DRM_IOCTL_I915_GEM_MADVISE DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MADVISE, struct drm_i915_gem_madvise) #define DRM_IOCTL_I915_OVERLAY_PUT_IMAGE DRM_IOW(DRM_COMMAND_BASE + DRM_IOCTL_I915_OVERLAY_ATTRS, struct drm_intel_overlay_put_image) #define DRM_IOCTL_I915_OVERLAY_ATTRS DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_OVERLAY_ATTRS, struct drm_intel_overlay_attrs) +#define DRM_IOCTL_I915_TRACE_IDLE DRM_IOW(DRM_COMMAND_BASE + DRM_I915_TRACE_IDLE, struct drm_i915_trace_idle) /* Allow drivers to submit batchbuffers directly to hardware, relying * on the security mechanisms provided by hardware. @@ -633,11 +635,17 @@ struct drm_i915_gem_execbuffer2 { #define I915_EXEC_RENDER (1<<0) #define I915_EXEC_BSD (2<<0) #define I915_EXEC_BLT (3<<0) + +#define I915_EXEC_TRACE_IDLE (1<<3) __u64 flags; __u64 rsvd1; __u64 rsvd2; }; +struct drm_i915_trace_idle { + __u32 start_trace; +}; + struct drm_i915_gem_pin { /** Handle of the buffer to be pinned. */ __u32 handle; -- 1.7.1
>From 9178e7600a31b5dcf52ae216125da9d4ef8703f2 Mon Sep 17 00:00:00 2001 From: Peter Clifton <pc...@cam.ac.uk> Date: Sun, 31 Oct 2010 02:17:25 +0000 Subject: [PATCH] Expose the IOCTL for tracing the GPU's idle status --- include/drm/i915_drm.h | 8 ++++++++ intel/intel_bufmgr.c | 5 +++++ intel/intel_bufmgr.h | 1 + intel/intel_bufmgr_gem.c | 21 +++++++++++++++++++++ intel/intel_bufmgr_priv.h | 2 ++ 5 files changed, 37 insertions(+), 0 deletions(-) diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h index 19da2c0..874f721 100644 --- a/include/drm/i915_drm.h +++ b/include/drm/i915_drm.h @@ -189,6 +189,7 @@ typedef struct _drm_i915_sarea { #define DRM_I915_OVERLAY_PUT_IMAGE 0x27 #define DRM_I915_OVERLAY_ATTRS 0x28 #define DRM_I915_GEM_EXECBUFFER2 0x29 +#define DRM_I915_TRACE_IDLE 0x30 #define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t) #define DRM_IOCTL_I915_FLUSH DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH) @@ -230,6 +231,7 @@ typedef struct _drm_i915_sarea { #define DRM_IOCTL_I915_GEM_MADVISE DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MADVISE, struct drm_i915_gem_madvise) #define DRM_IOCTL_I915_OVERLAY_PUT_IMAGE DRM_IOW(DRM_COMMAND_BASE + DRM_IOCTL_I915_OVERLAY_ATTRS, struct drm_intel_overlay_put_image) #define DRM_IOCTL_I915_OVERLAY_ATTRS DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_OVERLAY_ATTRS, struct drm_intel_overlay_attrs) +#define DRM_IOCTL_I915_TRACE_IDLE DRM_IOW(DRM_COMMAND_BASE + DRM_I915_TRACE_IDLE, struct drm_i915_trace_idle) /* Allow drivers to submit batchbuffers directly to hardware, relying * on the security mechanisms provided by hardware. @@ -625,11 +627,17 @@ struct drm_i915_gem_execbuffer2 { #define I915_EXEC_RENDER (1<<0) #define I915_EXEC_BSD (2<<0) #define I915_EXEC_BLT (3<<0) + +#define I915_EXEC_TRACE_IDLE (1<<3) __u64 flags; __u64 rsvd1; __u64 rsvd2; }; +struct drm_i915_trace_idle { + __u32 start_trace; +}; + struct drm_i915_gem_pin { /** Handle of the buffer to be pinned. */ __u32 handle; diff --git a/intel/intel_bufmgr.c b/intel/intel_bufmgr.c index 2b4e888..184365c 100644 --- a/intel/intel_bufmgr.c +++ b/intel/intel_bufmgr.c @@ -274,3 +274,8 @@ int drm_intel_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, int crtc_id) return bufmgr->get_pipe_from_crtc_id(bufmgr, crtc_id); return -1; } + +int drm_intel_trace_idle(drm_intel_bufmgr *bufmgr, int start_trace) +{ + return bufmgr->trace_idle(bufmgr, start_trace); +} diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h index 9df5168..c390686 100644 --- a/intel/intel_bufmgr.h +++ b/intel/intel_bufmgr.h @@ -135,6 +135,7 @@ int drm_intel_bo_madvise(drm_intel_bo *bo, int madv); int drm_intel_bo_disable_reuse(drm_intel_bo *bo); int drm_intel_bo_is_reusable(drm_intel_bo *bo); int drm_intel_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo); +int drm_intel_trace_idle(drm_intel_bufmgr *bufmgr, int start_trace); /* drm_intel_bufmgr_gem.c */ drm_intel_bufmgr *drm_intel_bufmgr_gem_init(int fd, int batch_size); diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c index 37a3691..ba840cd 100644 --- a/intel/intel_bufmgr_gem.c +++ b/intel/intel_bufmgr_gem.c @@ -2022,6 +2022,26 @@ drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo) return 0; } +static int +drm_intel_gem_trace_idle(drm_intel_bufmgr *bufmgr, int start_trace) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; + struct drm_i915_trace_idle trace_idle; + int ret; + + memset(&trace_idle, 0, sizeof(trace_idle)); + trace_idle.start_trace = start_trace; + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_TRACE_IDLE, &trace_idle); + + if (ret) { + fprintf(stderr, "DRM_IOCTL_I915_TRACE_IDLE failed: %s\n", + strerror(errno)); + return -errno; + } + + return 0; +} + static void add_bucket(drm_intel_bufmgr_gem *bufmgr_gem, int size) { @@ -2207,6 +2227,7 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size) bufmgr_gem->bufmgr.get_pipe_from_crtc_id = drm_intel_gem_get_pipe_from_crtc_id; bufmgr_gem->bufmgr.bo_references = drm_intel_gem_bo_references; + bufmgr_gem->bufmgr.trace_idle = drm_intel_gem_trace_idle; init_cache_buckets(bufmgr_gem); diff --git a/intel/intel_bufmgr_priv.h b/intel/intel_bufmgr_priv.h index 87e91e7..7a2594f 100644 --- a/intel/intel_bufmgr_priv.h +++ b/intel/intel_bufmgr_priv.h @@ -276,6 +276,8 @@ struct _drm_intel_bufmgr { /** Returns true if target_bo is in the relocation tree rooted at bo. */ int (*bo_references) (drm_intel_bo *bo, drm_intel_bo *target_bo); + int (*trace_idle) (drm_intel_bufmgr *bufmgr, int start_trace); + /**< Enables verbose debugging printouts */ int debug; }; -- 1.7.1
diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c index 9b39823..005794b 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c @@ -125,6 +125,8 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file, if (intel->first_post_swapbuffers_batch == NULL) { intel->first_post_swapbuffers_batch = intel->batch->buf; drm_intel_bo_reference(intel->first_post_swapbuffers_batch); + /* XXX: HACK PUTTING THIS HERE */ + drm_intel_trace_idle (intel->bufmgr, 1); } if (used == 0) diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index 7ace50b..577f7f8 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -566,6 +566,8 @@ intel_glFlush(struct gl_context *ctx) intel_flush(ctx); intel_flush_front(ctx); intel->need_throttle = GL_TRUE; + /* XXX: HACK PUTTING THIS HERE */ + drm_intel_trace_idle (intel->bufmgr, 0); } void
_______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx