From: Tvrtko Ursulin <tvrtko.ursu...@intel.com>

We can rely on context complete interrupt to wake up the waiters
apart in the case where requests are merged into a single ELSP
submission. In this case we inject MI_USER_INTERRUPTS in the
ring buffer to ensure prompt wake-ups.

This optimization has the effect on for example GLBenchmark
Egypt off-screen test of decreasing the number of generated
interrupts per second by a factor of two, and context switched
by factor of five to six.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursu...@intel.com>
Cc: Chris Wilson <ch...@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h  |  1 +
 drivers/gpu/drm/i915/i915_irq.c  | 24 ++++++++++++------------
 drivers/gpu/drm/i915/intel_lrc.c | 26 +++++++++++++++++++++++++-
 3 files changed, 38 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 1d28d90ed901..f6fd2023aaf0 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2811,6 +2811,7 @@ ibx_disable_display_interrupt(struct drm_i915_private 
*dev_priv, uint32_t bits)
        ibx_display_interrupt_update(dev_priv, bits, 0);
 }
 
+void intel_notify_ring(struct intel_engine_cs *ring);
 
 /* i915_gem.c */
 int i915_gem_create_ioctl(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 3f8c753997ba..a2087bad0d64 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -994,7 +994,7 @@ static void ironlake_rps_change_irq_handler(struct 
drm_device *dev)
        return;
 }
 
-static void notify_ring(struct intel_engine_cs *ring)
+void intel_notify_ring(struct intel_engine_cs *ring)
 {
        if (!intel_ring_initialized(ring))
                return;
@@ -1291,9 +1291,9 @@ static void ilk_gt_irq_handler(struct drm_device *dev,
 {
        if (gt_iir &
            (GT_RENDER_USER_INTERRUPT | GT_RENDER_PIPECTL_NOTIFY_INTERRUPT))
-               notify_ring(&dev_priv->ring[RCS]);
+               intel_notify_ring(&dev_priv->ring[RCS]);
        if (gt_iir & ILK_BSD_USER_INTERRUPT)
-               notify_ring(&dev_priv->ring[VCS]);
+               intel_notify_ring(&dev_priv->ring[VCS]);
 }
 
 static void snb_gt_irq_handler(struct drm_device *dev,
@@ -1303,11 +1303,11 @@ static void snb_gt_irq_handler(struct drm_device *dev,
 
        if (gt_iir &
            (GT_RENDER_USER_INTERRUPT | GT_RENDER_PIPECTL_NOTIFY_INTERRUPT))
-               notify_ring(&dev_priv->ring[RCS]);
+               intel_notify_ring(&dev_priv->ring[RCS]);
        if (gt_iir & GT_BSD_USER_INTERRUPT)
-               notify_ring(&dev_priv->ring[VCS]);
+               intel_notify_ring(&dev_priv->ring[VCS]);
        if (gt_iir & GT_BLT_USER_INTERRUPT)
-               notify_ring(&dev_priv->ring[BCS]);
+               intel_notify_ring(&dev_priv->ring[BCS]);
 
        if (gt_iir & (GT_BLT_CS_ERROR_INTERRUPT |
                      GT_BSD_CS_ERROR_INTERRUPT |
@@ -1322,7 +1322,7 @@ static __always_inline void
 gen8_cs_irq_handler(struct intel_engine_cs *ring, u32 iir, int test_shift)
 {
        if (iir & (GT_RENDER_USER_INTERRUPT << test_shift))
-               notify_ring(ring);
+               intel_notify_ring(ring);
        if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift))
                intel_lrc_irq_handler(ring);
 }
@@ -1629,7 +1629,7 @@ static void gen6_rps_irq_handler(struct drm_i915_private 
*dev_priv, u32 pm_iir)
 
        if (HAS_VEBOX(dev_priv->dev)) {
                if (pm_iir & PM_VEBOX_USER_INTERRUPT)
-                       notify_ring(&dev_priv->ring[VECS]);
+                       intel_notify_ring(&dev_priv->ring[VECS]);
 
                if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT)
                        DRM_DEBUG("Command parser error, pm_iir 0x%08x\n", 
pm_iir);
@@ -3961,7 +3961,7 @@ static irqreturn_t i8xx_irq_handler(int irq, void *arg)
                new_iir = I915_READ16(IIR); /* Flush posted writes */
 
                if (iir & I915_USER_INTERRUPT)
-                       notify_ring(&dev_priv->ring[RCS]);
+                       intel_notify_ring(&dev_priv->ring[RCS]);
 
                for_each_pipe(dev_priv, pipe) {
                        int plane = pipe;
@@ -4157,7 +4157,7 @@ static irqreturn_t i915_irq_handler(int irq, void *arg)
                new_iir = I915_READ(IIR); /* Flush posted writes */
 
                if (iir & I915_USER_INTERRUPT)
-                       notify_ring(&dev_priv->ring[RCS]);
+                       intel_notify_ring(&dev_priv->ring[RCS]);
 
                for_each_pipe(dev_priv, pipe) {
                        int plane = pipe;
@@ -4387,9 +4387,9 @@ static irqreturn_t i965_irq_handler(int irq, void *arg)
                new_iir = I915_READ(IIR); /* Flush posted writes */
 
                if (iir & I915_USER_INTERRUPT)
-                       notify_ring(&dev_priv->ring[RCS]);
+                       intel_notify_ring(&dev_priv->ring[RCS]);
                if (iir & I915_BSD_USER_INTERRUPT)
-                       notify_ring(&dev_priv->ring[VCS]);
+                       intel_notify_ring(&dev_priv->ring[VCS]);
 
                for_each_pipe(dev_priv, pipe) {
                        if (pipe_stats[pipe] & 
PIPE_START_VBLANK_INTERRUPT_STATUS &&
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 27f06198a51e..d9be878dbde7 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -359,6 +359,13 @@ static void execlists_elsp_write(struct 
drm_i915_gem_request *rq0,
        spin_unlock(&dev_priv->uncore.lock);
 }
 
+static void execlists_emit_user_interrupt(struct drm_i915_gem_request *req)
+{
+       struct intel_ringbuffer *ringbuf = req->ringbuf;
+
+       iowrite32(MI_USER_INTERRUPT, ringbuf->virtual_start + req->tail - 8);
+}
+
 static int execlists_update_context(struct drm_i915_gem_request *rq)
 {
        struct intel_engine_cs *ring = rq->ring;
@@ -433,6 +440,12 @@ static void execlists_context_unqueue(struct 
intel_engine_cs *ring)
                        cursor->elsp_submitted = req0->elsp_submitted;
                        list_move_tail(&req0->execlist_link,
                                       &ring->execlist_retired_req_list);
+                       /*
+                        * When merging requests make sure there is still
+                        * something after each batch buffer to wake up waiters.
+                        */
+                       if (cursor != req0)
+                               execlists_emit_user_interrupt(req0);
                        req0 = cursor;
                } else {
                        req1 = cursor;
@@ -472,6 +485,12 @@ static bool execlists_check_remove_request(struct 
intel_engine_cs *ring,
 
        assert_spin_locked(&ring->execlist_lock);
 
+       /*
+        * This is effectively a context complete interrupt so wake
+        * up potential waiters on this ring.
+        */
+       intel_notify_ring(ring);
+
        head_req = list_first_entry_or_null(&ring->execlist_queue,
                                            struct drm_i915_gem_request,
                                            execlist_link);
@@ -1812,7 +1831,12 @@ static int gen8_emit_request(struct drm_i915_gem_request 
*request)
                                (I915_GEM_HWS_INDEX << 
MI_STORE_DWORD_INDEX_SHIFT)));
        intel_logical_ring_emit(ringbuf, 0);
        intel_logical_ring_emit(ringbuf, i915_gem_request_get_seqno(request));
-       intel_logical_ring_emit(ringbuf, MI_USER_INTERRUPT);
+       /*
+        * Following noop is a placeholder for an optional MI_USER_INTERRUPT
+        * which will be patched in at ELSP submission time if this request
+        * will not be ending with a context complete interrupt.
+        */
+       intel_logical_ring_emit(ringbuf, MI_NOOP);
        intel_logical_ring_emit(ringbuf, MI_NOOP);
        intel_logical_ring_advance_and_submit(request);
 
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to