From: Tvrtko Ursulin <tvrtko.ursu...@intel.com>

This eliminates six needless spin lock/unlock pairs when writing out ELSP. Apart
from tidier code, the main benefit is a speedup of between 0.51% and 0.73% on
some OGL tests under CHV (bench_OglBatch4 and bench_OglDeferred respectively).

Kindly benchmarked by Ben Widawsky.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursu...@intel.com>
Cc: Dave Gordon <david.s.gor...@intel.com>
Cc: Daniel Vetter <daniel.vet...@ffwll.ch>
Cc: Ben Widawsky <b...@bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_drv.h     | 15 +++++++++++++++
 drivers/gpu/drm/i915/intel_lrc.c    | 13 ++++++-------
 drivers/gpu/drm/i915/intel_uncore.c | 14 --------------
 3 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 66f0c60..33d577a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3197,6 +3197,21 @@ int vlv_freq_opcode(struct drm_i915_private *dev_priv, int val);
 #define POSTING_READ(reg)      (void)I915_READ_NOTRACE(reg)
 #define POSTING_READ16(reg)    (void)I915_READ16_NOTRACE(reg)
 
+/* Raw MMIO access with no forcewake handling, use with care. */
+#define __raw_i915_read8(dev_priv__, reg__) readb((dev_priv__)->regs + (reg__))
+#define __raw_i915_write8(dev_priv__, reg__, val__) writeb(val__, (dev_priv__)->regs + (reg__))
+
+#define __raw_i915_read16(dev_priv__, reg__) readw((dev_priv__)->regs + (reg__))
+#define __raw_i915_write16(dev_priv__, reg__, val__) writew(val__, (dev_priv__)->regs + (reg__))
+
+#define __raw_i915_read32(dev_priv__, reg__) readl((dev_priv__)->regs + (reg__))
+#define __raw_i915_write32(dev_priv__, reg__, val__) writel(val__, (dev_priv__)->regs + (reg__))
+
+#define __raw_i915_read64(dev_priv__, reg__) readq((dev_priv__)->regs + (reg__))
+#define __raw_i915_write64(dev_priv__, reg__, val__) writeq(val__, (dev_priv__)->regs + (reg__))
+
+#define __raw_posting_read(dev_priv__, reg__) (void)__raw_i915_read32(dev_priv__, reg__)
+
 /* "Broadcast RGB" property */
 #define INTEL_BROADCAST_RGB_AUTO 0
 #define INTEL_BROADCAST_RGB_FULL 1
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index e405b61..e22b866 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -305,6 +305,7 @@ static void execlists_elsp_write(struct intel_engine_cs *ring,
         * Instead, we do the runtime_pm_get/put when creating/destroying requests.
         */
        spin_lock_irqsave(&dev_priv->uncore.lock, flags);
+
        if (IS_CHERRYVIEW(dev) || INTEL_INFO(dev)->gen >= 9) {
                if (dev_priv->uncore.fw_rendercount++ == 0)
                        dev_priv->uncore.funcs.force_wake_get(dev_priv,
@@ -322,19 +323,17 @@ static void execlists_elsp_write(struct intel_engine_cs *ring,
                        dev_priv->uncore.funcs.force_wake_get(dev_priv,
                                                              FORCEWAKE_ALL);
        }
-       spin_unlock_irqrestore(&dev_priv->uncore.lock, flags);
 
-       I915_WRITE(RING_ELSP(ring), desc[1]);
-       I915_WRITE(RING_ELSP(ring), desc[0]);
-       I915_WRITE(RING_ELSP(ring), desc[3]);
+       __raw_i915_write32(dev_priv, RING_ELSP(ring), desc[1]);
+       __raw_i915_write32(dev_priv, RING_ELSP(ring), desc[0]);
+       __raw_i915_write32(dev_priv, RING_ELSP(ring), desc[3]);
        /* The context is automatically loaded after the following */
-       I915_WRITE(RING_ELSP(ring), desc[2]);
+       __raw_i915_write32(dev_priv, RING_ELSP(ring), desc[2]);
 
        /* ELSP is a wo register, so use another nearby reg for posting instead */
-       POSTING_READ(RING_EXECLIST_STATUS(ring));
+       __raw_posting_read(dev_priv, RING_EXECLIST_STATUS(ring));
 
        /* Release Force Wakeup (see the big comment above). */
-       spin_lock_irqsave(&dev_priv->uncore.lock, flags);
        if (IS_CHERRYVIEW(dev) || INTEL_INFO(dev)->gen >= 9) {
                if (--dev_priv->uncore.fw_rendercount == 0)
                        dev_priv->uncore.funcs.force_wake_put(dev_priv,
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index e9561de..9a31932 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -26,20 +26,6 @@
 
 #define FORCEWAKE_ACK_TIMEOUT_MS 2
 
-#define __raw_i915_read8(dev_priv__, reg__) readb((dev_priv__)->regs + (reg__))
-#define __raw_i915_write8(dev_priv__, reg__, val__) writeb(val__, (dev_priv__)->regs + (reg__))
-
-#define __raw_i915_read16(dev_priv__, reg__) readw((dev_priv__)->regs + (reg__))
-#define __raw_i915_write16(dev_priv__, reg__, val__) writew(val__, (dev_priv__)->regs + (reg__))
-
-#define __raw_i915_read32(dev_priv__, reg__) readl((dev_priv__)->regs + (reg__))
-#define __raw_i915_write32(dev_priv__, reg__, val__) writel(val__, (dev_priv__)->regs + (reg__))
-
-#define __raw_i915_read64(dev_priv__, reg__) readq((dev_priv__)->regs + (reg__))
-#define __raw_i915_write64(dev_priv__, reg__, val__) writeq(val__, (dev_priv__)->regs + (reg__))
-
-#define __raw_posting_read(dev_priv__, reg__) (void)__raw_i915_read32(dev_priv__, reg__)
-
 static void
 assert_device_not_suspended(struct drm_i915_private *dev_priv)
 {
-- 
2.2.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to