From: Oscar Mateo <oscar.ma...@intel.com>

Notice that the BSD invalidate bit is no longer present in GEN8, so
we can consolidate the BLT and BSD ring flushes into one.

Signed-off-by: Oscar Mateo <oscar.ma...@intel.com>
---
 drivers/gpu/drm/i915/intel_lrc.c        | 80 +++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_ringbuffer.c |  7 ---
 drivers/gpu/drm/i915/intel_ringbuffer.h | 11 +++++
 3 files changed, 91 insertions(+), 7 deletions(-)
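
For reference while reviewing (not part of this patch, and the helper name
below is only illustrative): the idea is that the logical ring code drives
the new GEN-specific emit_flush vfunc roughly like this:

static int logical_ring_invalidate_all_caches(struct intel_engine_cs *ring,
                                              struct intel_context *ctx)
{
        /* Invalidate every GPU domain; nothing needs flushing out, so
         * flush_domains is 0. */
        return ring->emit_flush(ring, ctx, I915_GEM_GPU_DOMAINS, 0);
}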

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 3debe8b..3d7fcd6 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -343,6 +343,81 @@ static int gen8_init_render_ring(struct intel_engine_cs *ring)
        return ret;
 }
 
+static int gen8_emit_flush(struct intel_engine_cs *ring,
+                          struct intel_context *ctx,
+                          u32 invalidate_domains,
+                          u32 unused)
+{
+       struct intel_ringbuffer *ringbuf = logical_ringbuf_get(ring, ctx);
+       uint32_t cmd;
+       int ret;
+
+       ret = intel_logical_ring_begin(ring, ctx, 4);
+       if (ret)
+               return ret;
+
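+       /*
+        * On GEN8, MI_FLUSH_DW takes a 64-bit post-sync address, making the
+        * command one dword longer than on previous gens (the "upper addr"
+        * dword emitted below), hence the + 1 on the length field.
+        */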
+       cmd = MI_FLUSH_DW + 1;
+
+       /*
+        * Bspec vol 1c.3 - blitter engine command streamer:
+        * "If ENABLED, all TLBs will be invalidated once the flush
+        * operation is complete. This bit is only valid when the
+        * Post-Sync Operation field is a value of 1h or 3h."
+        */
+       if (invalidate_domains & I915_GEM_DOMAIN_RENDER)
+               cmd |= MI_INVALIDATE_TLB | MI_FLUSH_DW_STORE_INDEX |
+                       MI_FLUSH_DW_OP_STOREDW;
+       intel_logical_ring_emit(ringbuf, cmd);
+       intel_logical_ring_emit(ringbuf, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
+       intel_logical_ring_emit(ringbuf, 0); /* upper addr */
+       intel_logical_ring_emit(ringbuf, 0); /* value */
+       intel_logical_ring_advance(ringbuf);
+
+       return 0;
+}
+
+static int gen8_emit_flush_render(struct intel_engine_cs *ring,
+                                 struct intel_context *ctx,
+                                 u32 invalidate_domains,
+                                 u32 flush_domains)
+{
+       struct intel_ringbuffer *ringbuf = logical_ringbuf_get(ring, ctx);
+       u32 flags = 0;
+       u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
+       int ret;
+
+       flags |= PIPE_CONTROL_CS_STALL;
+
+       if (flush_domains) {
+               flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
+               flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+       }
+       if (invalidate_domains) {
+               flags |= PIPE_CONTROL_TLB_INVALIDATE;
+               flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
+               flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
+               flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
+               flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
+               flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
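+               /*
+                * TLB invalidate requires a post-sync write, hence the
+                * qword write to the scratch page below.
+                */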
+               flags |= PIPE_CONTROL_QW_WRITE;
+               flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
+       }
+
+       ret = intel_logical_ring_begin(ring, ctx, 6);
+       if (ret)
+               return ret;
+
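+       /*
+        * 6-dword GEN8 PIPE_CONTROL: command, flags, post-sync address
+        * (low, high) and two dwords of immediate data (unused here).
+        */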
+       intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6));
+       intel_logical_ring_emit(ringbuf, flags);
+       intel_logical_ring_emit(ringbuf, scratch_addr);
+       intel_logical_ring_emit(ringbuf, 0);
+       intel_logical_ring_emit(ringbuf, 0);
+       intel_logical_ring_emit(ringbuf, 0);
+       intel_logical_ring_advance(ringbuf);
+
+       return 0;
+}
+
 static u32 gen8_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
 {
        return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
@@ -491,6 +566,7 @@ static int logical_render_ring_init(struct drm_device *dev)
        ring->set_seqno = gen8_set_seqno;
        ring->submit_ctx = gen8_submit_ctx;
        ring->emit_request = gen8_emit_request_render;
+       ring->emit_flush = gen8_emit_flush_render;
 
        return logical_ring_init(dev, ring);
 }
@@ -511,6 +587,7 @@ static int logical_bsd_ring_init(struct drm_device *dev)
        ring->set_seqno = gen8_set_seqno;
        ring->submit_ctx = gen8_submit_ctx;
        ring->emit_request = gen8_emit_request;
+       ring->emit_flush = gen8_emit_flush;
 
        return logical_ring_init(dev, ring);
 }
@@ -531,6 +608,7 @@ static int logical_bsd2_ring_init(struct drm_device *dev)
        ring->set_seqno = gen8_set_seqno;
        ring->submit_ctx = gen8_submit_ctx;
        ring->emit_request = gen8_emit_request;
+       ring->emit_flush = gen8_emit_flush;
 
        return logical_ring_init(dev, ring);
 }
@@ -551,6 +629,7 @@ static int logical_blt_ring_init(struct drm_device *dev)
        ring->set_seqno = gen8_set_seqno;
        ring->submit_ctx = gen8_submit_ctx;
        ring->emit_request = gen8_emit_request;
+       ring->emit_flush = gen8_emit_flush;
 
        return logical_ring_init(dev, ring);
 }
@@ -571,6 +650,7 @@ static int logical_vebox_ring_init(struct drm_device *dev)
        ring->set_seqno = gen8_set_seqno;
        ring->submit_ctx = gen8_submit_ctx;
        ring->emit_request = gen8_emit_request;
+       ring->emit_flush = gen8_emit_flush;
 
        return logical_ring_init(dev, ring);
 }
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 137ee9a..a128f6f 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -33,13 +33,6 @@
 #include "i915_trace.h"
 #include "intel_drv.h"
 
-/* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill,
- * but keeps the logic simple. Indeed, the whole purpose of this macro is just
- * to give some inclination as to some of the magic values used in the various
- * workarounds!
- */
-#define CACHELINE_BYTES 64
-
 bool
 intel_ring_initialized(struct intel_engine_cs *ring)
 {
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index d8ded14..527db2a 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -5,6 +5,13 @@
 
 #define I915_CMD_HASH_ORDER 9
 
+/* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill,
+ * but keeps the logic simple. Indeed, the whole purpose of this macro is just
+ * to give some inclination as to some of the magic values used in the various
+ * workarounds!
+ */
+#define CACHELINE_BYTES 64
+
 /*
  * Gen2 BSpec "1. Programming Environment" / 1.4.4.6 "Ring Buffer Use"
  * Gen3 BSpec "vol1c Memory Interface Functions" / 2.3.4.5 "Ring Buffer Use"
@@ -153,6 +160,10 @@ struct  intel_engine_cs {
                                      struct intel_context *ctx, u32 value);
        int             (*emit_request)(struct intel_engine_cs *ring,
                                        struct intel_context *ctx);
+       int __must_check (*emit_flush)(struct intel_engine_cs *ring,
+                                      struct intel_context *ctx,
+                                      u32 invalidate_domains,
+                                      u32 flush_domains);
 
        /**
         * List of objects currently involved in rendering from the
-- 
1.9.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
