From: Daniele Ceraolo Spurio <daniele.ceraolospu...@intel.com>

These functions are independent from the backend used and can therefore
be split out of the exelists submission file, so they can be re-used by
the upcoming GuC submission backend.

Based on a patch by Chris Wilson.

Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospu...@intel.com>
Cc: Chris P Wilson <chris.p.wil...@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursu...@linux.intel.com>
Reviewed-by: John Harrison <john.c.harri...@intel.com>
---
 drivers/gpu/drm/i915/Makefile                 |   1 +
 drivers/gpu/drm/i915/gt/gen8_engine_cs.c      | 390 +++++++++++++++++
 drivers/gpu/drm/i915/gt/gen8_engine_cs.h      |  27 ++
 .../drm/i915/gt/intel_execlists_submission.c  | 395 +-----------------
 4 files changed, 424 insertions(+), 389 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gt/gen8_engine_cs.c
 create mode 100644 drivers/gpu/drm/i915/gt/gen8_engine_cs.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index aedbd8f52be8..f9ef5199b124 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -82,6 +82,7 @@ gt-y += \
        gt/gen6_engine_cs.o \
        gt/gen6_ppgtt.o \
        gt/gen7_renderclear.o \
+       gt/gen8_engine_cs.o \
        gt/gen8_ppgtt.o \
        gt/intel_breadcrumbs.o \
        gt/intel_context.o \
diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c 
b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
new file mode 100644
index 000000000000..6a92daa39f40
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
@@ -0,0 +1,390 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2014 Intel Corporation
+ */
+
+#include "gen8_engine_cs.h" /* XXX */
+#include "i915_drv.h"
+#include "intel_execlists_submission.h" /* XXX */
+#include "intel_gpu_commands.h"
+#include "intel_ring.h"
+
+int gen8_emit_flush_rcs(struct i915_request *rq, u32 mode)
+{
+       bool vf_flush_wa = false, dc_flush_wa = false;
+       u32 *cs, flags = 0;
+       int len;
+
+       flags |= PIPE_CONTROL_CS_STALL;
+
+       if (mode & EMIT_FLUSH) {
+               flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
+               flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+               flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
+               flags |= PIPE_CONTROL_FLUSH_ENABLE;
+       }
+
+       if (mode & EMIT_INVALIDATE) {
+               flags |= PIPE_CONTROL_TLB_INVALIDATE;
+               flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
+               flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
+               flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
+               flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
+               flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
+               flags |= PIPE_CONTROL_QW_WRITE;
+               flags |= PIPE_CONTROL_STORE_DATA_INDEX;
+
+               /*
+                * On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL
+                * pipe control.
+                */
+               if (IS_GEN(rq->engine->i915, 9))
+                       vf_flush_wa = true;
+
+               /* WaForGAMHang:kbl */
+               if (IS_KBL_GT_REVID(rq->engine->i915, 0, KBL_REVID_B0))
+                       dc_flush_wa = true;
+       }
+
+       len = 6;
+
+       if (vf_flush_wa)
+               len += 6;
+
+       if (dc_flush_wa)
+               len += 12;
+
+       cs = intel_ring_begin(rq, len);
+       if (IS_ERR(cs))
+               return PTR_ERR(cs);
+
+       if (vf_flush_wa)
+               cs = gen8_emit_pipe_control(cs, 0, 0);
+
+       if (dc_flush_wa)
+               cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_DC_FLUSH_ENABLE,
+                                           0);
+
+       cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
+
+       if (dc_flush_wa)
+               cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_CS_STALL, 0);
+
+       intel_ring_advance(rq, cs);
+
+       return 0;
+}
+
+int gen8_emit_flush_xcs(struct i915_request *rq, u32 mode)
+{
+       u32 cmd, *cs;
+
+       cs = intel_ring_begin(rq, 4);
+       if (IS_ERR(cs))
+               return PTR_ERR(cs);
+
+       cmd = MI_FLUSH_DW + 1;
+
+       /* We always require a command barrier so that subsequent
+        * commands, such as breadcrumb interrupts, are strictly ordered
+        * wrt the contents of the write cache being flushed to memory
+        * (and thus being coherent from the CPU).
+        */
+       cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
+
+       if (mode & EMIT_INVALIDATE) {
+               cmd |= MI_INVALIDATE_TLB;
+               if (rq->engine->class == VIDEO_DECODE_CLASS)
+                       cmd |= MI_INVALIDATE_BSD;
+       }
+
+       *cs++ = cmd;
+       *cs++ = LRC_PPHWSP_SCRATCH_ADDR;
+       *cs++ = 0; /* upper addr */
+       *cs++ = 0; /* value */
+       intel_ring_advance(rq, cs);
+
+       return 0;
+}
+
+int gen11_emit_flush_rcs(struct i915_request *rq, u32 mode)
+{
+       if (mode & EMIT_FLUSH) {
+               u32 *cs;
+               u32 flags = 0;
+
+               flags |= PIPE_CONTROL_CS_STALL;
+
+               flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
+               flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
+               flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+               flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
+               flags |= PIPE_CONTROL_FLUSH_ENABLE;
+               flags |= PIPE_CONTROL_QW_WRITE;
+               flags |= PIPE_CONTROL_STORE_DATA_INDEX;
+
+               cs = intel_ring_begin(rq, 6);
+               if (IS_ERR(cs))
+                       return PTR_ERR(cs);
+
+               cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
+               intel_ring_advance(rq, cs);
+       }
+
+       if (mode & EMIT_INVALIDATE) {
+               u32 *cs;
+               u32 flags = 0;
+
+               flags |= PIPE_CONTROL_CS_STALL;
+
+               flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
+               flags |= PIPE_CONTROL_TLB_INVALIDATE;
+               flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
+               flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
+               flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
+               flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
+               flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
+               flags |= PIPE_CONTROL_QW_WRITE;
+               flags |= PIPE_CONTROL_STORE_DATA_INDEX;
+
+               cs = intel_ring_begin(rq, 6);
+               if (IS_ERR(cs))
+                       return PTR_ERR(cs);
+
+               cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
+               intel_ring_advance(rq, cs);
+       }
+
+       return 0;
+}
+
+static u32 preparser_disable(bool state)
+{
+       return MI_ARB_CHECK | 1 << 8 | state;
+}
+
+static i915_reg_t aux_inv_reg(const struct intel_engine_cs *engine)
+{
+       static const i915_reg_t vd[] = {
+               GEN12_VD0_AUX_NV,
+               GEN12_VD1_AUX_NV,
+               GEN12_VD2_AUX_NV,
+               GEN12_VD3_AUX_NV,
+       };
+
+       static const i915_reg_t ve[] = {
+               GEN12_VE0_AUX_NV,
+               GEN12_VE1_AUX_NV,
+       };
+
+       if (engine->class == VIDEO_DECODE_CLASS)
+               return vd[engine->instance];
+
+       if (engine->class == VIDEO_ENHANCEMENT_CLASS)
+               return ve[engine->instance];
+
+       GEM_BUG_ON("unknown aux_inv reg\n");
+       return INVALID_MMIO_REG;
+}
+
+static u32 *gen12_emit_aux_table_inv(const i915_reg_t inv_reg, u32 *cs)
+{
+       *cs++ = MI_LOAD_REGISTER_IMM(1);
+       *cs++ = i915_mmio_reg_offset(inv_reg);
+       *cs++ = AUX_INV;
+       *cs++ = MI_NOOP;
+
+       return cs;
+}
+
+int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
+{
+       if (mode & EMIT_FLUSH) {
+               u32 flags = 0;
+               u32 *cs;
+
+               flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
+               flags |= PIPE_CONTROL_FLUSH_L3;
+               flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
+               flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+               /* Wa_1409600907:tgl */
+               flags |= PIPE_CONTROL_DEPTH_STALL;
+               flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
+               flags |= PIPE_CONTROL_FLUSH_ENABLE;
+
+               flags |= PIPE_CONTROL_STORE_DATA_INDEX;
+               flags |= PIPE_CONTROL_QW_WRITE;
+
+               flags |= PIPE_CONTROL_CS_STALL;
+
+               cs = intel_ring_begin(rq, 6);
+               if (IS_ERR(cs))
+                       return PTR_ERR(cs);
+
+               cs = gen12_emit_pipe_control(cs,
+                                            PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
+                                            flags, LRC_PPHWSP_SCRATCH_ADDR);
+               intel_ring_advance(rq, cs);
+       }
+
+       if (mode & EMIT_INVALIDATE) {
+               u32 flags = 0;
+               u32 *cs;
+
+               flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
+               flags |= PIPE_CONTROL_TLB_INVALIDATE;
+               flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
+               flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
+               flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
+               flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
+               flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
+
+               flags |= PIPE_CONTROL_STORE_DATA_INDEX;
+               flags |= PIPE_CONTROL_QW_WRITE;
+
+               flags |= PIPE_CONTROL_CS_STALL;
+
+               cs = intel_ring_begin(rq, 8 + 4);
+               if (IS_ERR(cs))
+                       return PTR_ERR(cs);
+
+               /*
+                * Prevent the pre-parser from skipping past the TLB
+                * invalidate and loading a stale page for the batch
+                * buffer / request payload.
+                */
+               *cs++ = preparser_disable(true);
+
+               cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
+
+               /* hsdes: 1809175790 */
+               cs = gen12_emit_aux_table_inv(GEN12_GFX_CCS_AUX_NV, cs);
+
+               *cs++ = preparser_disable(false);
+               intel_ring_advance(rq, cs);
+       }
+
+       return 0;
+}
+
+int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode)
+{
+       intel_engine_mask_t aux_inv = 0;
+       u32 cmd, *cs;
+
+       cmd = 4;
+       if (mode & EMIT_INVALIDATE)
+               cmd += 2;
+       if (mode & EMIT_INVALIDATE)
+               aux_inv = rq->engine->mask & ~BIT(BCS0);
+       if (aux_inv)
+               cmd += 2 * hweight8(aux_inv) + 2;
+
+       cs = intel_ring_begin(rq, cmd);
+       if (IS_ERR(cs))
+               return PTR_ERR(cs);
+
+       if (mode & EMIT_INVALIDATE)
+               *cs++ = preparser_disable(true);
+
+       cmd = MI_FLUSH_DW + 1;
+
+       /* We always require a command barrier so that subsequent
+        * commands, such as breadcrumb interrupts, are strictly ordered
+        * wrt the contents of the write cache being flushed to memory
+        * (and thus being coherent from the CPU).
+        */
+       cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
+
+       if (mode & EMIT_INVALIDATE) {
+               cmd |= MI_INVALIDATE_TLB;
+               if (rq->engine->class == VIDEO_DECODE_CLASS)
+                       cmd |= MI_INVALIDATE_BSD;
+       }
+
+       *cs++ = cmd;
+       *cs++ = LRC_PPHWSP_SCRATCH_ADDR;
+       *cs++ = 0; /* upper addr */
+       *cs++ = 0; /* value */
+
+       if (aux_inv) { /* hsdes: 1809175790 */
+               struct intel_engine_cs *engine;
+               unsigned int tmp;
+
+               *cs++ = MI_LOAD_REGISTER_IMM(hweight8(aux_inv));
+               for_each_engine_masked(engine, rq->engine->gt,
+                                      aux_inv, tmp) {
+                       *cs++ = i915_mmio_reg_offset(aux_inv_reg(engine));
+                       *cs++ = AUX_INV;
+               }
+               *cs++ = MI_NOOP;
+       }
+
+       if (mode & EMIT_INVALIDATE)
+               *cs++ = preparser_disable(false);
+
+       intel_ring_advance(rq, cs);
+
+       return 0;
+}
+
+int gen8_emit_bb_start_noarb(struct i915_request *rq,
+                            u64 offset, u32 len,
+                            const unsigned int flags)
+{
+       u32 *cs;
+
+       cs = intel_ring_begin(rq, 4);
+       if (IS_ERR(cs))
+               return PTR_ERR(cs);
+
+       /*
+        * WaDisableCtxRestoreArbitration:bdw,chv
+        *
+        * We don't need to perform MI_ARB_ENABLE as often as we do (in
+        * particular all the gen that do not need the w/a at all!), if we
+        * took care to make sure that on every switch into this context
+        * (both ordinary and for preemption) that arbitrartion was enabled
+        * we would be fine.  However, for gen8 there is another w/a that
+        * requires us to not preempt inside GPGPU execution, so we keep
+        * arbitration disabled for gen8 batches. Arbitration will be
+        * re-enabled before we close the request
+        * (engine->emit_fini_breadcrumb).
+        */
+       *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+
+       /* FIXME(BDW+): Address space and security selectors. */
+       *cs++ = MI_BATCH_BUFFER_START_GEN8 |
+               (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
+       *cs++ = lower_32_bits(offset);
+       *cs++ = upper_32_bits(offset);
+
+       intel_ring_advance(rq, cs);
+
+       return 0;
+}
+
+int gen8_emit_bb_start(struct i915_request *rq,
+                      u64 offset, u32 len,
+                      const unsigned int flags)
+{
+       u32 *cs;
+
+       cs = intel_ring_begin(rq, 6);
+       if (IS_ERR(cs))
+               return PTR_ERR(cs);
+
+       *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+
+       *cs++ = MI_BATCH_BUFFER_START_GEN8 |
+               (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
+       *cs++ = lower_32_bits(offset);
+       *cs++ = upper_32_bits(offset);
+
+       *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+       *cs++ = MI_NOOP;
+
+       intel_ring_advance(rq, cs);
+
+       return 0;
+}
diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.h 
b/drivers/gpu/drm/i915/gt/gen8_engine_cs.h
new file mode 100644
index 000000000000..925961c3e646
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2014 Intel Corporation
+ */
+
+#ifndef __GEN8_ENGINE_CS_H__
+#define __GEN8_ENGINE_CS_H__
+
+#include <linux/types.h>
+
+struct i915_request;
+
+int gen8_emit_flush_rcs(struct i915_request *request, u32 mode);
+int gen11_emit_flush_rcs(struct i915_request *request, u32 mode);
+int gen12_emit_flush_rcs(struct i915_request *request, u32 mode);
+
+int gen8_emit_flush_xcs(struct i915_request *request, u32 mode);
+int gen12_emit_flush_xcs(struct i915_request *request, u32 mode);
+
+int gen8_emit_bb_start_noarb(struct i915_request *rq,
+                            u64 offset, u32 len,
+                            const unsigned int flags);
+int gen8_emit_bb_start(struct i915_request *rq,
+                      u64 offset, u32 len,
+                      const unsigned int flags);
+
+#endif /* __GEN8_ENGINE_CS_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c 
b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index e1d35ab17e6f..9d73e9052c05 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -112,6 +112,7 @@
 #include "i915_perf.h"
 #include "i915_trace.h"
 #include "i915_vgpu.h"
+#include "gen8_engine_cs.h"
 #include "intel_breadcrumbs.h"
 #include "intel_context.h"
 #include "intel_engine_pm.h"
@@ -4475,67 +4476,6 @@ static void execlists_reset_finish(struct 
intel_engine_cs *engine)
                     atomic_read(&execlists->tasklet.count));
 }
 
-static int gen8_emit_bb_start_noarb(struct i915_request *rq,
-                                   u64 offset, u32 len,
-                                   const unsigned int flags)
-{
-       u32 *cs;
-
-       cs = intel_ring_begin(rq, 4);
-       if (IS_ERR(cs))
-               return PTR_ERR(cs);
-
-       /*
-        * WaDisableCtxRestoreArbitration:bdw,chv
-        *
-        * We don't need to perform MI_ARB_ENABLE as often as we do (in
-        * particular all the gen that do not need the w/a at all!), if we
-        * took care to make sure that on every switch into this context
-        * (both ordinary and for preemption) that arbitrartion was enabled
-        * we would be fine.  However, for gen8 there is another w/a that
-        * requires us to not preempt inside GPGPU execution, so we keep
-        * arbitration disabled for gen8 batches. Arbitration will be
-        * re-enabled before we close the request
-        * (engine->emit_fini_breadcrumb).
-        */
-       *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
-
-       /* FIXME(BDW+): Address space and security selectors. */
-       *cs++ = MI_BATCH_BUFFER_START_GEN8 |
-               (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
-       *cs++ = lower_32_bits(offset);
-       *cs++ = upper_32_bits(offset);
-
-       intel_ring_advance(rq, cs);
-
-       return 0;
-}
-
-static int gen8_emit_bb_start(struct i915_request *rq,
-                             u64 offset, u32 len,
-                             const unsigned int flags)
-{
-       u32 *cs;
-
-       cs = intel_ring_begin(rq, 6);
-       if (IS_ERR(cs))
-               return PTR_ERR(cs);
-
-       *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
-
-       *cs++ = MI_BATCH_BUFFER_START_GEN8 |
-               (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
-       *cs++ = lower_32_bits(offset);
-       *cs++ = upper_32_bits(offset);
-
-       *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
-       *cs++ = MI_NOOP;
-
-       intel_ring_advance(rq, cs);
-
-       return 0;
-}
-
 static void gen8_logical_ring_enable_irq(struct intel_engine_cs *engine)
 {
        ENGINE_WRITE(engine, RING_IMR,
@@ -4548,329 +4488,6 @@ static void gen8_logical_ring_disable_irq(struct 
intel_engine_cs *engine)
        ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask);
 }
 
-static int gen8_emit_flush(struct i915_request *request, u32 mode)
-{
-       u32 cmd, *cs;
-
-       cs = intel_ring_begin(request, 4);
-       if (IS_ERR(cs))
-               return PTR_ERR(cs);
-
-       cmd = MI_FLUSH_DW + 1;
-
-       /* We always require a command barrier so that subsequent
-        * commands, such as breadcrumb interrupts, are strictly ordered
-        * wrt the contents of the write cache being flushed to memory
-        * (and thus being coherent from the CPU).
-        */
-       cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
-
-       if (mode & EMIT_INVALIDATE) {
-               cmd |= MI_INVALIDATE_TLB;
-               if (request->engine->class == VIDEO_DECODE_CLASS)
-                       cmd |= MI_INVALIDATE_BSD;
-       }
-
-       *cs++ = cmd;
-       *cs++ = LRC_PPHWSP_SCRATCH_ADDR;
-       *cs++ = 0; /* upper addr */
-       *cs++ = 0; /* value */
-       intel_ring_advance(request, cs);
-
-       return 0;
-}
-
-static int gen8_emit_flush_render(struct i915_request *request,
-                                 u32 mode)
-{
-       bool vf_flush_wa = false, dc_flush_wa = false;
-       u32 *cs, flags = 0;
-       int len;
-
-       flags |= PIPE_CONTROL_CS_STALL;
-
-       if (mode & EMIT_FLUSH) {
-               flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
-               flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
-               flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
-               flags |= PIPE_CONTROL_FLUSH_ENABLE;
-       }
-
-       if (mode & EMIT_INVALIDATE) {
-               flags |= PIPE_CONTROL_TLB_INVALIDATE;
-               flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
-               flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
-               flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
-               flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
-               flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
-               flags |= PIPE_CONTROL_QW_WRITE;
-               flags |= PIPE_CONTROL_STORE_DATA_INDEX;
-
-               /*
-                * On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL
-                * pipe control.
-                */
-               if (IS_GEN(request->engine->i915, 9))
-                       vf_flush_wa = true;
-
-               /* WaForGAMHang:kbl */
-               if (IS_KBL_GT_REVID(request->engine->i915, 0, KBL_REVID_B0))
-                       dc_flush_wa = true;
-       }
-
-       len = 6;
-
-       if (vf_flush_wa)
-               len += 6;
-
-       if (dc_flush_wa)
-               len += 12;
-
-       cs = intel_ring_begin(request, len);
-       if (IS_ERR(cs))
-               return PTR_ERR(cs);
-
-       if (vf_flush_wa)
-               cs = gen8_emit_pipe_control(cs, 0, 0);
-
-       if (dc_flush_wa)
-               cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_DC_FLUSH_ENABLE,
-                                           0);
-
-       cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
-
-       if (dc_flush_wa)
-               cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_CS_STALL, 0);
-
-       intel_ring_advance(request, cs);
-
-       return 0;
-}
-
-static int gen11_emit_flush_render(struct i915_request *request,
-                                  u32 mode)
-{
-       if (mode & EMIT_FLUSH) {
-               u32 *cs;
-               u32 flags = 0;
-
-               flags |= PIPE_CONTROL_CS_STALL;
-
-               flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
-               flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
-               flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
-               flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
-               flags |= PIPE_CONTROL_FLUSH_ENABLE;
-               flags |= PIPE_CONTROL_QW_WRITE;
-               flags |= PIPE_CONTROL_STORE_DATA_INDEX;
-
-               cs = intel_ring_begin(request, 6);
-               if (IS_ERR(cs))
-                       return PTR_ERR(cs);
-
-               cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
-               intel_ring_advance(request, cs);
-       }
-
-       if (mode & EMIT_INVALIDATE) {
-               u32 *cs;
-               u32 flags = 0;
-
-               flags |= PIPE_CONTROL_CS_STALL;
-
-               flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
-               flags |= PIPE_CONTROL_TLB_INVALIDATE;
-               flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
-               flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
-               flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
-               flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
-               flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
-               flags |= PIPE_CONTROL_QW_WRITE;
-               flags |= PIPE_CONTROL_STORE_DATA_INDEX;
-
-               cs = intel_ring_begin(request, 6);
-               if (IS_ERR(cs))
-                       return PTR_ERR(cs);
-
-               cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
-               intel_ring_advance(request, cs);
-       }
-
-       return 0;
-}
-
-static u32 preparser_disable(bool state)
-{
-       return MI_ARB_CHECK | 1 << 8 | state;
-}
-
-static i915_reg_t aux_inv_reg(const struct intel_engine_cs *engine)
-{
-       static const i915_reg_t vd[] = {
-               GEN12_VD0_AUX_NV,
-               GEN12_VD1_AUX_NV,
-               GEN12_VD2_AUX_NV,
-               GEN12_VD3_AUX_NV,
-       };
-
-       static const i915_reg_t ve[] = {
-               GEN12_VE0_AUX_NV,
-               GEN12_VE1_AUX_NV,
-       };
-
-       if (engine->class == VIDEO_DECODE_CLASS)
-               return vd[engine->instance];
-
-       if (engine->class == VIDEO_ENHANCEMENT_CLASS)
-               return ve[engine->instance];
-
-       GEM_BUG_ON("unknown aux_inv_reg\n");
-
-       return INVALID_MMIO_REG;
-}
-
-static u32 *
-gen12_emit_aux_table_inv(const i915_reg_t inv_reg, u32 *cs)
-{
-       *cs++ = MI_LOAD_REGISTER_IMM(1);
-       *cs++ = i915_mmio_reg_offset(inv_reg);
-       *cs++ = AUX_INV;
-       *cs++ = MI_NOOP;
-
-       return cs;
-}
-
-static int gen12_emit_flush_render(struct i915_request *request,
-                                  u32 mode)
-{
-       if (mode & EMIT_FLUSH) {
-               u32 flags = 0;
-               u32 *cs;
-
-               flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
-               flags |= PIPE_CONTROL_FLUSH_L3;
-               flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
-               flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
-               /* Wa_1409600907:tgl */
-               flags |= PIPE_CONTROL_DEPTH_STALL;
-               flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
-               flags |= PIPE_CONTROL_FLUSH_ENABLE;
-
-               flags |= PIPE_CONTROL_STORE_DATA_INDEX;
-               flags |= PIPE_CONTROL_QW_WRITE;
-
-               flags |= PIPE_CONTROL_CS_STALL;
-
-               cs = intel_ring_begin(request, 6);
-               if (IS_ERR(cs))
-                       return PTR_ERR(cs);
-
-               cs = gen12_emit_pipe_control(cs,
-                                            PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
-                                            flags, LRC_PPHWSP_SCRATCH_ADDR);
-               intel_ring_advance(request, cs);
-       }
-
-       if (mode & EMIT_INVALIDATE) {
-               u32 flags = 0;
-               u32 *cs;
-
-               flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
-               flags |= PIPE_CONTROL_TLB_INVALIDATE;
-               flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
-               flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
-               flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
-               flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
-               flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
-
-               flags |= PIPE_CONTROL_STORE_DATA_INDEX;
-               flags |= PIPE_CONTROL_QW_WRITE;
-
-               flags |= PIPE_CONTROL_CS_STALL;
-
-               cs = intel_ring_begin(request, 8 + 4);
-               if (IS_ERR(cs))
-                       return PTR_ERR(cs);
-
-               /*
-                * Prevent the pre-parser from skipping past the TLB
-                * invalidate and loading a stale page for the batch
-                * buffer / request payload.
-                */
-               *cs++ = preparser_disable(true);
-
-               cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
-
-               /* hsdes: 1809175790 */
-               cs = gen12_emit_aux_table_inv(GEN12_GFX_CCS_AUX_NV, cs);
-
-               *cs++ = preparser_disable(false);
-               intel_ring_advance(request, cs);
-       }
-
-       return 0;
-}
-
-static int gen12_emit_flush(struct i915_request *request, u32 mode)
-{
-       intel_engine_mask_t aux_inv = 0;
-       u32 cmd, *cs;
-
-       cmd = 4;
-       if (mode & EMIT_INVALIDATE)
-               cmd += 2;
-       if (mode & EMIT_INVALIDATE)
-               aux_inv = request->engine->mask & ~BIT(BCS0);
-       if (aux_inv)
-               cmd += 2 * hweight8(aux_inv) + 2;
-
-       cs = intel_ring_begin(request, cmd);
-       if (IS_ERR(cs))
-               return PTR_ERR(cs);
-
-       if (mode & EMIT_INVALIDATE)
-               *cs++ = preparser_disable(true);
-
-       cmd = MI_FLUSH_DW + 1;
-
-       /* We always require a command barrier so that subsequent
-        * commands, such as breadcrumb interrupts, are strictly ordered
-        * wrt the contents of the write cache being flushed to memory
-        * (and thus being coherent from the CPU).
-        */
-       cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
-
-       if (mode & EMIT_INVALIDATE) {
-               cmd |= MI_INVALIDATE_TLB;
-               if (request->engine->class == VIDEO_DECODE_CLASS)
-                       cmd |= MI_INVALIDATE_BSD;
-       }
-
-       *cs++ = cmd;
-       *cs++ = LRC_PPHWSP_SCRATCH_ADDR;
-       *cs++ = 0; /* upper addr */
-       *cs++ = 0; /* value */
-
-       if (aux_inv) { /* hsdes: 1809175790 */
-               struct intel_engine_cs *engine;
-               unsigned int tmp;
-
-               *cs++ = MI_LOAD_REGISTER_IMM(hweight8(aux_inv));
-               for_each_engine_masked(engine, request->engine->gt,
-                                      aux_inv, tmp) {
-                       *cs++ = i915_mmio_reg_offset(aux_inv_reg(engine));
-                       *cs++ = AUX_INV;
-               }
-               *cs++ = MI_NOOP;
-       }
-
-       if (mode & EMIT_INVALIDATE)
-               *cs++ = preparser_disable(false);
-
-       intel_ring_advance(request, cs);
-
-       return 0;
-}
 
 static void assert_request_valid(struct i915_request *rq)
 {
@@ -5113,12 +4730,12 @@ logical_ring_default_vfuncs(struct intel_engine_cs 
*engine)
        engine->cops = &execlists_context_ops;
        engine->request_alloc = execlists_request_alloc;
 
-       engine->emit_flush = gen8_emit_flush;
+       engine->emit_flush = gen8_emit_flush_xcs;
        engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb;
        engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb;
        if (INTEL_GEN(engine->i915) >= 12) {
                engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb;
-               engine->emit_flush = gen12_emit_flush;
+               engine->emit_flush = gen12_emit_flush_xcs;
        }
        engine->set_default_submission = intel_execlists_set_default_submission;
 
@@ -5162,15 +4779,15 @@ static void rcs_submission_override(struct 
intel_engine_cs *engine)
 {
        switch (INTEL_GEN(engine->i915)) {
        case 12:
-               engine->emit_flush = gen12_emit_flush_render;
+               engine->emit_flush = gen12_emit_flush_rcs;
                engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs;
                break;
        case 11:
-               engine->emit_flush = gen11_emit_flush_render;
+               engine->emit_flush = gen11_emit_flush_rcs;
                engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs;
                break;
        default:
-               engine->emit_flush = gen8_emit_flush_render;
+               engine->emit_flush = gen8_emit_flush_rcs;
                engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
                break;
        }
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to