i915: split wa_bb code to its own file

Matthew Auld Fri, 27 Nov 2020 04:08:09 -0800

From: Daniele Ceraolo Spurio <daniele.ceraolospu...@intel.com>

Continue splitting the back-end independent code out of the
execlists-submission-specific file.


Based on a patch by Chris Wilson.

Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospu...@intel.com>
Cc: Chris P Wilson <chris.p.wil...@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursu...@linux.intel.com>
Reviewed-by: John Harrison <john.c.harri...@intel.com>
---
 drivers/gpu/drm/i915/Makefile                 |   1 +
 .../drm/i915/gt/intel_engine_workaround_bb.c  | 335 ++++++++++++++++++
 .../drm/i915/gt/intel_engine_workaround_bb.h  |  14 +
 .../drm/i915/gt/intel_execlists_submission.c  | 327 +----------------
 4 files changed, 352 insertions(+), 325 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gt/intel_engine_workaround_bb.c
 create mode 100644 drivers/gpu/drm/i915/gt/intel_engine_workaround_bb.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index f9ef5199b124..2445cc990e15 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -92,6 +92,7 @@ gt-y += \
        gt/intel_engine_heartbeat.o \
        gt/intel_engine_pm.o \
        gt/intel_engine_user.o \
+       gt/intel_engine_workaround_bb.o \
        gt/intel_execlists_submission.o \
        gt/intel_ggtt.o \
        gt/intel_ggtt_fencing.o \
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_workaround_bb.c b/drivers/gpu/drm/i915/gt/intel_engine_workaround_bb.c
new file mode 100644
index 000000000000..b03bdfc92bb2
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_engine_workaround_bb.c
@@ -0,0 +1,335 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2014 Intel Corporation
+ */
+
+#include "i915_drv.h"
+#include "intel_engine_types.h"
+#include "intel_engine_workaround_bb.h"
+#include "intel_execlists_submission.h" /* XXX */
+#include "intel_gpu_commands.h"
+#include "intel_gt.h"
+
+/*
+ * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after
+ * PIPE_CONTROL instruction. This is required for the flush to happen correctly
+ * but there is a slight complication as this is applied in WA batch where the
+ * values are only initialized once so we cannot take register value at the
+ * beginning and reuse it further; hence we save its value to memory, upload a
+ * constant value with bit21 set and then we restore it back with the saved value.
+ * To simplify the WA, a constant value is formed by using the default value
+ * of this register. This shouldn't be a problem because we are only modifying
+ * it for a short period and this batch is non-preemptible. We can of course
+ * use additional instructions that read the actual value of the register
+ * at that time and set our bit of interest but it makes the WA complicated.
+ *
+ * This WA is also required for Gen9 so extracting as a function avoids
+ * code duplication.
+ */
+static u32 *
+gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
+{
+       /* NB no one else is allowed to scribble over scratch + 256! */
+       *batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
+       *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
+       *batch++ = intel_gt_scratch_offset(engine->gt,
+                                          INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
+       *batch++ = 0;
+
+       *batch++ = MI_LOAD_REGISTER_IMM(1);
+       *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
+       *batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;
+
+       batch = gen8_emit_pipe_control(batch,
+                                      PIPE_CONTROL_CS_STALL |
+                                      PIPE_CONTROL_DC_FLUSH_ENABLE,
+                                      0);
+
+       *batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
+       *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
+       *batch++ = intel_gt_scratch_offset(engine->gt,
+                                          INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
+       *batch++ = 0;
+
+       return batch;
+}
+
+/*
+ * Typically we only have one indirect_ctx and per_ctx batch buffer which are
+ * initialized at the beginning and shared across all contexts but this field
+ * helps us to have multiple batches at different offsets and select them based
+ * on a criteria. At the moment this batch always start at the beginning of the page
+ * and at this point we don't have multiple wa_ctx batch buffers.
+ *
+ * The number of WA applied are not known at the beginning; we use this field
+ * to return the no of DWORDS written.
+ *
+ * It is to be noted that this batch does not contain MI_BATCH_BUFFER_END
+ * so it adds NOOPs as padding to make it cacheline aligned.
+ * MI_BATCH_BUFFER_END will be added to perctx batch and both of them together
+ * makes a complete batch buffer.
+ */
+static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
+{
+       /* WaDisableCtxRestoreArbitration:bdw,chv */
+       *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+
+       /* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */
+       if (IS_BROADWELL(engine->i915))
+               batch = gen8_emit_flush_coherentl3_wa(engine, batch);
+
+       /* WaClearSlmSpaceAtContextSwitch:bdw,chv */
+       /* Actual scratch location is at 128 bytes offset */
+       batch = gen8_emit_pipe_control(batch,
+                                      PIPE_CONTROL_FLUSH_L3 |
+                                      PIPE_CONTROL_STORE_DATA_INDEX |
+                                      PIPE_CONTROL_CS_STALL |
+                                      PIPE_CONTROL_QW_WRITE,
+                                      LRC_PPHWSP_SCRATCH_ADDR);
+
+       *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+
+       /* Pad to end of cacheline */
+       while ((unsigned long)batch % CACHELINE_BYTES)
+               *batch++ = MI_NOOP;
+
+       /*
+        * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because
+        * execution depends on the length specified in terms of cache lines
+        * in the register CTX_RCS_INDIRECT_CTX
+        */
+
+       return batch;
+}
+
+struct lri {
+       i915_reg_t reg;
+       u32 value;
+};
+
+static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count)
+{
+       GEM_BUG_ON(!count || count > 63);
+
+       *batch++ = MI_LOAD_REGISTER_IMM(count);
+       do {
+               *batch++ = i915_mmio_reg_offset(lri->reg);
+               *batch++ = lri->value;
+       } while (lri++, --count);
+       *batch++ = MI_NOOP;
+
+       return batch;
+}
+
+static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
+{
+       static const struct lri lri[] = {
+               /* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */
+               {
+                       COMMON_SLICE_CHICKEN2,
+                       __MASKED_FIELD(GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE,
+                                      0),
+               },
+
+               /* BSpec: 11391 */
+               {
+                       FF_SLICE_CHICKEN,
+                       __MASKED_FIELD(FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX,
+                                      FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX),
+               },
+
+               /* BSpec: 11299 */
+               {
+                       _3D_CHICKEN3,
+                       __MASKED_FIELD(_3D_CHICKEN_SF_PROVOKING_VERTEX_FIX,
+                                      _3D_CHICKEN_SF_PROVOKING_VERTEX_FIX),
+               }
+       };
+
+       *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+
+       /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
+       batch = gen8_emit_flush_coherentl3_wa(engine, batch);
+
+       /* WaClearSlmSpaceAtContextSwitch:skl,bxt,kbl,glk,cfl */
+       batch = gen8_emit_pipe_control(batch,
+                                      PIPE_CONTROL_FLUSH_L3 |
+                                      PIPE_CONTROL_STORE_DATA_INDEX |
+                                      PIPE_CONTROL_CS_STALL |
+                                      PIPE_CONTROL_QW_WRITE,
+                                      LRC_PPHWSP_SCRATCH_ADDR);
+
+       batch = emit_lri(batch, lri, ARRAY_SIZE(lri));
+
+       /* WaMediaPoolStateCmdInWABB:bxt,glk */
+       if (HAS_POOLED_EU(engine->i915)) {
+               /*
+                * EU pool configuration is setup along with golden context
+                * during context initialization. This value depends on
+                * device type (2x6 or 3x6) and needs to be updated based
+                * on which subslice is disabled especially for 2x6
+                * devices, however it is safe to load default
+                * configuration of 3x6 device instead of masking off
+                * corresponding bits because HW ignores bits of a disabled
+                * subslice and drops down to appropriate config. Please
+                * see render_state_setup() in i915_gem_render_state.c for
+                * possible configurations, to avoid duplication they are
+                * not shown here again.
+                */
+               *batch++ = GEN9_MEDIA_POOL_STATE;
+               *batch++ = GEN9_MEDIA_POOL_ENABLE;
+               *batch++ = 0x00777000;
+               *batch++ = 0;
+               *batch++ = 0;
+               *batch++ = 0;
+       }
+
+       *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+
+       /* Pad to end of cacheline */
+       while ((unsigned long)batch % CACHELINE_BYTES)
+               *batch++ = MI_NOOP;
+
+       return batch;
+}
+
+static u32 *
+gen10_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
+{
+       int i;
+
+       /*
+        * WaPipeControlBefore3DStateSamplePattern: cnl
+        *
+        * Ensure the engine is idle prior to programming a
+        * 3DSTATE_SAMPLE_PATTERN during a context restore.
+        */
+       batch = gen8_emit_pipe_control(batch,
+                                      PIPE_CONTROL_CS_STALL,
+                                      0);
+       /*
+        * WaPipeControlBefore3DStateSamplePattern says we need 4 dwords for
+        * the PIPE_CONTROL followed by 12 dwords of 0x0, so 16 dwords in
+        * total. However, a PIPE_CONTROL is 6 dwords long, not 4, which is
+        * confusing. Since gen8_emit_pipe_control() already advances the
+        * batch by 6 dwords, we advance the other 10 here, completing a
+        * cacheline. It's not clear if the workaround requires this padding
+        * before other commands, or if it's just the regular padding we would
+        * already have for the workaround bb, so leave it here for now.
+        */
+       for (i = 0; i < 10; i++)
+               *batch++ = MI_NOOP;
+
+       /* Pad to end of cacheline */
+       while ((unsigned long)batch % CACHELINE_BYTES)
+               *batch++ = MI_NOOP;
+
+       return batch;
+}
+
+#define CTX_WA_BB_OBJ_SIZE (PAGE_SIZE)
+
+static int lrc_setup_wa_ctx(struct intel_engine_cs *engine)
+{
+       struct drm_i915_gem_object *obj;
+       struct i915_vma *vma;
+       int err;
+
+       obj = i915_gem_object_create_shmem(engine->i915, CTX_WA_BB_OBJ_SIZE);
+       if (IS_ERR(obj))
+               return PTR_ERR(obj);
+
+       vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
+       if (IS_ERR(vma)) {
+               err = PTR_ERR(vma);
+               goto err;
+       }
+
+       err = i915_ggtt_pin(vma, NULL, 0, PIN_HIGH);
+       if (err)
+               goto err;
+
+       engine->wa_ctx.vma = vma;
+       return 0;
+
+err:
+       i915_gem_object_put(obj);
+       return err;
+}
+
+typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch);
+
+int intel_init_workaround_bb(struct intel_engine_cs *engine)
+{
+       struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
+       struct i915_wa_ctx_bb *wa_bb[2] = { &wa_ctx->indirect_ctx,
+                                           &wa_ctx->per_ctx };
+       wa_bb_func_t wa_bb_fn[2];
+       void *batch, *batch_ptr;
+       unsigned int i;
+       int ret;
+
+       if (engine->class != RENDER_CLASS)
+               return 0;
+
+       switch (INTEL_GEN(engine->i915)) {
+       case 12:
+       case 11:
+               return 0;
+       case 10:
+               wa_bb_fn[0] = gen10_init_indirectctx_bb;
+               wa_bb_fn[1] = NULL;
+               break;
+       case 9:
+               wa_bb_fn[0] = gen9_init_indirectctx_bb;
+               wa_bb_fn[1] = NULL;
+               break;
+       case 8:
+               wa_bb_fn[0] = gen8_init_indirectctx_bb;
+               wa_bb_fn[1] = NULL;
+               break;
+       default:
+               MISSING_CASE(INTEL_GEN(engine->i915));
+               return 0;
+       }
+
+       ret = lrc_setup_wa_ctx(engine);
+       if (ret) {
+               drm_dbg(&engine->i915->drm,
+                       "Failed to setup context WA page: %d\n", ret);
+               return ret;
+       }
+
+       batch = i915_gem_object_pin_map(wa_ctx->vma->obj, I915_MAP_WB);
+
+       /*
+        * Emit the two workaround batch buffers, recording the offset from the
+        * start of the workaround batch buffer object for each and their
+        * respective sizes.
+        */
+       batch_ptr = batch;
+       for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) {
+               wa_bb[i]->offset = batch_ptr - batch;
+               if (GEM_DEBUG_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset,
+                                                 CACHELINE_BYTES))) {
+                       ret = -EINVAL;
+                       break;
+               }
+               if (wa_bb_fn[i])
+                       batch_ptr = wa_bb_fn[i](engine, batch_ptr);
+               wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset);
+       }
+       GEM_BUG_ON(batch_ptr - batch > CTX_WA_BB_OBJ_SIZE);
+
+       __i915_gem_object_flush_map(wa_ctx->vma->obj, 0, batch_ptr - batch);
+       __i915_gem_object_release_map(wa_ctx->vma->obj);
+       if (ret)
+               intel_fini_workaround_bb(engine);
+
+       return ret;
+}
+
+void intel_fini_workaround_bb(struct intel_engine_cs *engine)
+{
+       i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0);
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_workaround_bb.h b/drivers/gpu/drm/i915/gt/intel_engine_workaround_bb.h
new file mode 100644
index 000000000000..88771d77fd42
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_engine_workaround_bb.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2014 Intel Corporation
+ */
+
+#ifndef __INTEL_ENGINE_WORKAROUND_BB_H__
+#define __INTEL_ENGINE_WORKAROUND_BB_H__
+
+struct intel_engine_cs;
+
+int intel_init_workaround_bb(struct intel_engine_cs *engine);
+void intel_fini_workaround_bb(struct intel_engine_cs *engine);
+
+#endif /* __INTEL_ENGINE_WORKAROUND_BB_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index 9069a456d2f7..1cc93ea6b7f0 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -116,6 +116,7 @@
 #include "intel_breadcrumbs.h"
 #include "intel_context.h"
 #include "intel_engine_pm.h"
+#include "intel_engine_workaround_bb.h"
 #include "intel_execlists_submission.h"
 #include "intel_gt.h"
 #include "intel_gt_pm.h"
@@ -3695,330 +3696,6 @@ static int execlists_request_alloc(struct i915_request *request)
        return 0;
 }
 
-/*
- * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after
- * PIPE_CONTROL instruction. This is required for the flush to happen correctly
- * but there is a slight complication as this is applied in WA batch where the
- * values are only initialized once so we cannot take register value at the
- * beginning and reuse it further; hence we save its value to memory, upload a
- * constant value with bit21 set and then we restore it back with the saved value.
- * To simplify the WA, a constant value is formed by using the default value
- * of this register. This shouldn't be a problem because we are only modifying
- * it for a short period and this batch in non-premptible. We can ofcourse
- * use additional instructions that read the actual value of the register
- * at that time and set our bit of interest but it makes the WA complicated.
- *
- * This WA is also required for Gen9 so extracting as a function avoids
- * code duplication.
- */
-static u32 *
-gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
-{
-       /* NB no one else is allowed to scribble over scratch + 256! */
-       *batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
-       *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
-       *batch++ = intel_gt_scratch_offset(engine->gt,
-                                          INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
-       *batch++ = 0;
-
-       *batch++ = MI_LOAD_REGISTER_IMM(1);
-       *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
-       *batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;
-
-       batch = gen8_emit_pipe_control(batch,
-                                      PIPE_CONTROL_CS_STALL |
-                                      PIPE_CONTROL_DC_FLUSH_ENABLE,
-                                      0);
-
-       *batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
-       *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
-       *batch++ = intel_gt_scratch_offset(engine->gt,
-                                          INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
-       *batch++ = 0;
-
-       return batch;
-}
-
-/*
- * Typically we only have one indirect_ctx and per_ctx batch buffer which are
- * initialized at the beginning and shared across all contexts but this field
- * helps us to have multiple batches at different offsets and select them based
- * on a criteria. At the moment this batch always start at the beginning of the page
- * and at this point we don't have multiple wa_ctx batch buffers.
- *
- * The number of WA applied are not known at the beginning; we use this field
- * to return the no of DWORDS written.
- *
- * It is to be noted that this batch does not contain MI_BATCH_BUFFER_END
- * so it adds NOOPs as padding to make it cacheline aligned.
- * MI_BATCH_BUFFER_END will be added to perctx batch and both of them together
- * makes a complete batch buffer.
- */
-static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
-{
-       /* WaDisableCtxRestoreArbitration:bdw,chv */
-       *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
-
-       /* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */
-       if (IS_BROADWELL(engine->i915))
-               batch = gen8_emit_flush_coherentl3_wa(engine, batch);
-
-       /* WaClearSlmSpaceAtContextSwitch:bdw,chv */
-       /* Actual scratch location is at 128 bytes offset */
-       batch = gen8_emit_pipe_control(batch,
-                                      PIPE_CONTROL_FLUSH_L3 |
-                                      PIPE_CONTROL_STORE_DATA_INDEX |
-                                      PIPE_CONTROL_CS_STALL |
-                                      PIPE_CONTROL_QW_WRITE,
-                                      LRC_PPHWSP_SCRATCH_ADDR);
-
-       *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
-
-       /* Pad to end of cacheline */
-       while ((unsigned long)batch % CACHELINE_BYTES)
-               *batch++ = MI_NOOP;
-
-       /*
-        * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because
-        * execution depends on the length specified in terms of cache lines
-        * in the register CTX_RCS_INDIRECT_CTX
-        */
-
-       return batch;
-}
-
-struct lri {
-       i915_reg_t reg;
-       u32 value;
-};
-
-static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count)
-{
-       GEM_BUG_ON(!count || count > 63);
-
-       *batch++ = MI_LOAD_REGISTER_IMM(count);
-       do {
-               *batch++ = i915_mmio_reg_offset(lri->reg);
-               *batch++ = lri->value;
-       } while (lri++, --count);
-       *batch++ = MI_NOOP;
-
-       return batch;
-}
-
-static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
-{
-       static const struct lri lri[] = {
-               /* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */
-               {
-                       COMMON_SLICE_CHICKEN2,
-                       __MASKED_FIELD(GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE,
-                                      0),
-               },
-
-               /* BSpec: 11391 */
-               {
-                       FF_SLICE_CHICKEN,
-                       __MASKED_FIELD(FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX,
-                                      FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX),
-               },
-
-               /* BSpec: 11299 */
-               {
-                       _3D_CHICKEN3,
-                       __MASKED_FIELD(_3D_CHICKEN_SF_PROVOKING_VERTEX_FIX,
-                                      _3D_CHICKEN_SF_PROVOKING_VERTEX_FIX),
-               }
-       };
-
-       *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
-
-       /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
-       batch = gen8_emit_flush_coherentl3_wa(engine, batch);
-
-       /* WaClearSlmSpaceAtContextSwitch:skl,bxt,kbl,glk,cfl */
-       batch = gen8_emit_pipe_control(batch,
-                                      PIPE_CONTROL_FLUSH_L3 |
-                                      PIPE_CONTROL_STORE_DATA_INDEX |
-                                      PIPE_CONTROL_CS_STALL |
-                                      PIPE_CONTROL_QW_WRITE,
-                                      LRC_PPHWSP_SCRATCH_ADDR);
-
-       batch = emit_lri(batch, lri, ARRAY_SIZE(lri));
-
-       /* WaMediaPoolStateCmdInWABB:bxt,glk */
-       if (HAS_POOLED_EU(engine->i915)) {
-               /*
-                * EU pool configuration is setup along with golden context
-                * during context initialization. This value depends on
-                * device type (2x6 or 3x6) and needs to be updated based
-                * on which subslice is disabled especially for 2x6
-                * devices, however it is safe to load default
-                * configuration of 3x6 device instead of masking off
-                * corresponding bits because HW ignores bits of a disabled
-                * subslice and drops down to appropriate config. Please
-                * see render_state_setup() in i915_gem_render_state.c for
-                * possible configurations, to avoid duplication they are
-                * not shown here again.
-                */
-               *batch++ = GEN9_MEDIA_POOL_STATE;
-               *batch++ = GEN9_MEDIA_POOL_ENABLE;
-               *batch++ = 0x00777000;
-               *batch++ = 0;
-               *batch++ = 0;
-               *batch++ = 0;
-       }
-
-       *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
-
-       /* Pad to end of cacheline */
-       while ((unsigned long)batch % CACHELINE_BYTES)
-               *batch++ = MI_NOOP;
-
-       return batch;
-}
-
-static u32 *
-gen10_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
-{
-       int i;
-
-       /*
-        * WaPipeControlBefore3DStateSamplePattern: cnl
-        *
-        * Ensure the engine is idle prior to programming a
-        * 3DSTATE_SAMPLE_PATTERN during a context restore.
-        */
-       batch = gen8_emit_pipe_control(batch,
-                                      PIPE_CONTROL_CS_STALL,
-                                      0);
-       /*
-        * WaPipeControlBefore3DStateSamplePattern says we need 4 dwords for
-        * the PIPE_CONTROL followed by 12 dwords of 0x0, so 16 dwords in
-        * total. However, a PIPE_CONTROL is 6 dwords long, not 4, which is
-        * confusing. Since gen8_emit_pipe_control() already advances the
-        * batch by 6 dwords, we advance the other 10 here, completing a
-        * cacheline. It's not clear if the workaround requires this padding
-        * before other commands, or if it's just the regular padding we would
-        * already have for the workaround bb, so leave it here for now.
-        */
-       for (i = 0; i < 10; i++)
-               *batch++ = MI_NOOP;
-
-       /* Pad to end of cacheline */
-       while ((unsigned long)batch % CACHELINE_BYTES)
-               *batch++ = MI_NOOP;
-
-       return batch;
-}
-
-#define CTX_WA_BB_OBJ_SIZE (PAGE_SIZE)
-
-static int lrc_setup_wa_ctx(struct intel_engine_cs *engine)
-{
-       struct drm_i915_gem_object *obj;
-       struct i915_vma *vma;
-       int err;
-
-       obj = i915_gem_object_create_shmem(engine->i915, CTX_WA_BB_OBJ_SIZE);
-       if (IS_ERR(obj))
-               return PTR_ERR(obj);
-
-       vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
-       if (IS_ERR(vma)) {
-               err = PTR_ERR(vma);
-               goto err;
-       }
-
-       err = i915_ggtt_pin(vma, NULL, 0, PIN_HIGH);
-       if (err)
-               goto err;
-
-       engine->wa_ctx.vma = vma;
-       return 0;
-
-err:
-       i915_gem_object_put(obj);
-       return err;
-}
-
-static void lrc_destroy_wa_ctx(struct intel_engine_cs *engine)
-{
-       i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0);
-}
-
-typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch);
-
-static int intel_init_workaround_bb(struct intel_engine_cs *engine)
-{
-       struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
-       struct i915_wa_ctx_bb *wa_bb[2] = { &wa_ctx->indirect_ctx,
-                                           &wa_ctx->per_ctx };
-       wa_bb_func_t wa_bb_fn[2];
-       void *batch, *batch_ptr;
-       unsigned int i;
-       int ret;
-
-       if (engine->class != RENDER_CLASS)
-               return 0;
-
-       switch (INTEL_GEN(engine->i915)) {
-       case 12:
-       case 11:
-               return 0;
-       case 10:
-               wa_bb_fn[0] = gen10_init_indirectctx_bb;
-               wa_bb_fn[1] = NULL;
-               break;
-       case 9:
-               wa_bb_fn[0] = gen9_init_indirectctx_bb;
-               wa_bb_fn[1] = NULL;
-               break;
-       case 8:
-               wa_bb_fn[0] = gen8_init_indirectctx_bb;
-               wa_bb_fn[1] = NULL;
-               break;
-       default:
-               MISSING_CASE(INTEL_GEN(engine->i915));
-               return 0;
-       }
-
-       ret = lrc_setup_wa_ctx(engine);
-       if (ret) {
-               drm_dbg(&engine->i915->drm,
-                       "Failed to setup context WA page: %d\n", ret);
-               return ret;
-       }
-
-       batch = i915_gem_object_pin_map(wa_ctx->vma->obj, I915_MAP_WB);
-
-       /*
-        * Emit the two workaround batch buffers, recording the offset from the
-        * start of the workaround batch buffer object for each and their
-        * respective sizes.
-        */
-       batch_ptr = batch;
-       for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) {
-               wa_bb[i]->offset = batch_ptr - batch;
-               if (GEM_DEBUG_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset,
-                                                 CACHELINE_BYTES))) {
-                       ret = -EINVAL;
-                       break;
-               }
-               if (wa_bb_fn[i])
-                       batch_ptr = wa_bb_fn[i](engine, batch_ptr);
-               wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset);
-       }
-       GEM_BUG_ON(batch_ptr - batch > CTX_WA_BB_OBJ_SIZE);
-
-       __i915_gem_object_flush_map(wa_ctx->vma->obj, 0, batch_ptr - batch);
-       __i915_gem_object_release_map(wa_ctx->vma->obj);
-       if (ret)
-               lrc_destroy_wa_ctx(engine);
-
-       return ret;
-}
-
 static void reset_csb_pointers(struct intel_engine_cs *engine)
 {
        struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -4707,7 +4384,7 @@ static void execlists_release(struct intel_engine_cs *engine)
        execlists_shutdown(engine);
 
        intel_engine_cleanup_common(engine);
-       lrc_destroy_wa_ctx(engine);
+       intel_fini_workaround_bb(engine);
 }
 
 static void
-- 
2.26.2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [RFC PATCH 007/162] drm/i915: split wa_bb code to its own file

Reply via email to