From: John Harrison <john.c.harri...@intel.com>

Gen12 introduces a completely new and different scheme for
implementing engine relative MMIO accesses - MI_LRI_MMIO_REMAP. This
requires using the base address of instance zero of the relevant
engine class. Even then, the remap is only valid if the register in
question falls within one of the ranges specified by a table.

Signed-off-by: John Harrison <john.c.harri...@intel.com>
CC: Daniele Ceraolo Spurio <daniele.ceraolospu...@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_engine_cs.c    | 185 ++++++++++++++++++-
 drivers/gpu/drm/i915/gt/intel_engine_types.h |   7 +
 drivers/gpu/drm/i915/gt/intel_gpu_commands.h |   9 +-
 drivers/gpu/drm/i915/i915_perf.c             |   3 +-
 4 files changed, 195 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index efe1c377d797..a65e8ccd9d8d 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -236,6 +236,127 @@ static u32 __engine_mmio_base(struct drm_i915_private *i915,
        return bases[i].base;
 }
 
+static void lri_init_remap_base(struct intel_engine_cs *engine)
+{
+       struct intel_engine_cs *instance0;
+
+       /* Default: no remap base (pre-Gen12 has no MI_LRI_MMIO_REMAP). */
+       engine->lri_mmio_base = 0;
+       if (INTEL_GEN(engine->i915) < 12)
+               return;
+
+       /* Remapping is relative to instance zero of this engine's class. */
+       instance0 = engine->i915->gt.engine_class[engine->class][0];
+       GEM_BUG_ON(!instance0);
+       engine->lri_mmio_base = instance0->mmio_base;
+}
+
+static void lri_add_range(struct intel_engine_cs *engine, u32 min, u32 max)
+{
+       GEM_BUG_ON(engine->lri_num_ranges >= INTEL_MAX_LRI_RANGES);
+       /* Record the inclusive [min, max] bounds in the next free slot. */
+       engine->lri_ranges[engine->lri_num_ranges].min = min;
+       engine->lri_ranges[engine->lri_num_ranges].max = max;
+       engine->lri_num_ranges++;
+}
+
+static void lri_init_remap_ranges(struct intel_engine_cs *engine)
+{
+       bool has_aux_tables = true;     /* Removed after TGL? */
+       u32 offset;
+
+       engine->lri_num_ranges = 0;
+       /* Pre-Gen12 engines keep an empty range table. */
+       if (INTEL_GEN(engine->i915) < 12)
+               return;
+
+       switch (engine->class) {
+       case RENDER_CLASS:
+               /* Hardware Front End */
+               lri_add_range(engine, 0x000 + engine->mmio_base,
+                             0x7FF + engine->mmio_base);
+
+               /* TRTT */
+               lri_add_range(engine, 0x4400, 0x441F);
+
+               /* Aux Tables - REMOVEDBY(GEN:HAS:1406929672) */
+               if (has_aux_tables)
+                       lri_add_range(engine, 0x4200, 0x420F);
+               break;
+
+       case VIDEO_DECODE_CLASS:
+               lri_add_range(engine, 0x0000 + engine->mmio_base,
+                             0x3FFF + engine->mmio_base);
+
+               /* TRTT */
+               offset = ((engine->instance & 0x1) * 0x20) +
+                        ((engine->instance >> 1) * 0x100);
+               lri_add_range(engine, 0x4420 + offset, 0x443F + offset);
+
+               /* Aux Tables - REMOVEDBY(GEN:HAS:1406929672) */
+               if (has_aux_tables) {
+                       switch (engine->instance) {
+                       case 0:
+                               lri_add_range(engine, 0x4210, 0x421F);
+                               break;
+
+                       case 1:
+                               lri_add_range(engine, 0x4220, 0x422F);
+                               break;
+
+                       case 2:
+                               lri_add_range(engine, 0x4290, 0x429F);
+                               break;
+
+                       case 3:
+                               lri_add_range(engine, 0x42A0, 0x42AF);
+                               break;
+
+                       default:
+                               break;
+                       }
+               }
+               break;
+
+       case VIDEO_ENHANCEMENT_CLASS:
+               lri_add_range(engine, 0x0000 + engine->mmio_base,
+                             0x3FFF + engine->mmio_base);
+
+               /* TRTT */
+               offset = engine->instance * 0x100;
+               lri_add_range(engine, 0x4460 + offset, 0x447F + offset);
+
+               /* Aux Tables - REMOVEDBY(GEN:HAS:1406929672) */
+               if (has_aux_tables) {
+                       switch (engine->instance) {
+                       case 0:
+                               lri_add_range(engine, 0x4230, 0x423F);
+                               break;
+
+                       case 1:
+                               lri_add_range(engine, 0x42B0, 0x42BF);
+                               break;
+
+                       case 2: /* NOTE(review): same as VCS 2 - confirm */
+                               lri_add_range(engine, 0x4290, 0x429F);
+                               break;
+
+                       case 3:
+                               /* FIXME: same range as instance 1 - confirm */
+                               lri_add_range(engine, 0x42B0, 0x42BF);
+                               break;
+
+                       default:
+                               break;
+                       }
+               }
+               break;
+
+       default:
+               break;
+       }
+}
+
 static u32 i915_get_lri_cmd_legacy(const struct intel_engine_cs *engine,
                                   u32 word_count)
 {
@@ -249,6 +370,27 @@ static u32 i915_get_lri_cmd_add_offset(const struct intel_engine_cs *engine,
               MI_LRI_ADD_CS_MMIO_START_GEN11;
 }
 
+static u32 i915_get_lri_cmd_remap(const struct intel_engine_cs *engine,
+                                 u32 word_count)
+{
+       u32 cmd;
+
+       /*
+        * The remap flag is currently set unconditionally rather than
+        * being gated on lri_is_reg_in_remap_table().
+        *
+        * NB: To gate this on the reg address will require knowing
+        * all reg addresses in advance. This is not currently the
+        * case as some LRI commands are built from multiple sources.
+        * Also, what if some regs require remap and some do not?
+        * The LRI command would need to be split into multiple pieces.
+        */
+       cmd = __MI_LOAD_REGISTER_IMM(word_count);
+       cmd |= MI_LRI_MMIO_REMAP_GEN12;
+
+       return cmd;
+}
+
 static bool i915_engine_has_relative_lri(const struct intel_engine_cs *engine)
 {
        if (INTEL_GEN(engine->i915) < 11)
@@ -262,18 +404,53 @@ static bool i915_engine_has_relative_lri(const struct intel_engine_cs *engine)
 
 static void lri_init(struct intel_engine_cs *engine)
 {
-       if (i915_engine_has_relative_lri(engine))
-               engine->get_lri_cmd = i915_get_lri_cmd_add_offset;
-       else
+       const bool relative = i915_engine_has_relative_lri(engine);
+
+       if (relative && INTEL_GEN(engine->i915) >= 12) {
+               /* Gen12+ remap scheme needs its base and range table. */
+               engine->get_lri_cmd = i915_get_lri_cmd_remap;
+               lri_init_remap_base(engine);
+               lri_init_remap_ranges(engine);
+       } else if (relative)
+               engine->get_lri_cmd = i915_get_lri_cmd_add_offset;
+       else
                 engine->get_lri_cmd = i915_get_lri_cmd_legacy;
 }
 
+static bool lri_is_reg_in_remap_table(const struct intel_engine_cs *engine,
+                                     i915_reg_t reg)
+{
+       const u32 offset = i915_mmio_reg_offset(reg);
+       u32 idx;
+
+       /*
+        * Linear scan of the engine's remap table; a register matches
+        * when its offset lies within a range's inclusive [min, max].
+        */
+       for (idx = 0; idx < engine->lri_num_ranges; idx++) {
+               if (offset >= engine->lri_ranges[idx].min &&
+                   offset <= engine->lri_ranges[idx].max)
+                       return true;
+       }
+
+       return false;
+}
+
 u32 i915_get_lri_reg(const struct intel_engine_cs *engine, i915_reg_t reg)
 {
        if (!i915_engine_has_relative_lri(engine))
                return i915_mmio_reg_offset(reg);
 
-       return i915_mmio_reg_offset(reg) - engine->mmio_base;
+       if (INTEL_GEN(engine->i915) < 12)
+               return i915_mmio_reg_offset(reg) - engine->mmio_base;
+
+       /* Gen12: warn and use the absolute offset if reg is not in table. */
+       if (WARN_ON(!lri_is_reg_in_remap_table(engine, reg)))
+               return i915_mmio_reg_offset(reg);
+
+       /* Rebase the engine-relative offset onto engine->lri_mmio_base. */
+       return i915_mmio_reg_offset(reg) - engine->mmio_base +
+              engine->lri_mmio_base;
 }
 
 static void __sprint_engine_name(struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 7ca6c86a33f6..1e26f668e73b 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -306,6 +306,13 @@ struct intel_engine_cs {
        u32 context_size;
        u32 mmio_base;
 
+#define INTEL_MAX_LRI_RANGES   3
+       struct lri_range {
+               u32 min, max;
+       } lri_ranges[INTEL_MAX_LRI_RANGES];
+       u32 lri_num_ranges;
+       u32 lri_mmio_base;
+
        u32 (*get_lri_cmd)(const struct intel_engine_cs *engine,
                           u32 word_count);
 
diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index eaa019df0ce7..0ee62a61d7b5 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -130,14 +130,15 @@
  *   simply ignores the register load under certain conditions.
  * - One can actually load arbitrary many arbitrary registers: Simply issue x
  *   address/value pairs. Don't overdue it, though, x <= 2^4 must hold!
- * - Newer hardware supports engine relative addressing but older hardware does
- *   not. This is required for hw engine load balancing. Hence the MI_LRI
- *   instruction itself is prefixed with '__' and should only be used on
- *   legacy hardware code paths. Generic code must always use the MI_LRI
+ * - Newer hardware supports engine relative addressing but using multiple
+ *   incompatible schemes. This is required for hw engine load balancing. Hence
+ *   the MI_LRI instruction itself is prefixed with '__' and should only be
+ *   used on legacy hardware code paths. Generic code must always use the MI_LRI
  *   and i915_get_lri_reg() helper functions instead.
  */
 #define __MI_LOAD_REGISTER_IMM(x)      MI_INSTR(0x22, 2*(x)-1)
 #define   MI_LRI_FORCE_POSTED          (1<<12)
+#define   MI_LRI_MMIO_REMAP_GEN12              (1<<17)
 #define   MI_LRI_ADD_CS_MMIO_START_GEN11       (1<<19)
 #define MI_STORE_REGISTER_MEM        MI_INSTR(0x24, 1)
 #define MI_STORE_REGISTER_MEM_GEN8   MI_INSTR(0x24, 2)
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 83abdda05ba2..f88642209283 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1694,7 +1694,8 @@ gen8_update_reg_state_unlocked(struct i915_perf_stream *stream,
 
        /*
         * NB: The LRI instruction is generated by the hardware.
-        * Should we read it in and assert that the offset flag is set?
+        * Should we read it in and assert that the appropriate
+        * offset flag is set?
         */
 
        CTX_REG(ce->engine, reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL,
-- 
2.21.0.5.gaeb582a983

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to