From: Ville Syrjälä <ville.syrj...@linux.intel.com>

Since SKL the eLLC has been sitting on the far side of the system
agent, meaning the display engine can utilize it. Let's enable that.

I chose WB for the caching mode because my numbers indicate that
WT might actually behave as WB and WC might actually behave as UC.
I'm not 100% sure that is indeed the case, but at least my simple
rendercopy-based benchmark didn't show any difference in performance.

Also, if I configure things to do LLCeLLC+WT I still get cache dirt
on my screen, suggesting that it is in fact operating in WB mode
anyway. This is also the reason I had to fix the MOCS target cache
to really say PTE rather than LLC+eLLC.

Caveat: I've not benchmarked any real workloads. IIRC Eero did
benchmark an earlier version, but that didn't have the PTE vs.
LLC+eLLC MOCS fix, so most likely it wasn't actually doing the
right thing.

Cc: Eero Tamminen <eero.t.tammi...@intel.com>
Signed-off-by: Ville Syrjälä <ville.syrj...@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h     | 3 +--
 drivers/gpu/drm/i915/i915_gem_gtt.c | 7 +++++--
 drivers/gpu/drm/i915/i915_gem_gtt.h | 2 +-
 drivers/gpu/drm/i915/intel_mocs.c   | 2 +-
 4 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 35d0782c077e..2a4f33fa2bba 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2517,8 +2517,7 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
 #define HAS_LLC(dev_priv)      (INTEL_INFO(dev_priv)->has_llc)
 #define HAS_SNOOP(dev_priv)    (INTEL_INFO(dev_priv)->has_snoop)
 #define HAS_EDRAM(dev_priv)    ((dev_priv)->edram_size_mb)
-#define HAS_WT(dev_priv)       ((IS_HASWELL(dev_priv) || \
-                                IS_BROADWELL(dev_priv)) && HAS_EDRAM(dev_priv))
+#define HAS_WT(dev_priv)       HAS_EDRAM(dev_priv)
 
 #define HWS_NEEDS_PHYSICAL(dev_priv)   
(INTEL_INFO(dev_priv)->hws_needs_physical)
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 8f460cc4cc1f..038fbf52a997 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -3071,7 +3071,7 @@ static void cnl_setup_private_ppat(struct intel_ppat *ppat)
 
        __alloc_ppat_entry(ppat, 0, GEN8_PPAT_WB | GEN8_PPAT_LLC);
        __alloc_ppat_entry(ppat, 1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC);
-       __alloc_ppat_entry(ppat, 2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);
+       __alloc_ppat_entry(ppat, 2, GEN8_PPAT_WB | GEN8_PPAT_ELLC_OVERRIDE);
        __alloc_ppat_entry(ppat, 3, GEN8_PPAT_UC);
        __alloc_ppat_entry(ppat, 4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | 
GEN8_PPAT_AGE(0));
        __alloc_ppat_entry(ppat, 5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | 
GEN8_PPAT_AGE(1));
@@ -3109,7 +3109,10 @@ static void bdw_setup_private_ppat(struct intel_ppat *ppat)
 
        __alloc_ppat_entry(ppat, 0, GEN8_PPAT_WB | GEN8_PPAT_LLC);      /* for 
normal objects, no eLLC */
        __alloc_ppat_entry(ppat, 1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC);  /* for 
something pointing to ptes? */
-       __alloc_ppat_entry(ppat, 2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);  /* for 
scanout with eLLC */
+       if (INTEL_GEN(ppat->i915) >= 9)
+               __alloc_ppat_entry(ppat, 2, GEN8_PPAT_WB | 
GEN8_PPAT_ELLC_OVERRIDE); /* for scanout with eLLC */
+       else
+               __alloc_ppat_entry(ppat, 2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC); 
/* for scanout with eLLC */
        __alloc_ppat_entry(ppat, 3, GEN8_PPAT_UC);                      /* 
Uncached objects, mostly for scanout */
        __alloc_ppat_entry(ppat, 4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | 
GEN8_PPAT_AGE(0));
        __alloc_ppat_entry(ppat, 5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | 
GEN8_PPAT_AGE(1));
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index f597f35b109b..47adc7268867 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -139,7 +139,7 @@ typedef u64 gen8_ppgtt_pml4e_t;
 #define PPAT_UNCACHED                  (_PAGE_PWT | _PAGE_PCD)
 #define PPAT_CACHED_PDE                        0 /* WB LLC */
 #define PPAT_CACHED                    _PAGE_PAT /* WB LLCeLLC */
-#define PPAT_DISPLAY_ELLC              _PAGE_PCD /* WT eLLC */
+#define PPAT_DISPLAY_ELLC              _PAGE_PCD /* WT LLCeLLC (HSW/BDW) or WB 
eLLC (SKL+) */
 
 #define CHV_PPAT_SNOOP                 (1<<6)
 #define GEN8_PPAT_AGE(x)               ((x)<<4)
diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c
index 274ba78500c0..d984ccff94ef 100644
--- a/drivers/gpu/drm/i915/intel_mocs.c
+++ b/drivers/gpu/drm/i915/intel_mocs.c
@@ -115,7 +115,7 @@ struct drm_i915_mocs_table {
                   LE_1_UC | LE_TC_2_LLC_ELLC, \
                   L3_1_UC), \
        MOCS_ENTRY(I915_MOCS_PTE, \
-                  LE_0_PAGETABLE | LE_TC_2_LLC_ELLC | LE_LRUM(3), \
+                  LE_0_PAGETABLE | LE_TC_0_PAGETABLE | LE_LRUM(3), \
                   L3_3_WB)
 
 static const struct drm_i915_mocs_entry skylake_mocs_table[] = {
-- 
2.21.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to