Haswell GT3e has the unique feature of supporting Write-Through caching
of objects within the eLLC/LLC. The purpose of this is to enable the display
plane to remain coherent whilst objects lie resident in the eLLC/LLC - so
that we, in theory, get the best of both worlds: perfect display and fast
access.

v2: Actually do the clflush on transition to WT, after nagging by Ville.

Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
Cc: Ville Syrjälä <ville.syrj...@linux.intel.com>
Cc: Kenneth Graunke <kenn...@whitecape.org>
---
 drivers/gpu/drm/i915/i915_dma.c     |  3 +++
 drivers/gpu/drm/i915/i915_drv.h     |  4 +++-
 drivers/gpu/drm/i915/i915_gem.c     | 29 +++++++++++++++--------------
 drivers/gpu/drm/i915/i915_gem_gtt.c | 11 ++++++++++-
 include/uapi/drm/i915_drm.h         |  1 +
 5 files changed, 32 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 8da0b3d..75989fc 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -976,6 +976,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
        case I915_PARAM_HAS_LLC:
                value = HAS_LLC(dev);
                break;
+       case I915_PARAM_HAS_WT:
+               value = HAS_WT(dev);
+               break;
        case I915_PARAM_HAS_ALIASING_PPGTT:
                value = dev_priv->mm.aliasing_ppgtt ? 1 : 0;
                break;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 34d2b9d..d27a82a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -452,6 +452,7 @@ enum i915_cache_level {
        I915_CACHE_NONE = 0,
        I915_CACHE_LLC,
        I915_CACHE_LLC_MLC, /* gen6+, in docs at least! */
+       I915_CACHE_WT,
 };
 
 typedef uint32_t gen6_gtt_pte_t;
@@ -1344,7 +1345,7 @@ struct drm_i915_gem_object {
        unsigned int pending_fenced_gpu_access:1;
        unsigned int fenced_gpu_access:1;
 
-       unsigned int cache_level:2;
+       unsigned int cache_level:3;
 
        unsigned int has_aliasing_ppgtt_mapping:1;
        unsigned int has_global_gtt_mapping:1;
@@ -1547,6 +1548,7 @@ struct drm_i915_file_private {
 #define HAS_BLT(dev)            (INTEL_INFO(dev)->has_blt_ring)
 #define HAS_VEBOX(dev)          (INTEL_INFO(dev)->has_vebox_ring)
 #define HAS_LLC(dev)            (INTEL_INFO(dev)->has_llc)
+#define HAS_WT(dev)            (IS_HASWELL(dev) && ((struct drm_i915_private *)(dev)->dev_private)->ellc_size)
 #define I915_NEED_GFX_HWS(dev) (INTEL_INFO(dev)->need_gfx_hws)
 
 #define HAS_HW_CONTEXTS(dev)   (INTEL_INFO(dev)->gen >= 5)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 99362f7..6b33494 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3447,27 +3447,27 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
                i915_gem_obj_ggtt_set_color(obj, cache_level);
        }
 
-       if (cache_level == I915_CACHE_NONE) {
-               u32 old_read_domains, old_write_domain;
-
+       if (cache_level == I915_CACHE_NONE ||
+           cache_level == I915_CACHE_WT) {
                /* If we're coming from LLC cached, then we haven't
                 * actually been tracking whether the data is in the
                 * CPU cache or not, since we only allow one bit set
                 * in obj->write_domain and have been skipping the clflushes.
-                * Just set it to the CPU cache for now.
+                * Do them now.
                 */
-               WARN_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU);
-               WARN_ON(obj->base.read_domains & ~I915_GEM_DOMAIN_CPU);
+               if (obj->pages && !obj->stolen) {
+                       u32 old_write_domain;
 
-               old_read_domains = obj->base.read_domains;
-               old_write_domain = obj->base.write_domain;
+                       trace_i915_gem_object_clflush(obj);
+                       drm_clflush_sg(obj->pages);
 
-               obj->base.read_domains = I915_GEM_DOMAIN_CPU;
-               obj->base.write_domain = I915_GEM_DOMAIN_CPU;
+                       old_write_domain = obj->base.write_domain;
+                       obj->base.write_domain &= ~I915_GEM_DOMAIN_CPU;
 
-               trace_i915_gem_object_change_domain(obj,
-                                                   old_read_domains,
-                                                   old_write_domain);
+                       trace_i915_gem_object_change_domain(obj,
+                                                           obj->base.read_domains,
+                                                           old_write_domain);
+               }
        }
 
        obj->cache_level = cache_level;
@@ -3565,7 +3565,8 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
         * of uncaching, which would allow us to flush all the LLC-cached data
         * with that bit in the PTE to main memory with just one PIPE_CONTROL.
         */
-       ret = i915_gem_object_set_cache_level(obj, I915_CACHE_NONE);
+       ret = i915_gem_object_set_cache_level(obj,
+                                             HAS_WT(obj->base.dev) ? I915_CACHE_WT : I915_CACHE_NONE);
        if (ret)
                return ret;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 0522d00..072a348 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -54,6 +54,7 @@
                                         (((bits) & 0x8) << (11 - 3)))
 #define HSW_WB_LLC_AGE0                        HSW_CACHEABILITY_CONTROL(0x3)
 #define HSW_WB_ELLC_LLC_AGE0           HSW_CACHEABILITY_CONTROL(0xb)
+#define HSW_WT_ELLC_LLC_AGE0           HSW_CACHEABILITY_CONTROL(0x6)
 
 static gen6_gtt_pte_t gen6_pte_encode(dma_addr_t addr,
                                      enum i915_cache_level level)
@@ -116,8 +117,16 @@ static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr,
        gen6_gtt_pte_t pte = GEN6_PTE_VALID;
        pte |= HSW_PTE_ADDR_ENCODE(addr);
 
-       if (level != I915_CACHE_NONE)
+       switch (level) {
+       case I915_CACHE_NONE:
+               break;
+       case I915_CACHE_WT:
+               pte |= HSW_WT_ELLC_LLC_AGE0;
+               break;
+       default:
                pte |= HSW_WB_ELLC_LLC_AGE0;
+               break;
+       }
 
        return pte;
 }
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index e47cf00..e831292 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -338,6 +338,7 @@ typedef struct drm_i915_irq_wait {
 #define I915_PARAM_HAS_PINNED_BATCHES   24
 #define I915_PARAM_HAS_EXEC_NO_RELOC    25
 #define I915_PARAM_HAS_EXEC_HANDLE_LUT   26
+#define I915_PARAM_HAS_WT               27
 
 typedef struct drm_i915_getparam {
        int param;
-- 
1.8.3.2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to