If the object is coherent, we can simply update the cache domain on the
whole object rather than calculate the before/after clflushes. The
advantage is that we then get correct tracking of elided flushes when
changing coherency later.

Testcase: igt/gem_pwrite_snooped
Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahti...@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h |  6 +++---
 drivers/gpu/drm/i915/i915_gem.c | 45 +++++++++++++++++++++--------------------
 2 files changed, 26 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 3002996ddbed..8de104b63209 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3353,9 +3353,9 @@ int i915_gem_obj_prepare_shmem_read(struct 
drm_i915_gem_object *obj,
                                    unsigned int *needs_clflush);
 int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
                                     unsigned int *needs_clflush);
-#define CLFLUSH_BEFORE 0x1
-#define CLFLUSH_AFTER 0x2
-#define CLFLUSH_FLAGS (CLFLUSH_BEFORE | CLFLUSH_AFTER)
+#define CLFLUSH_BEFORE BIT(0)
+#define CLFLUSH_AFTER  BIT(1)
+#define CLFLUSH_FLAGS  (CLFLUSH_BEFORE | CLFLUSH_AFTER)
 
 static inline void
 i915_gem_obj_finish_shmem_access(struct drm_i915_gem_object *obj)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index aca1eaddafb4..202bb850f260 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -788,6 +788,15 @@ int i915_gem_obj_prepare_shmem_read(struct 
drm_i915_gem_object *obj,
        if (ret)
                return ret;
 
+       if (i915_gem_object_is_coherent(obj) ||
+           !static_cpu_has(X86_FEATURE_CLFLUSH)) {
+               ret = i915_gem_object_set_to_cpu_domain(obj, false);
+               if (ret)
+                       goto err_unpin;
+               else
+                       goto out;
+       }
+
        i915_gem_object_flush_gtt_write_domain(obj);
 
        /* If we're not in the cpu read domain, set ourself into the gtt
@@ -796,16 +805,9 @@ int i915_gem_obj_prepare_shmem_read(struct 
drm_i915_gem_object *obj,
         * anyway again before the next pread happens.
         */
        if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU))
-               *needs_clflush = !i915_gem_object_is_coherent(obj);
-
-       if (*needs_clflush && !static_cpu_has(X86_FEATURE_CLFLUSH)) {
-               ret = i915_gem_object_set_to_cpu_domain(obj, false);
-               if (ret)
-                       goto err_unpin;
-
-               *needs_clflush = 0;
-       }
+               *needs_clflush = CLFLUSH_BEFORE;
 
+out:
        /* return with the pages pinned */
        return 0;
 
@@ -838,6 +840,15 @@ int i915_gem_obj_prepare_shmem_write(struct 
drm_i915_gem_object *obj,
        if (ret)
                return ret;
 
+       if (i915_gem_object_is_coherent(obj) ||
+           !static_cpu_has(X86_FEATURE_CLFLUSH)) {
+               ret = i915_gem_object_set_to_cpu_domain(obj, true);
+               if (ret)
+                       goto err_unpin;
+               else
+                       goto out;
+       }
+
        i915_gem_object_flush_gtt_write_domain(obj);
 
        /* If we're not in the cpu write domain, set ourself into the
@@ -846,25 +857,15 @@ int i915_gem_obj_prepare_shmem_write(struct 
drm_i915_gem_object *obj,
         * right away and we therefore have to clflush anyway.
         */
        if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
-               *needs_clflush |= cpu_write_needs_clflush(obj) << 1;
+               *needs_clflush |= CLFLUSH_AFTER;
 
        /* Same trick applies to invalidate partially written cachelines read
         * before writing.
         */
        if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU))
-               *needs_clflush |= !i915_gem_object_is_coherent(obj);
-
-       if (*needs_clflush && !static_cpu_has(X86_FEATURE_CLFLUSH)) {
-               ret = i915_gem_object_set_to_cpu_domain(obj, true);
-               if (ret)
-                       goto err_unpin;
-
-               *needs_clflush = 0;
-       }
-
-       if ((*needs_clflush & CLFLUSH_AFTER) == 0)
-               obj->cache_dirty = true;
+               *needs_clflush |= CLFLUSH_BEFORE;
 
+out:
        intel_fb_obj_invalidate(obj, ORIGIN_CPU);
        obj->mm.dirty = true;
        /* return with the pages pinned */
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to