Be consistent, and even when we know we had used a WC, flush the mapped
object after writing into it. The flush understands the mapping type and
will only clflush if !I915_MAP_WC, but will always insert a wmb [sfence]
so that we can be sure that all writes are visible.

v2: Add the unconditional wmb so we are know that we always flush the
writes to memory/HW at that point.

Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuopp...@linux.intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_object_blt.c          | 8 ++++++--
 drivers/gpu/drm/i915/gem/i915_gem_pages.c               | 1 +
 drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c | 2 ++
 drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c      | 2 ++
 drivers/gpu/drm/i915/gt/selftest_ring_submission.c      | 2 ++
 drivers/gpu/drm/i915/gt/selftest_rps.c                  | 2 ++
 drivers/gpu/drm/i915/selftests/i915_request.c           | 9 +++++++--
 7 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c 
b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
index 2fc7737ef5f4..f457d7130491 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
@@ -78,10 +78,12 @@ struct i915_vma *intel_emit_vma_fill_blt(struct 
intel_context *ce,
        } while (rem);
 
        *cmd = MI_BATCH_BUFFER_END;
-       intel_gt_chipset_flush(ce->vm->gt);
 
+       i915_gem_object_flush_map(pool->obj);
        i915_gem_object_unpin_map(pool->obj);
 
+       intel_gt_chipset_flush(ce->vm->gt);
+
        batch = i915_vma_instance(pool->obj, ce->vm, NULL);
        if (IS_ERR(batch)) {
                err = PTR_ERR(batch);
@@ -289,10 +291,12 @@ struct i915_vma *intel_emit_vma_copy_blt(struct 
intel_context *ce,
        } while (rem);
 
        *cmd = MI_BATCH_BUFFER_END;
-       intel_gt_chipset_flush(ce->vm->gt);
 
+       i915_gem_object_flush_map(pool->obj);
        i915_gem_object_unpin_map(pool->obj);
 
+       intel_gt_chipset_flush(ce->vm->gt);
+
        batch = i915_vma_instance(pool->obj, ce->vm, NULL);
        if (IS_ERR(batch)) {
                err = PTR_ERR(batch);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c 
b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
index 5d855fcd5c0f..189efcd58942 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -391,6 +391,7 @@ void __i915_gem_object_flush_map(struct drm_i915_gem_object 
*obj,
        GEM_BUG_ON(range_overflows_t(typeof(obj->base.size),
                                     offset, size, obj->base.size));
 
+       wmb(); /* let all previous writes be visible to HW */
        obj->mm.dirty = true;
 
        if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c 
b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
index 3f6079e1dfb6..87d7d8aa080f 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
@@ -158,6 +158,8 @@ static int wc_set(struct context *ctx, unsigned long 
offset, u32 v)
                return PTR_ERR(map);
 
        map[offset / sizeof(*map)] = v;
+
+       __i915_gem_object_flush_map(ctx->obj, offset, sizeof(*map));
        i915_gem_object_unpin_map(ctx->obj);
 
        return 0;
diff --git a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c 
b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c
index 772d8cba7da9..226b5fa9b430 100644
--- a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c
+++ b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c
@@ -83,6 +83,8 @@ igt_emit_store_dw(struct i915_vma *vma,
                offset += PAGE_SIZE;
        }
        *cmd = MI_BATCH_BUFFER_END;
+
+       i915_gem_object_flush_map(obj);
        i915_gem_object_unpin_map(obj);
 
        intel_gt_chipset_flush(vma->vm->gt);
diff --git a/drivers/gpu/drm/i915/gt/selftest_ring_submission.c 
b/drivers/gpu/drm/i915/gt/selftest_ring_submission.c
index 9995faadd7e8..3350e7c995bc 100644
--- a/drivers/gpu/drm/i915/gt/selftest_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/selftest_ring_submission.c
@@ -54,6 +54,8 @@ static struct i915_vma *create_wally(struct intel_engine_cs 
*engine)
        *cs++ = STACK_MAGIC;
 
        *cs++ = MI_BATCH_BUFFER_END;
+
+       i915_gem_object_flush_map(obj);
        i915_gem_object_unpin_map(obj);
 
        vma->private = intel_context_create(engine); /* dummy residuals */
diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c 
b/drivers/gpu/drm/i915/gt/selftest_rps.c
index bfa1a15564f7..6275d69aa9cc 100644
--- a/drivers/gpu/drm/i915/gt/selftest_rps.c
+++ b/drivers/gpu/drm/i915/gt/selftest_rps.c
@@ -727,6 +727,7 @@ int live_rps_frequency_cs(void *arg)
 
 err_vma:
                *cancel = MI_BATCH_BUFFER_END;
+               i915_gem_object_flush_map(vma->obj);
                i915_gem_object_unpin_map(vma->obj);
                i915_vma_unpin(vma);
                i915_vma_put(vma);
@@ -868,6 +869,7 @@ int live_rps_frequency_srm(void *arg)
 
 err_vma:
                *cancel = MI_BATCH_BUFFER_END;
+               i915_gem_object_flush_map(vma->obj);
                i915_gem_object_unpin_map(vma->obj);
                i915_vma_unpin(vma);
                i915_vma_put(vma);
diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c 
b/drivers/gpu/drm/i915/selftests/i915_request.c
index 15b1ca9f7a01..c191976e1d15 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -816,10 +816,12 @@ static int recursive_batch_resolve(struct i915_vma *batch)
                return PTR_ERR(cmd);
 
        *cmd = MI_BATCH_BUFFER_END;
-       intel_gt_chipset_flush(batch->vm->gt);
 
+       __i915_gem_object_flush_map(batch->obj, 0, sizeof(*cmd));
        i915_gem_object_unpin_map(batch->obj);
 
+       intel_gt_chipset_flush(batch->vm->gt);
+
        return 0;
 }
 
@@ -1060,9 +1062,12 @@ static int live_sequential_engines(void *arg)
                                              I915_MAP_WC);
                if (!IS_ERR(cmd)) {
                        *cmd = MI_BATCH_BUFFER_END;
-                       intel_gt_chipset_flush(engine->gt);
 
+                       __i915_gem_object_flush_map(request[idx]->batch->obj,
+                                                   0, sizeof(*cmd));
                        i915_gem_object_unpin_map(request[idx]->batch->obj);
+
+                       intel_gt_chipset_flush(engine->gt);
                }
 
                i915_vma_put(request[idx]->batch);
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to