Since we don't differentiate between the different GPU read domains, it
should be safe to allow back-to-back reads to occur without issuing a
wait (or a flush in the non-semaphore case).

This has the unfortunate side effect that we need to keep track of all
the outstanding buffer reads so that we can synchronize before a write
from another ring (since we don't know which read finishes first). In
other words, the code is quite simple for two rings, but gets trickier
for > 2 rings.

Here is a picture of the solution to the above problem

Ring 0            Ring 1             Ring 2
batch 0           batch 1            batch 2
  read buffer A     read buffer A      wait batch 0
                                       wait batch 1
                                       write buffer A

This code is really untested. I'm hoping for some feedback on whether
this is worth cleaning up and testing more thoroughly.

Cc: Chris Wilson <ch...@chris-wilson.co.uk>
Cc: Daniel Vetter <daniel.vet...@ffwll.ch>
Signed-off-by: Ben Widawsky <b...@bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_drv.h            |    1 +
 drivers/gpu/drm/i915/i915_gem.c            |    1 +
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |   32 ++++++++++++++++++++++-----
 3 files changed, 28 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4a9c1b9..d59bf20 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -852,6 +852,7 @@ struct drm_i915_gem_object {
 
        /** Breadcrumb of last rendering to the buffer. */
        uint32_t last_rendering_seqno;
+       uint32_t lrs_ro[I915_NUM_RINGS];
        struct intel_ring_buffer *ring;
 
        /** Breadcrumb of last fenced GPU access to the buffer. */
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index ed0b68f..64cc804 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1561,6 +1561,7 @@ i915_gem_object_move_off_active(struct drm_i915_gem_object *obj)
 {
        list_del_init(&obj->ring_list);
        obj->last_rendering_seqno = 0;
+       memset(obj->lrs_ro, 0, sizeof(obj->lrs_ro));
 }
 
 static void
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 42a6529..78090a3 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -752,21 +752,20 @@ i915_gem_execbuffer_sync_rings(struct drm_i915_gem_object *obj,
 {
        struct intel_ring_buffer *from = obj->ring;
        u32 seqno;
-       int ret, idx;
+       int ret, idx, i;
 
        if (from == NULL || to == from)
                return 0;
 
-       /* If the object isn't being written to, and the object isn't moving
-        * into a GPU write domain, it doesn't need to sync.
-        */
        if (obj->pending_gpu_write == 0 &&
-           obj->base.pending_write_domain == 0)
+           obj->base.pending_write_domain == 0) {
+               obj->lrs_ro[ffs(to->id) - 1] = obj->last_rendering_seqno;
                return 0;
+       }
 
        /* XXX gpu semaphores are implicated in various hard hangs on SNB */
        if (INTEL_INFO(obj->base.dev)->gen < 6 || !i915_semaphores)
-               return i915_gem_object_wait_rendering(obj);
+               ret = i915_gem_object_wait_rendering(obj);
 
        idx = intel_ring_sync_index(from, to);
 
@@ -792,6 +791,27 @@ i915_gem_execbuffer_sync_rings(struct drm_i915_gem_object *obj,
 
        from->sync_seqno[idx] = seqno;
 
+       /* We must be doing a write to an object which may be in queue to be
+        * read by some other command. If we skipped synchronizing because of a
+        * read-only request, we must check and add a sync point now. The reason we
+        * must sync on the write is because we have more than 2 rings. If two
+        * rings have outstanding reads (without sync), we can't let the write
+        * occur until both of the others have completed.
+        */
+       for (i = 0; i < I915_NUM_RINGS; i++) {
+               uint32_t ro_seqno = obj->lrs_ro[i];
+               if (ro_seqno != 0) {
+                       obj->lrs_ro[i] = 0;
+
+                       if (i915_seqno_passed(from->get_seqno(from), ro_seqno))
+                               continue;
+
+                       ret = to->sync_to(to, from, ro_seqno - 1);
+                       if (ret)
+                               return ret;
+               }
+       }
+
        return to->sync_to(to, from, seqno - 1);
 }
 
-- 
1.7.8

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to