Use the semaphore mechanism to make this happen; this uses signaling
from the CPU instead of signaling by the GPU.

Signed-off-by: Maarten Lankhorst <maarten.lankhorst at canonical.com>
---
 drivers/gpu/drm/radeon/radeon.h           |  17 ++-
 drivers/gpu/drm/radeon/radeon_cs.c        |  30 ++---
 drivers/gpu/drm/radeon/radeon_fence.c     |  13 ++-
 drivers/gpu/drm/radeon/radeon_semaphore.c | 184 ++++++++++++++++++++++++++++++
 4 files changed, 221 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index dddb2b7dd752..cd18fa7f801c 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -359,6 +359,11 @@ struct radeon_fence_driver {
        struct delayed_work             lockup_work;
 };

+struct radeon_fence_cb {
+       struct fence_cb base;
+       struct fence *fence;
+};
+
 struct radeon_fence {
        struct fence base;

@@ -368,6 +373,10 @@ struct radeon_fence {
        unsigned                        ring;

        wait_queue_t                    fence_wake;
+
+       atomic_t                        num_cpu_cbs;
+       struct radeon_fence_cb          *cpu_cbs;
+       uint32_t                        *cpu_sema;
 };

 int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring);
@@ -574,9 +583,11 @@ int radeon_mode_dumb_mmap(struct drm_file *filp,
  */
 struct radeon_semaphore {
        struct radeon_sa_bo             *sa_bo;
-       signed                          waiters;
+       signed                          waiters, cpu_waiters, cpu_waiters_max;
        uint64_t                        gpu_addr;
        struct radeon_fence             *sync_to[RADEON_NUM_RINGS];
+       uint32_t                        *cpu_sema;
+       struct radeon_fence_cb          *cpu_cbs;
 };

 int radeon_semaphore_create(struct radeon_device *rdev,
@@ -587,6 +598,10 @@ bool radeon_semaphore_emit_wait(struct radeon_device *rdev, int ring,
                                struct radeon_semaphore *semaphore);
 void radeon_semaphore_sync_to(struct radeon_semaphore *semaphore,
                              struct radeon_fence *fence);
+int radeon_semaphore_sync_obj(struct radeon_device *rdev,
+                             struct radeon_semaphore *semaphore,
+                             struct reservation_object *resv);
+
 int radeon_semaphore_sync_rings(struct radeon_device *rdev,
                                struct radeon_semaphore *semaphore,
                                int waiting_ring);
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index 8ad4e2cfae15..b141f5bd029d 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -250,32 +250,16 @@ static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority

 static int radeon_cs_sync_rings(struct radeon_cs_parser *p)
 {
-       int i;
-
-       for (i = 0; i < p->nrelocs; i++) {
-               struct reservation_object *resv;
-               struct fence *fence;
-               struct radeon_fence *rfence;
-               int r;
+       int i, ret = 0;

+       for (i = 0; !ret && i < p->nrelocs; i++) {
                if (!p->relocs[i].robj)
                        continue;

-               resv = p->relocs[i].robj->tbo.resv;
-               fence = reservation_object_get_excl(resv);
-               if (!fence)
-                       continue;
-               rfence = to_radeon_fence(fence);
-               if (!rfence || rfence->rdev != p->rdev) {
-                       r = fence_wait(fence, true);
-                       if (r)
-                               return r;
-                       continue;
-               }
-
-               radeon_semaphore_sync_to(p->ib.semaphore, rfence);
+               ret = radeon_semaphore_sync_obj(p->rdev, p->ib.semaphore,
+                                               p->relocs[i].robj->tbo.resv);
        }
-       return 0;
+       return ret;
 }

 /* XXX: note that this is called from the legacy UMS CS ioctl as well */
@@ -442,6 +426,10 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bo
                 */
                list_sort(NULL, &parser->validated, cmp_size_smaller_first);

+               /* must be called with all reservation_objects still held */
+               radeon_semaphore_free(parser->rdev, &parser->ib.semaphore,
+                                     parser->ib.fence);
+
                ttm_eu_fence_buffer_objects(&parser->ticket,
                                            &parser->validated,
                                            &parser->ib.fence->base);
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
index 0262fe2580d2..7687a7f8f41b 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -142,6 +142,8 @@ int radeon_fence_emit(struct radeon_device *rdev,
        (*fence)->ring = ring;
        fence_init(&(*fence)->base, &radeon_fence_ops,
                   &rdev->fence_queue.lock, rdev->fence_context + ring, seq);
+       (*fence)->cpu_cbs = NULL;
+       (*fence)->cpu_sema = NULL;
        radeon_fence_ring_emit(rdev, ring, *fence);
        trace_radeon_fence_emit(rdev->ddev, ring, (*fence)->seq);
        radeon_fence_schedule_check(rdev, ring);
@@ -1057,11 +1059,20 @@ static signed long radeon_fence_default_wait(struct fence *f, bool intr,
        return t;
 }

+static void __radeon_fence_destroy(struct fence *f)
+{
+       struct radeon_fence *fence = to_radeon_fence(f);
+
+       WARN_ON(fence->cpu_cbs);
+       kfree(fence->cpu_cbs);
+       fence_free(f);
+}
+
 const struct fence_ops radeon_fence_ops = {
        .get_driver_name = radeon_fence_get_driver_name,
        .get_timeline_name = radeon_fence_get_timeline_name,
        .enable_signaling = radeon_fence_enable_signaling,
        .signaled = radeon_fence_is_signaled,
        .wait = radeon_fence_default_wait,
-       .release = NULL,
+       .release = __radeon_fence_destroy,
 };
diff --git a/drivers/gpu/drm/radeon/radeon_semaphore.c b/drivers/gpu/drm/radeon/radeon_semaphore.c
index 56d9fd66d8ae..2e71463d11c5 100644
--- a/drivers/gpu/drm/radeon/radeon_semaphore.c
+++ b/drivers/gpu/drm/radeon/radeon_semaphore.c
@@ -30,6 +30,7 @@
 #include <drm/drmP.h>
 #include "radeon.h"
 #include "radeon_trace.h"
+#include <trace/events/fence.h>

 int radeon_semaphore_create(struct radeon_device *rdev,
                            struct radeon_semaphore **semaphore)
@@ -49,7 +50,11 @@ int radeon_semaphore_create(struct radeon_device *rdev,
                return r;
        }
        (*semaphore)->waiters = 0;
+       (*semaphore)->cpu_waiters = 0;
+       (*semaphore)->cpu_waiters_max = 0;
        (*semaphore)->gpu_addr = radeon_sa_bo_gpu_addr((*semaphore)->sa_bo);
+       (*semaphore)->cpu_sema = NULL;
+       (*semaphore)->cpu_cbs = NULL;

        cpu_addr = radeon_sa_bo_cpu_addr((*semaphore)->sa_bo);
        for (i = 0; i < RADEON_NUM_SYNCS; ++i)
@@ -115,6 +120,101 @@ void radeon_semaphore_sync_to(struct radeon_semaphore *semaphore,
         semaphore->sync_to[fence->ring] = radeon_fence_later(fence, other);
 }

+int radeon_semaphore_reserve_cpu_waiters(struct radeon_semaphore *semaphore, int add)
+{
+       int max = 4;
+       struct radeon_fence_cb *cpu_cbs;
+
+       if (semaphore->cpu_waiters + add <= semaphore->cpu_waiters_max)
+               return 0;
+
+       while (max < semaphore->cpu_waiters + add)
+               max *= 2;
+
+       cpu_cbs = krealloc(semaphore->cpu_cbs, max * sizeof(*cpu_cbs), GFP_KERNEL);
+       if (!cpu_cbs)
+               return -ENOMEM;
+       semaphore->cpu_cbs = cpu_cbs;
+       semaphore->cpu_waiters_max = max;
+       return 0;
+}
+
+static void radeon_semaphore_add_cpu_cb(struct radeon_semaphore *semaphore,
+                                       struct fence *fence)
+{
+       unsigned i;
+       struct radeon_fence_cb *empty = NULL;
+
+       for (i = 0; i < semaphore->cpu_waiters; ++i) {
+               struct fence *other = semaphore->cpu_cbs[i].fence;
+
+               if (!other)
+                       empty = &semaphore->cpu_cbs[i];
+               else if (other->context == fence->context) {
+                       semaphore->cpu_cbs[i].fence = fence_later(other, fence);
+                       return;
+               }
+       }
+
+       if (!empty)
+               empty = &semaphore->cpu_cbs[semaphore->cpu_waiters++];
+
+       empty->fence = fence;
+       return;
+}
+
+/**
+ * radeon_semaphore_sync_obj - use the semaphore to sync to a bo
+ *
+ * @semaphore: semaphore object to add fence to
+ * @resv: the reservation_object to sync to
+ *
+ * Sync the reservation_object using this semaphore.
+ *
+ * radeon_semaphore_free must be called with all reservation_object locks
+ * still held!!!
+ */
+int radeon_semaphore_sync_obj(struct radeon_device *rdev,
+                             struct radeon_semaphore *semaphore,
+                             struct reservation_object *resv)
+{
+       struct fence *fence;
+       struct radeon_fence *rfence;
+       struct reservation_object_list *fobj;
+       int ret, i;
+
+       fobj = reservation_object_get_list(resv);
+       if (fobj && fobj->shared_count) {
+               ret = radeon_semaphore_reserve_cpu_waiters(semaphore, fobj->shared_count);
+               if (ret)
+                       return ret;
+               for (i = 0; i < fobj->shared_count; ++i) {
+                       fence = rcu_dereference_protected(fobj->shared[i],
+                                               reservation_object_held(resv));
+
+                       radeon_semaphore_add_cpu_cb(semaphore, fence);
+               }
+               return 0;
+       }
+
+       fence = reservation_object_get_excl(resv);
+       if (!fence)
+               return 0;
+
+       rfence = to_radeon_fence(fence);
+       if (rfence && rfence->rdev == rdev) {
+               struct radeon_fence *other = semaphore->sync_to[rfence->ring];
+
+               semaphore->sync_to[rfence->ring] =
+                       radeon_fence_later(rfence, other);
+               return 0;
+       }
+       ret = radeon_semaphore_reserve_cpu_waiters(semaphore, 1);
+       if (!ret)
+               radeon_semaphore_add_cpu_cb(semaphore, fence);
+       return ret;
+}
+
 /**
  * radeon_semaphore_sync_rings - sync ring to all registered fences
  *
@@ -124,6 +224,8 @@ void radeon_semaphore_sync_to(struct radeon_semaphore *semaphore,
  *
  * Ensure that all registered fences are signaled before letting
  * the ring continue. The caller must hold the ring lock.
+ *
+ * This function may only be called once on a semaphore.
  */
 int radeon_semaphore_sync_rings(struct radeon_device *rdev,
                                struct radeon_semaphore *semaphore,
@@ -132,6 +234,16 @@ int radeon_semaphore_sync_rings(struct radeon_device *rdev,
        unsigned count = 0;
        int i, r;

+       if (semaphore->cpu_waiters) {
+               /* allocate enough space for sync command */
+               if (radeon_semaphore_emit_wait(rdev, ring, semaphore)) {
+                       semaphore->cpu_sema = radeon_sa_bo_cpu_addr(semaphore->sa_bo);
+                       semaphore->gpu_addr += 8;
+                       ++count;
+               } else
+                       semaphore->cpu_waiters = -1;
+       }
+
         for (i = 0; i < RADEON_NUM_RINGS; ++i) {
                struct radeon_fence *fence = semaphore->sync_to[i];

@@ -188,6 +300,68 @@ int radeon_semaphore_sync_rings(struct radeon_device *rdev,
        return 0;
 }

+static void radeon_semaphore_cpu_trigger(struct fence *other_fence,
+                                        struct fence_cb *fence_cb)
+{
+       struct radeon_fence_cb *cb = (struct radeon_fence_cb*)fence_cb;
+       struct radeon_fence *fence = (struct radeon_fence *)cb->fence;
+
+#ifdef CONFIG_FENCE_TRACE
+       int ret = atomic_dec_return(&fence->num_cpu_cbs);
+
+       if (ret)
+               FENCE_TRACE(&fence->base, "triggered from %u#%u, %i remaining\n",
+                           other_fence->context, other_fence->seqno, ret);
+       else
+#else
+       if (atomic_dec_and_test(&fence->num_cpu_cbs))
+#endif
+       {
+               FENCE_TRACE(&fence->base, "triggered from %u#%u, starting work\n",
+                           other_fence->context, other_fence->seqno);
+
+               *fence->cpu_sema = ~0;
+
+               kfree(fence->cpu_cbs);
+               fence->cpu_cbs = NULL;
+       }
+}
+
+static void radeon_semaphore_arm_cpu_cbs(struct radeon_semaphore *semaphore,
+                                        struct radeon_fence *fence)
+{
+       unsigned i, skipped = 0;
+
+       fence->cpu_cbs = semaphore->cpu_cbs;
+       fence->cpu_sema = semaphore->cpu_sema;
+       atomic_set(&fence->num_cpu_cbs, semaphore->cpu_waiters);
+
+       for (i = 0; i < semaphore->cpu_waiters; ++i) {
+               struct fence *other = fence->cpu_cbs[i].fence;
+
+               if (other) {
+                       fence->cpu_cbs[i].fence = &fence->base;
+                       trace_fence_annotate_wait_on(&fence->base, other);
+
+                       FENCE_TRACE(&fence->base, "queued wait on %u#%u\n",
+                                   other->context, other->seqno);
+
+                       if (!fence_add_callback(other, &fence->cpu_cbs[i].base,
+                                               radeon_semaphore_cpu_trigger))
+                               continue;
+               }
+               skipped++;
+       }
+
+       if (skipped && atomic_sub_and_test(skipped, &fence->num_cpu_cbs)) {
+               FENCE_TRACE(&fence->base, "No triggers, starting..\n");
+
+               *fence->cpu_sema = ~0;
+               kfree(fence->cpu_cbs);
+               fence->cpu_cbs = NULL;
+       }
+}
+
 void radeon_semaphore_free(struct radeon_device *rdev,
                           struct radeon_semaphore **semaphore,
                           struct radeon_fence *fence)
@@ -195,6 +369,16 @@ void radeon_semaphore_free(struct radeon_device *rdev,
        if (semaphore == NULL || *semaphore == NULL) {
                return;
        }
+       if ((*semaphore)->cpu_cbs) {
+               (*semaphore)->waiters--;
+
+               if (!fence) {
+                       *(*semaphore)->cpu_sema = ~0U;
+                       kfree((*semaphore)->cpu_cbs);
+               } else
+                       radeon_semaphore_arm_cpu_cbs(*semaphore, fence);
+       }
+
        if ((*semaphore)->waiters > 0) {
                dev_err(rdev->dev, "semaphore %p has more waiters than signalers,"
                        " hardware lockup imminent!\n", *semaphore);
-- 
2.0.4


Reply via email to