From: Jerome Glisse <jgli...@redhat.com>

Using a 64-bit fence sequence we can directly compare sequence
numbers to know if a fence is signaled or not. Thus the fence
list becomes useless, as does the fence lock that mainly
protected the fence list.

Things like ring.ready are no longer behind a lock; this should
be OK as ring.ready is initialized once and will only change
when facing a lockup. Worst case is that we return -EBUSY just
after a successful GPU reset, or we go into a wait state instead
of returning -EBUSY (thus delaying the -EBUSY report to the
fence wait caller).

Signed-off-by: Jerome Glisse <jgli...@redhat.com>
---
 drivers/gpu/drm/radeon/radeon.h        |    6 +-
 drivers/gpu/drm/radeon/radeon_device.c |    1 -
 drivers/gpu/drm/radeon/radeon_fence.c  |  278 ++++++++++++--------------------
 3 files changed, 102 insertions(+), 183 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 4c5a667..141aee2 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -286,15 +286,12 @@ struct radeon_fence_driver {
        uint64_t                        last_seq;
        unsigned long                   last_activity;
        wait_queue_head_t               queue;
-       struct list_head                emitted;
-       struct list_head                signaled;
        bool                            initialized;
 };
 
 struct radeon_fence {
        struct radeon_device            *rdev;
        struct kref                     kref;
-       struct list_head                list;
        /* protected by radeon_fence.lock */
        uint64_t                        seq;
        bool                            emitted;
@@ -316,7 +313,7 @@ int radeon_fence_wait_next(struct radeon_device *rdev, int 
ring);
 int radeon_fence_wait_empty(struct radeon_device *rdev, int ring);
 struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence);
 void radeon_fence_unref(struct radeon_fence **fence);
-int radeon_fence_count_emitted(struct radeon_device *rdev, int ring);
+unsigned radeon_fence_count_emitted(struct radeon_device *rdev, int ring);
 
 /*
  * Tiling registers
@@ -1488,7 +1485,6 @@ struct radeon_device {
        struct radeon_mode_info         mode_info;
        struct radeon_scratch           scratch;
        struct radeon_mman              mman;
-       rwlock_t                        fence_lock;
        struct radeon_fence_driver      fence_drv[RADEON_NUM_RINGS];
        struct radeon_ring              ring[RADEON_NUM_RINGS];
        bool                            ib_pool_ready;
diff --git a/drivers/gpu/drm/radeon/radeon_device.c 
b/drivers/gpu/drm/radeon/radeon_device.c
index 3880aad..290b8d0 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -746,7 +746,6 @@ int radeon_device_init(struct radeon_device *rdev,
        mutex_init(&rdev->gem.mutex);
        mutex_init(&rdev->pm.mutex);
        mutex_init(&rdev->vram_mutex);
-       rwlock_init(&rdev->fence_lock);
        INIT_LIST_HEAD(&rdev->gem.objects);
        init_waitqueue_head(&rdev->irq.vblank_queue);
        init_waitqueue_head(&rdev->irq.idle_queue);
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c 
b/drivers/gpu/drm/radeon/radeon_fence.c
index 6da1535..3f34f7b 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -65,32 +65,22 @@ static u64 radeon_fence_read(struct radeon_device *rdev, 
int ring)
 
 int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence *fence)
 {
-       unsigned long irq_flags;
-
-       write_lock_irqsave(&rdev->fence_lock, irq_flags);
        if (fence->emitted) {
-               write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
                return 0;
        }
        fence->seq = atomic64_inc_return(&rdev->fence_drv[fence->ring].seq);
+       /* there is a small chance that we overwrite a bigger last_emitted
+        * value, but in normal usage this should not happen
+        */
        radeon_fence_ring_emit(rdev, fence->ring, fence);
        trace_radeon_fence_emit(rdev->ddev, fence->seq);
        fence->emitted = true;
-       /* are we the first fence on a previusly idle ring? */
-       if (list_empty(&rdev->fence_drv[fence->ring].emitted)) {
-               rdev->fence_drv[fence->ring].last_activity = jiffies;
-       }
-       list_move_tail(&fence->list, &rdev->fence_drv[fence->ring].emitted);
-       write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
        return 0;
 }
 
-static bool radeon_fence_poll_locked(struct radeon_device *rdev, int ring)
+static bool radeon_fence_poll(struct radeon_device *rdev, int ring)
 {
-       struct radeon_fence *fence;
-       struct list_head *i, *n;
        uint64_t seq;
-       bool wake = false;
 
        seq = radeon_fence_read(rdev, ring);
        if (seq == rdev->fence_drv[ring].last_seq)
@@ -98,40 +88,15 @@ static bool radeon_fence_poll_locked(struct radeon_device 
*rdev, int ring)
 
        rdev->fence_drv[ring].last_seq = seq;
        rdev->fence_drv[ring].last_activity = jiffies;
-
-       n = NULL;
-       list_for_each(i, &rdev->fence_drv[ring].emitted) {
-               fence = list_entry(i, struct radeon_fence, list);
-               if (fence->seq == seq) {
-                       n = i;
-                       break;
-               }
-       }
-       /* all fence previous to this one are considered as signaled */
-       if (n) {
-               i = n;
-               do {
-                       n = i->prev;
-                       list_move_tail(i, &rdev->fence_drv[ring].signaled);
-                       fence = list_entry(i, struct radeon_fence, list);
-                       fence->signaled = true;
-                       i = n;
-               } while (i != &rdev->fence_drv[ring].emitted);
-               wake = true;
-       }
-       return wake;
+       return true;
 }
 
 static void radeon_fence_destroy(struct kref *kref)
 {
-       unsigned long irq_flags;
-        struct radeon_fence *fence;
+       struct radeon_fence *fence;
 
        fence = container_of(kref, struct radeon_fence, kref);
-       write_lock_irqsave(&fence->rdev->fence_lock, irq_flags);
-       list_del(&fence->list);
        fence->emitted = false;
-       write_unlock_irqrestore(&fence->rdev->fence_lock, irq_flags);
        if (fence->semaphore.sa_bo) {
                radeon_semaphore_free(fence->rdev, fence);
        }
@@ -153,78 +118,95 @@ int radeon_fence_create(struct radeon_device *rdev,
        (*fence)->seq = 0;
        (*fence)->ring = ring;
        (*fence)->semaphore.sa_bo = NULL;
-       INIT_LIST_HEAD(&(*fence)->list);
        return 0;
 }
 
 bool radeon_fence_signaled(struct radeon_fence *fence)
 {
-       unsigned long irq_flags;
-       bool signaled = false;
+       struct radeon_device *rdev;
 
-       if (!fence)
+       if (!fence) {
                return true;
+       }
 
-       write_lock_irqsave(&fence->rdev->fence_lock, irq_flags);
-       signaled = fence->signaled;
+       rdev = fence->rdev;
+       if (!fence->emitted) {
+               dev_warn(rdev->dev, "Querying an unemitted fence : %p !\n", 
fence);
+               fence->signaled = true;
+               return true;
+       }
+       if (fence->signaled) {
+               return true;
+       }
+       if (rdev->fence_drv[fence->ring].last_seq >= fence->seq) {
+               fence->signaled = true;
+               return true;
+       }
        /* if we are shuting down report all fence as signaled */
-       if (fence->rdev->shutdown) {
-               signaled = true;
+       if (rdev->shutdown) {
+               fence->signaled = true;
+               return true;
        }
-       if (!fence->emitted) {
-               WARN(1, "Querying an unemitted fence : %p !\n", fence);
-               signaled = true;
+       if (!fence->signaled) {
+               radeon_fence_poll(fence->rdev, fence->ring);
+               if (rdev->fence_drv[fence->ring].last_seq >= fence->seq) {
+                       fence->signaled = true;
+                       return true;
+               }
        }
-       if (!signaled) {
-               radeon_fence_poll_locked(fence->rdev, fence->ring);
-               signaled = fence->signaled;
+       return fence->signaled;
+}
+
+bool radeon_seq_signaled(struct radeon_device *rdev, u64 seq, unsigned ring)
+{
+       if (rdev->fence_drv[ring].last_seq >= seq) {
+               return true;
+       }
+       radeon_fence_poll(rdev, ring);
+       if (rdev->fence_drv[ring].last_seq >= seq) {
+               return true;
        }
-       write_unlock_irqrestore(&fence->rdev->fence_lock, irq_flags);
-       return signaled;
+       return false;
 }
 
-int radeon_fence_wait(struct radeon_fence *fence, bool intr)
+static int radeon_wait_seq(struct radeon_device *rdev, u64 target_seq,
+                          unsigned ring, bool intr)
 {
-       struct radeon_device *rdev;
-       unsigned long irq_flags, timeout;
-       u64 seq;
-       int i, r;
+       unsigned long timeout;
+       uint64_t seq;
        bool signaled;
+       int r;
 
-       if (fence == NULL) {
-               WARN(1, "Querying an invalid fence : %p !\n", fence);
-               return -EINVAL;
-       }
+       while (target_seq > rdev->fence_drv[ring].last_seq) {
+               if (!rdev->ring[ring].ready) {
+                       return -EBUSY;
+               }
 
-       rdev = fence->rdev;
-       signaled = radeon_fence_signaled(fence);
-       while (!signaled) {
-               read_lock_irqsave(&rdev->fence_lock, irq_flags);
                timeout = jiffies - RADEON_FENCE_JIFFIES_TIMEOUT;
-               if (time_after(rdev->fence_drv[fence->ring].last_activity, 
timeout)) {
+               if (time_after(rdev->fence_drv[ring].last_activity, timeout)) {
                        /* the normal case, timeout is somewhere before 
last_activity */
-                       timeout = rdev->fence_drv[fence->ring].last_activity - 
timeout;
+                       timeout = rdev->fence_drv[ring].last_activity - timeout;
                } else {
                        /* either jiffies wrapped around, or no fence was 
signaled in the last 500ms
-                        * anyway we will just wait for the minimum amount and 
then check for a lockup */
+                        * anyway we will just wait for the minimum amount and 
then check for a lockup
+                        */
                        timeout = 1;
                }
                /* save current sequence value used to check for GPU lockups */
-               seq = rdev->fence_drv[fence->ring].last_seq;
-               read_unlock_irqrestore(&rdev->fence_lock, irq_flags);
+               seq = rdev->fence_drv[ring].last_seq;
 
                trace_radeon_fence_wait_begin(rdev->ddev, seq);
-               radeon_irq_kms_sw_irq_get(rdev, fence->ring);
+               radeon_irq_kms_sw_irq_get(rdev, ring);
                if (intr) {
-                       r = wait_event_interruptible_timeout(
-                               rdev->fence_drv[fence->ring].queue,
-                               (signaled = radeon_fence_signaled(fence)), 
timeout);
+                       r = 
wait_event_interruptible_timeout(rdev->fence_drv[ring].queue,
+                                                            (signaled = 
radeon_seq_signaled(rdev, target_seq, ring)),
+                                                            timeout);
                } else {
-                       r = wait_event_timeout(
-                               rdev->fence_drv[fence->ring].queue,
-                               (signaled = radeon_fence_signaled(fence)), 
timeout);
+                       r = wait_event_timeout(rdev->fence_drv[ring].queue,
+                                              (signaled = 
radeon_seq_signaled(rdev, target_seq, ring)),
+                                              timeout);
                }
-               radeon_irq_kms_sw_irq_put(rdev, fence->ring);
+               radeon_irq_kms_sw_irq_put(rdev, ring);
                if (unlikely(r < 0)) {
                        return r;
                }
@@ -237,28 +219,18 @@ int radeon_fence_wait(struct radeon_fence *fence, bool 
intr)
                                continue;
                        }
 
-                       write_lock_irqsave(&rdev->fence_lock, irq_flags);
                        /* check if sequence value has changed since 
last_activity */
-                       if (seq != rdev->fence_drv[fence->ring].last_seq) {
-                               write_unlock_irqrestore(&rdev->fence_lock, 
irq_flags);
+                       if (seq != rdev->fence_drv[ring].last_seq) {
                                continue;
                        }
 
-                       /* change sequence value on all rings, so nobody else 
things there is a lockup */
-                       for (i = 0; i < RADEON_NUM_RINGS; ++i)
-                               rdev->fence_drv[i].last_seq -= 0x10000;
-
-                       rdev->fence_drv[fence->ring].last_activity = jiffies;
-                       write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
-
-                       if (radeon_ring_is_lockup(rdev, fence->ring, 
&rdev->ring[fence->ring])) {
-
+                       if (radeon_ring_is_lockup(rdev, ring, 
&rdev->ring[ring])) {
                                /* good news we believe it's a lockup */
                                dev_warn(rdev->dev, "GPU lockup (waiting for 
0x%016llx last fence id 0x%016llx)\n",
-                                        fence->seq, seq);
+                                        target_seq, seq);
 
                                /* mark the ring as not ready any more */
-                               rdev->ring[fence->ring].ready = false;
+                               rdev->ring[ring].ready = false;
                                return -EDEADLK;
                        }
                }
@@ -266,52 +238,31 @@ int radeon_fence_wait(struct radeon_fence *fence, bool 
intr)
        return 0;
 }
 
-int radeon_fence_wait_next(struct radeon_device *rdev, int ring)
+int radeon_fence_wait(struct radeon_fence *fence, bool intr)
 {
-       unsigned long irq_flags;
-       struct radeon_fence *fence;
        int r;
 
-       write_lock_irqsave(&rdev->fence_lock, irq_flags);
-       if (!rdev->ring[ring].ready) {
-               write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
-               return -EBUSY;
+       if (fence == NULL) {
+               WARN(1, "Querying an invalid fence : %p !\n", fence);
+               return -EINVAL;
        }
-       if (list_empty(&rdev->fence_drv[ring].emitted)) {
-               write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
-               return -ENOENT;
+
+       r = radeon_wait_seq(fence->rdev, fence->seq, fence->ring, intr);
+       if (r) {
+               return r;
        }
-       fence = list_entry(rdev->fence_drv[ring].emitted.next,
-                          struct radeon_fence, list);
-       radeon_fence_ref(fence);
-       write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
-       r = radeon_fence_wait(fence, false);
-       radeon_fence_unref(&fence);
-       return r;
+       fence->signaled = true;
+       return 0;
 }
 
-int radeon_fence_wait_empty(struct radeon_device *rdev, int ring)
+int radeon_fence_wait_next(struct radeon_device *rdev, int ring)
 {
-       unsigned long irq_flags;
-       struct radeon_fence *fence;
-       int r;
+       return radeon_wait_seq(rdev, rdev->fence_drv[ring].last_seq + 1ULL, 
ring, false);
+}
 
-       write_lock_irqsave(&rdev->fence_lock, irq_flags);
-       if (!rdev->ring[ring].ready) {
-               write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
-               return -EBUSY;
-       }
-       if (list_empty(&rdev->fence_drv[ring].emitted)) {
-               write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
-               return 0;
-       }
-       fence = list_entry(rdev->fence_drv[ring].emitted.prev,
-                          struct radeon_fence, list);
-       radeon_fence_ref(fence);
-       write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
-       r = radeon_fence_wait(fence, false);
-       radeon_fence_unref(&fence);
-       return r;
+int radeon_fence_wait_empty(struct radeon_device *rdev, int ring)
+{
+       return radeon_wait_seq(rdev, atomic64_read(&rdev->fence_drv[ring].seq), 
ring, false);
 }
 
 struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence)
@@ -332,47 +283,32 @@ void radeon_fence_unref(struct radeon_fence **fence)
 
 void radeon_fence_process(struct radeon_device *rdev, int ring)
 {
-       unsigned long irq_flags;
        bool wake;
 
-       write_lock_irqsave(&rdev->fence_lock, irq_flags);
-       wake = radeon_fence_poll_locked(rdev, ring);
-       write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
+       wake = radeon_fence_poll(rdev, ring);
        if (wake) {
                wake_up_all(&rdev->fence_drv[ring].queue);
        }
 }
 
-int radeon_fence_count_emitted(struct radeon_device *rdev, int ring)
+unsigned radeon_fence_count_emitted(struct radeon_device *rdev, int ring)
 {
-       unsigned long irq_flags;
-       int not_processed = 0;
-
-       read_lock_irqsave(&rdev->fence_lock, irq_flags);
-       if (!rdev->fence_drv[ring].initialized) {
-               read_unlock_irqrestore(&rdev->fence_lock, irq_flags);
-               return 0;
-       }
+       uint64_t emitted;
 
-       if (!list_empty(&rdev->fence_drv[ring].emitted)) {
-               struct list_head *ptr;
-               list_for_each(ptr, &rdev->fence_drv[ring].emitted) {
-                       /* count up to 3, that's enought info */
-                       if (++not_processed >= 3)
-                               break;
-               }
+       radeon_fence_poll(rdev, ring);
+       emitted = atomic64_read(&rdev->fence_drv[ring].seq) - 
rdev->fence_drv[ring].last_seq;
+       /* to avoid 32-bit wrap around */
+       if (emitted > 0x10000000) {
+               emitted = 0x10000000;
        }
-       read_unlock_irqrestore(&rdev->fence_lock, irq_flags);
-       return not_processed;
+       return (unsigned)emitted;
 }
 
 int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring)
 {
-       unsigned long irq_flags;
        uint64_t index;
        int r;
 
-       write_lock_irqsave(&rdev->fence_lock, irq_flags);
        radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg, 2);
        if (rdev->wb.use_event) {
                rdev->fence_drv[ring].scratch_reg = 0;
@@ -381,7 +317,6 @@ int radeon_fence_driver_start_ring(struct radeon_device 
*rdev, int ring)
                r = radeon_scratch_get(rdev, 
&rdev->fence_drv[ring].scratch_reg, 2);
                if (r) {
                        dev_err(rdev->dev, "fence failed to get scratch 
register\n");
-                       write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
                        return r;
                }
                index = RADEON_WB_SCRATCH_OFFSET +
@@ -392,9 +327,8 @@ int radeon_fence_driver_start_ring(struct radeon_device 
*rdev, int ring)
        rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr + index;
        radeon_fence_write(rdev, atomic64_read(&rdev->fence_drv[ring].seq), 
ring);
        rdev->fence_drv[ring].initialized = true;
-       DRM_INFO("fence driver on ring %d use gpu addr 0x%08Lx and cpu addr 
0x%p\n",
+       dev_info(rdev->dev, "fence driver on ring %d use gpu addr 0x%08Lx and 
cpu addr 0x%p\n",
                 ring, rdev->fence_drv[ring].gpu_addr, 
rdev->fence_drv[ring].cpu_addr);
-       write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
        return 0;
 }
 
@@ -403,23 +337,20 @@ static void radeon_fence_driver_init_ring(struct 
radeon_device *rdev, int ring)
        rdev->fence_drv[ring].scratch_reg = -1;
        rdev->fence_drv[ring].cpu_addr = NULL;
        rdev->fence_drv[ring].gpu_addr = 0;
+       rdev->fence_drv[ring].last_activity = jiffies;
        atomic64_set(&rdev->fence_drv[ring].seq, 0);
-       INIT_LIST_HEAD(&rdev->fence_drv[ring].emitted);
-       INIT_LIST_HEAD(&rdev->fence_drv[ring].signaled);
+       rdev->fence_drv[ring].last_seq = 0;
        init_waitqueue_head(&rdev->fence_drv[ring].queue);
        rdev->fence_drv[ring].initialized = false;
 }
 
 int radeon_fence_driver_init(struct radeon_device *rdev)
 {
-       unsigned long irq_flags;
        int ring;
 
-       write_lock_irqsave(&rdev->fence_lock, irq_flags);
        for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
                radeon_fence_driver_init_ring(rdev, ring);
        }
-       write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
        if (radeon_debugfs_fence_init(rdev)) {
                dev_err(rdev->dev, "fence debugfs file creation failed\n");
        }
@@ -428,7 +359,6 @@ int radeon_fence_driver_init(struct radeon_device *rdev)
 
 void radeon_fence_driver_fini(struct radeon_device *rdev)
 {
-       unsigned long irq_flags;
        int ring;
 
        for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
@@ -436,9 +366,7 @@ void radeon_fence_driver_fini(struct radeon_device *rdev)
                        continue;
                radeon_fence_wait_empty(rdev, ring);
                wake_up_all(&rdev->fence_drv[ring].queue);
-               write_lock_irqsave(&rdev->fence_lock, irq_flags);
                radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg, 2);
-               write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
                rdev->fence_drv[ring].initialized = false;
        }
 }
@@ -453,7 +381,6 @@ static int radeon_debugfs_fence_info(struct seq_file *m, 
void *data)
        struct drm_info_node *node = (struct drm_info_node *)m->private;
        struct drm_device *dev = node->minor->dev;
        struct radeon_device *rdev = dev->dev_private;
-       struct radeon_fence *fence;
        int i;
 
        for (i = 0; i < RADEON_NUM_RINGS; ++i) {
@@ -461,13 +388,10 @@ static int radeon_debugfs_fence_info(struct seq_file *m, 
void *data)
                        continue;
 
                seq_printf(m, "--- ring %d ---\n", i);
-               seq_printf(m, "Last signaled fence 0x%016llx\n", 
radeon_fence_read(rdev, i));
-               if (!list_empty(&rdev->fence_drv[i].emitted)) {
-                       fence = list_entry(rdev->fence_drv[i].emitted.prev,
-                                          struct radeon_fence, list);
-                       seq_printf(m, "Last emitted fence %p with 0x%016llx\n",
-                                  fence,  fence->seq);
-               }
+               seq_printf(m, "Last signaled 0x%016llx\n",
+                          radeon_fence_read(rdev, i));
+               seq_printf(m, "Last emitted  0x%016lx\n",
+                          atomic64_read(&rdev->fence_drv[i].seq));
        }
        return 0;
 }
-- 
1.7.7.6

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

Reply via email to