From: Jerome Glisse <jgli...@redhat.com>

Using a 64-bit fence sequence we can directly compare sequence numbers
to know whether a fence is signaled. Thus the fence list becomes
useless, and so does the fence lock that mainly protected that list.
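For illustration only (not part of the patch; the names below are made
up): with a monotonically increasing 64-bit counter per ring, wrap-around
is no longer a practical concern, so the "is this fence signaled" test
collapses to a single comparison against the last sequence value the GPU
wrote back:

	#include <stdbool.h>
	#include <stdint.h>

	/* standalone sketch of the comparison the patch relies on */
	struct ring_state {
		uint64_t last_seq;	/* last seq value the GPU wrote back */
	};

	static bool seq_signaled(const struct ring_state *ring, uint64_t fence_seq)
	{
		/* at 100k fences per second a 64-bit counter takes roughly
		 * 5.8 million years to wrap, so a plain >= is safe where a
		 * 32-bit counter would have needed wrap-around handling
		 */
		return ring->last_seq >= fence_seq;
	}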
Things like ring.ready are no longer behind a lock. This should be fine,
as ring.ready is initialized once and will only change when facing a
lockup. The worst case is that we return -EBUSY just after a successful
GPU reset, or that we go into the wait state instead of returning -EBUSY
(thus delaying the -EBUSY report to the fence wait caller).

Signed-off-by: Jerome Glisse <jglisse at redhat.com>
---
 drivers/gpu/drm/radeon/radeon.h        |    6 +-
 drivers/gpu/drm/radeon/radeon_device.c |    1 -
 drivers/gpu/drm/radeon/radeon_fence.c  |  278 ++++++++++++--------------------
 3 files changed, 102 insertions(+), 183 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 4c5a667..141aee2 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -286,15 +286,12 @@ struct radeon_fence_driver {
 	uint64_t			last_seq;
 	unsigned long			last_activity;
 	wait_queue_head_t		queue;
-	struct list_head		emitted;
-	struct list_head		signaled;
 	bool				initialized;
 };
 
 struct radeon_fence {
 	struct radeon_device		*rdev;
 	struct kref			kref;
-	struct list_head		list;
 	/* protected by radeon_fence.lock */
 	uint64_t			seq;
 	bool				emitted;
@@ -316,7 +313,7 @@ int radeon_fence_wait_next(struct radeon_device *rdev, int ring);
 int radeon_fence_wait_empty(struct radeon_device *rdev, int ring);
 struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence);
 void radeon_fence_unref(struct radeon_fence **fence);
-int radeon_fence_count_emitted(struct radeon_device *rdev, int ring);
+unsigned radeon_fence_count_emitted(struct radeon_device *rdev, int ring);
 
 /*
  * Tiling registers
@@ -1488,7 +1485,6 @@ struct radeon_device {
 	struct radeon_mode_info		mode_info;
 	struct radeon_scratch		scratch;
 	struct radeon_mman		mman;
-	rwlock_t			fence_lock;
 	struct radeon_fence_driver	fence_drv[RADEON_NUM_RINGS];
 	struct radeon_ring		ring[RADEON_NUM_RINGS];
 	bool				ib_pool_ready;
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index 3880aad..290b8d0 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -746,7 +746,6 @@ int radeon_device_init(struct radeon_device *rdev,
 	mutex_init(&rdev->gem.mutex);
 	mutex_init(&rdev->pm.mutex);
 	mutex_init(&rdev->vram_mutex);
-	rwlock_init(&rdev->fence_lock);
 	INIT_LIST_HEAD(&rdev->gem.objects);
 	init_waitqueue_head(&rdev->irq.vblank_queue);
 	init_waitqueue_head(&rdev->irq.idle_queue);
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
index 6da1535..3f34f7b 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -65,32 +65,22 @@ static u64 radeon_fence_read(struct radeon_device *rdev, int ring)
 
 int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence *fence)
 {
-	unsigned long irq_flags;
-
-	write_lock_irqsave(&rdev->fence_lock, irq_flags);
 	if (fence->emitted) {
-		write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
 		return 0;
 	}
 	fence->seq = atomic64_inc_return(&rdev->fence_drv[fence->ring].seq);
+	/* there is a small chance that we overwrite a bigger last emitted
+	 * value, but in normal usage this shouldn't matter
+	 */
 	radeon_fence_ring_emit(rdev, fence->ring, fence);
 	trace_radeon_fence_emit(rdev->ddev, fence->seq);
 	fence->emitted = true;
-	/* are we the first fence on a previusly idle ring? */
-	if (list_empty(&rdev->fence_drv[fence->ring].emitted)) {
-		rdev->fence_drv[fence->ring].last_activity = jiffies;
-	}
-	list_move_tail(&fence->list, &rdev->fence_drv[fence->ring].emitted);
-	write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
 	return 0;
 }
 
-static bool radeon_fence_poll_locked(struct radeon_device *rdev, int ring)
+static bool radeon_fence_poll(struct radeon_device *rdev, int ring)
 {
-	struct radeon_fence *fence;
-	struct list_head *i, *n;
 	uint64_t seq;
-	bool wake = false;
 
 	seq = radeon_fence_read(rdev, ring);
 	if (seq == rdev->fence_drv[ring].last_seq)
@@ -98,40 +88,15 @@ static bool radeon_fence_poll_locked(struct radeon_device *rdev, int ring)
 
 	rdev->fence_drv[ring].last_seq = seq;
 	rdev->fence_drv[ring].last_activity = jiffies;
-
-	n = NULL;
-	list_for_each(i, &rdev->fence_drv[ring].emitted) {
-		fence = list_entry(i, struct radeon_fence, list);
-		if (fence->seq == seq) {
-			n = i;
-			break;
-		}
-	}
-	/* all fence previous to this one are considered as signaled */
-	if (n) {
-		i = n;
-		do {
-			n = i->prev;
-			list_move_tail(i, &rdev->fence_drv[ring].signaled);
-			fence = list_entry(i, struct radeon_fence, list);
-			fence->signaled = true;
-			i = n;
-		} while (i != &rdev->fence_drv[ring].emitted);
-		wake = true;
-	}
-	return wake;
+	return true;
 }
 
 static void radeon_fence_destroy(struct kref *kref)
 {
-	unsigned long irq_flags;
-	struct radeon_fence *fence;
+	struct radeon_fence *fence;
 
 	fence = container_of(kref, struct radeon_fence, kref);
-	write_lock_irqsave(&fence->rdev->fence_lock, irq_flags);
-	list_del(&fence->list);
 	fence->emitted = false;
-	write_unlock_irqrestore(&fence->rdev->fence_lock, irq_flags);
 	if (fence->semaphore.sa_bo) {
 		radeon_semaphore_free(fence->rdev, fence);
 	}
@@ -153,78 +118,95 @@ int radeon_fence_create(struct radeon_device *rdev,
 	(*fence)->seq = 0;
 	(*fence)->ring = ring;
 	(*fence)->semaphore.sa_bo = NULL;
-	INIT_LIST_HEAD(&(*fence)->list);
 	return 0;
 }
 
 bool radeon_fence_signaled(struct radeon_fence *fence)
 {
-	unsigned long irq_flags;
-	bool signaled = false;
+	struct radeon_device *rdev;
 
-	if (!fence)
+	if (!fence) {
 		return true;
+	}
 
-	write_lock_irqsave(&fence->rdev->fence_lock, irq_flags);
-	signaled = fence->signaled;
+	rdev = fence->rdev;
+	if (!fence->emitted) {
+		dev_warn(rdev->dev, "Querying an unemitted fence : %p !\n", fence);
+		fence->signaled = true;
+		return true;
+	}
+	if (fence->signaled) {
+		return true;
+	}
+	if (rdev->fence_drv[fence->ring].last_seq >= fence->seq) {
+		fence->signaled = true;
+		return true;
+	}
 	/* if we are shuting down report all fence as signaled */
-	if (fence->rdev->shutdown) {
-		signaled = true;
+	if (rdev->shutdown) {
+		fence->signaled = true;
+		return true;
 	}
-	if (!fence->emitted) {
-		WARN(1, "Querying an unemitted fence : %p !\n", fence);
-		signaled = true;
+	if (!fence->signaled) {
+		radeon_fence_poll(fence->rdev, fence->ring);
+		if (rdev->fence_drv[fence->ring].last_seq >= fence->seq) {
+			fence->signaled = true;
+			return true;
+		}
 	}
-	if (!signaled) {
-		radeon_fence_poll_locked(fence->rdev, fence->ring);
-		signaled = fence->signaled;
+	return fence->signaled;
+}
+
+bool radeon_seq_signaled(struct radeon_device *rdev, u64 seq, unsigned ring)
+{
+	if (rdev->fence_drv[ring].last_seq >= seq) {
+		return true;
+	}
+	radeon_fence_poll(rdev, ring);
+	if (rdev->fence_drv[ring].last_seq >= seq) {
+		return true;
 	}
-	write_unlock_irqrestore(&fence->rdev->fence_lock, irq_flags);
-	return signaled;
+	return false;
 }
 
-int radeon_fence_wait(struct radeon_fence *fence, bool intr)
+static int radeon_wait_seq(struct radeon_device *rdev, u64 target_seq,
+			   unsigned ring, bool intr)
 {
-	struct radeon_device *rdev;
-	unsigned long irq_flags, timeout;
-	u64 seq;
-	int i, r;
+	unsigned long timeout;
+	uint64_t seq;
 	bool signaled;
+	int r;
 
-	if (fence == NULL) {
-		WARN(1, "Querying an invalid fence : %p !\n", fence);
-		return -EINVAL;
-	}
+	while (target_seq > rdev->fence_drv[ring].last_seq) {
+		if (!rdev->ring[ring].ready) {
+			return -EBUSY;
+		}
 
-	rdev = fence->rdev;
-	signaled = radeon_fence_signaled(fence);
-	while (!signaled) {
-		read_lock_irqsave(&rdev->fence_lock, irq_flags);
 		timeout = jiffies - RADEON_FENCE_JIFFIES_TIMEOUT;
-		if (time_after(rdev->fence_drv[fence->ring].last_activity, timeout)) {
+		if (time_after(rdev->fence_drv[ring].last_activity, timeout)) {
 			/* the normal case, timeout is somewhere before last_activity */
-			timeout = rdev->fence_drv[fence->ring].last_activity - timeout;
+			timeout = rdev->fence_drv[ring].last_activity - timeout;
 		} else {
 			/* either jiffies wrapped around, or no fence was signaled in the last 500ms
-			 * anyway we will just wait for the minimum amount and then check for a lockup */
+			 * anyway we will just wait for the minimum amount and then check for a lockup
+			 */
 			timeout = 1;
 		}
 		/* save current sequence value used to check for GPU lockups */
-		seq = rdev->fence_drv[fence->ring].last_seq;
-		read_unlock_irqrestore(&rdev->fence_lock, irq_flags);
+		seq = rdev->fence_drv[ring].last_seq;
 
 		trace_radeon_fence_wait_begin(rdev->ddev, seq);
-		radeon_irq_kms_sw_irq_get(rdev, fence->ring);
+		radeon_irq_kms_sw_irq_get(rdev, ring);
 		if (intr) {
-			r = wait_event_interruptible_timeout(
-				rdev->fence_drv[fence->ring].queue,
-				(signaled = radeon_fence_signaled(fence)), timeout);
+			r = wait_event_interruptible_timeout(rdev->fence_drv[ring].queue,
+							     (signaled = radeon_seq_signaled(rdev, target_seq, ring)),
							     timeout);
 		} else {
-			r = wait_event_timeout(
-				rdev->fence_drv[fence->ring].queue,
-				(signaled = radeon_fence_signaled(fence)), timeout);
+			r = wait_event_timeout(rdev->fence_drv[ring].queue,
+					       (signaled = radeon_seq_signaled(rdev, target_seq, ring)),
+					       timeout);
 		}
-		radeon_irq_kms_sw_irq_put(rdev, fence->ring);
+		radeon_irq_kms_sw_irq_put(rdev, ring);
 		if (unlikely(r < 0)) {
 			return r;
 		}
@@ -237,28 +219,18 @@ int radeon_fence_wait(struct radeon_fence *fence, bool intr)
 			continue;
 		}
 
-		write_lock_irqsave(&rdev->fence_lock, irq_flags);
 		/* check if sequence value has changed since last_activity */
-		if (seq != rdev->fence_drv[fence->ring].last_seq) {
-			write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
+		if (seq != rdev->fence_drv[ring].last_seq) {
 			continue;
 		}
 
-		/* change sequence value on all rings, so nobody else things there is a lockup */
-		for (i = 0; i < RADEON_NUM_RINGS; ++i)
-			rdev->fence_drv[i].last_seq -= 0x10000;
-
-		rdev->fence_drv[fence->ring].last_activity = jiffies;
-		write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
-
-		if (radeon_ring_is_lockup(rdev, fence->ring, &rdev->ring[fence->ring])) {
-
+		if (radeon_ring_is_lockup(rdev, ring, &rdev->ring[ring])) {
 			/* good news we believe it's a lockup */
 			dev_warn(rdev->dev, "GPU lockup (waiting for 0x%016llx last fence id 0x%016llx)\n",
-				 fence->seq, seq);
+				 target_seq, seq);
 
 			/* mark the ring as not ready any more */
-			rdev->ring[fence->ring].ready = false;
+			rdev->ring[ring].ready = false;
 			return -EDEADLK;
 		}
 	}
@@ -266,52 +238,31 @@ int radeon_fence_wait(struct radeon_fence *fence, bool intr)
 	return 0;
 }
 
-int radeon_fence_wait_next(struct radeon_device *rdev, int ring)
+int radeon_fence_wait(struct radeon_fence *fence, bool intr)
 {
-	unsigned long irq_flags;
-	struct radeon_fence *fence;
 	int r;
 
-	write_lock_irqsave(&rdev->fence_lock, irq_flags);
-	if (!rdev->ring[ring].ready) {
-		write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
-		return -EBUSY;
+	if (fence == NULL) {
+		WARN(1, "Querying an invalid fence : %p !\n", fence);
+		return -EINVAL;
 	}
-	if (list_empty(&rdev->fence_drv[ring].emitted)) {
-		write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
-		return -ENOENT;
+
+	r = radeon_wait_seq(fence->rdev, fence->seq, fence->ring, intr);
+	if (r) {
+		return r;
 	}
-	fence = list_entry(rdev->fence_drv[ring].emitted.next,
-			   struct radeon_fence, list);
-	radeon_fence_ref(fence);
-	write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
-	r = radeon_fence_wait(fence, false);
-	radeon_fence_unref(&fence);
-	return r;
+	fence->signaled = true;
+	return 0;
 }
 
-int radeon_fence_wait_empty(struct radeon_device *rdev, int ring)
+int radeon_fence_wait_next(struct radeon_device *rdev, int ring)
 {
-	unsigned long irq_flags;
-	struct radeon_fence *fence;
-	int r;
+	return radeon_wait_seq(rdev, rdev->fence_drv[ring].last_seq + 1ULL, ring, false);
+}
 
-	write_lock_irqsave(&rdev->fence_lock, irq_flags);
-	if (!rdev->ring[ring].ready) {
-		write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
-		return -EBUSY;
-	}
-	if (list_empty(&rdev->fence_drv[ring].emitted)) {
-		write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
-		return 0;
-	}
-	fence = list_entry(rdev->fence_drv[ring].emitted.prev,
-			   struct radeon_fence, list);
-	radeon_fence_ref(fence);
-	write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
-	r = radeon_fence_wait(fence, false);
-	radeon_fence_unref(&fence);
-	return r;
+int radeon_fence_wait_empty(struct radeon_device *rdev, int ring)
+{
+	return radeon_wait_seq(rdev, atomic64_read(&rdev->fence_drv[ring].seq), ring, false);
 }
 
 struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence)
@@ -332,47 +283,32 @@ void radeon_fence_unref(struct radeon_fence **fence)
 
 void radeon_fence_process(struct radeon_device *rdev, int ring)
 {
-	unsigned long irq_flags;
 	bool wake;
 
-	write_lock_irqsave(&rdev->fence_lock, irq_flags);
-	wake = radeon_fence_poll_locked(rdev, ring);
-	write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
+	wake = radeon_fence_poll(rdev, ring);
 	if (wake) {
 		wake_up_all(&rdev->fence_drv[ring].queue);
 	}
 }
 
-int radeon_fence_count_emitted(struct radeon_device *rdev, int ring)
+unsigned radeon_fence_count_emitted(struct radeon_device *rdev, int ring)
 {
-	unsigned long irq_flags;
-	int not_processed = 0;
-
-	read_lock_irqsave(&rdev->fence_lock, irq_flags);
-	if (!rdev->fence_drv[ring].initialized) {
-		read_unlock_irqrestore(&rdev->fence_lock, irq_flags);
-		return 0;
-	}
+	uint64_t emitted;
 
-	if (!list_empty(&rdev->fence_drv[ring].emitted)) {
-		struct list_head *ptr;
-		list_for_each(ptr, &rdev->fence_drv[ring].emitted) {
-			/* count up to 3, that's enought info */
-			if (++not_processed >= 3)
-				break;
-		}
+	radeon_fence_poll(rdev, ring);
+	emitted = atomic64_read(&rdev->fence_drv[ring].seq) - rdev->fence_drv[ring].last_seq;
+	/* to avoid 32-bit wrap around */
+	if (emitted > 0x10000000) {
+		emitted = 0x10000000;
 	}
-	read_unlock_irqrestore(&rdev->fence_lock, irq_flags);
-	return not_processed;
+	return (unsigned)emitted;
 }
 
 int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring)
 {
-	unsigned long irq_flags;
 	uint64_t index;
 	int r;
 
-	write_lock_irqsave(&rdev->fence_lock, irq_flags);
 	radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg, 2);
 	if (rdev->wb.use_event) {
 		rdev->fence_drv[ring].scratch_reg = 0;
@@ -381,7 +317,6 @@ int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring)
 		r = radeon_scratch_get(rdev, &rdev->fence_drv[ring].scratch_reg, 2);
 		if (r) {
 			dev_err(rdev->dev, "fence failed to get scratch register\n");
-			write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
 			return r;
 		}
 		index = RADEON_WB_SCRATCH_OFFSET +
@@ -392,9 +327,8 @@ int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring)
 	rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr + index;
 	radeon_fence_write(rdev, atomic64_read(&rdev->fence_drv[ring].seq), ring);
 	rdev->fence_drv[ring].initialized = true;
-	DRM_INFO("fence driver on ring %d use gpu addr 0x%08Lx and cpu addr 0x%p\n",
+	dev_info(rdev->dev, "fence driver on ring %d use gpu addr 0x%08Lx and cpu addr 0x%p\n",
 		 ring, rdev->fence_drv[ring].gpu_addr, rdev->fence_drv[ring].cpu_addr);
-	write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
 	return 0;
 }
 
@@ -403,23 +337,20 @@ static void radeon_fence_driver_init_ring(struct radeon_device *rdev, int ring)
 	rdev->fence_drv[ring].scratch_reg = -1;
 	rdev->fence_drv[ring].cpu_addr = NULL;
 	rdev->fence_drv[ring].gpu_addr = 0;
+	rdev->fence_drv[ring].last_activity = jiffies;
 	atomic64_set(&rdev->fence_drv[ring].seq, 0);
-	INIT_LIST_HEAD(&rdev->fence_drv[ring].emitted);
-	INIT_LIST_HEAD(&rdev->fence_drv[ring].signaled);
+	rdev->fence_drv[ring].last_seq = 0;
 	init_waitqueue_head(&rdev->fence_drv[ring].queue);
 	rdev->fence_drv[ring].initialized = false;
 }
 
 int radeon_fence_driver_init(struct radeon_device *rdev)
 {
-	unsigned long irq_flags;
 	int ring;
 
-	write_lock_irqsave(&rdev->fence_lock, irq_flags);
 	for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
 		radeon_fence_driver_init_ring(rdev, ring);
 	}
-	write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
 	if (radeon_debugfs_fence_init(rdev)) {
 		dev_err(rdev->dev, "fence debugfs file creation failed\n");
 	}
@@ -428,7 +359,6 @@ int radeon_fence_driver_init(struct radeon_device *rdev)
 
 void radeon_fence_driver_fini(struct radeon_device *rdev)
 {
-	unsigned long irq_flags;
 	int ring;
 
 	for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
@@ -436,9 +366,7 @@ void radeon_fence_driver_fini(struct radeon_device *rdev)
 			continue;
 		radeon_fence_wait_empty(rdev, ring);
 		wake_up_all(&rdev->fence_drv[ring].queue);
-		write_lock_irqsave(&rdev->fence_lock, irq_flags);
 		radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg, 2);
-		write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
 		rdev->fence_drv[ring].initialized = false;
 	}
 }
@@ -453,7 +381,6 @@ static int radeon_debugfs_fence_info(struct seq_file *m, void *data)
 	struct drm_info_node *node = (struct drm_info_node *)m->private;
 	struct drm_device *dev = node->minor->dev;
 	struct radeon_device *rdev = dev->dev_private;
-	struct radeon_fence *fence;
 	int i;
 
 	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
@@ -461,13 +388,10 @@ static int radeon_debugfs_fence_info(struct seq_file *m, void *data)
 			continue;
 
 		seq_printf(m, "--- ring %d ---\n", i);
-		seq_printf(m, "Last signaled fence 0x%016llx\n", radeon_fence_read(rdev, i));
-		if (!list_empty(&rdev->fence_drv[i].emitted)) {
-			fence = list_entry(rdev->fence_drv[i].emitted.prev,
-					   struct radeon_fence, list);
-			seq_printf(m, "Last emitted fence %p with 0x%016llx\n",
-				   fence, fence->seq);
-		}
+		seq_printf(m, "Last signaled 0x%016llx\n",
+			   radeon_fence_read(rdev, i));
+		seq_printf(m, "Last emitted 0x%016llx\n",
+			   atomic64_read(&rdev->fence_drv[i].seq));
 	}
 	return 0;
 }
-- 
1.7.7.6
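As an aside for reviewers, here is a standalone sketch (hypothetical
names, compiles in userspace, not the driver code) of the new
radeon_fence_count_emitted() arithmetic: the number of outstanding
fences is just the distance between the last emitted sequence and the
last sequence the GPU signaled, clamped before the cast to unsigned:

	#include <stdint.h>
	#include <stdio.h>

	static unsigned count_emitted(uint64_t emitted_seq, uint64_t signaled_seq)
	{
		/* distance between the emit counter and the GPU readback */
		uint64_t outstanding = emitted_seq - signaled_seq;

		/* clamp, mirroring the patch's 0x10000000 bound, so a stale
		 * readback cannot turn into an absurdly large count
		 */
		if (outstanding > 0x10000000)
			outstanding = 0x10000000;
		return (unsigned)outstanding;
	}

	int main(void)
	{
		/* 5 fences emitted, GPU has signaled up to 2 => 3 outstanding */
		printf("%u\n", count_emitted(5, 2));
		return 0;
	}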