I changed the lock hierarchy to make fence_lock the innermost lock
instead of the outermost one. This simplifies things slightly, and
hopefully makes it easier to turn fence_lock into a global lock at
some point, should that be needed.

To make things clearer, I changed the order of operations in
ttm_bo_cleanup_refs and ttm_bo_cleanup_refs_or_queue.

A reservation is taken first, then the fence lock is taken and a wait is attempted.
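
For illustration only (this sketch is not part of the patch, and the
LRU removal, unreserve and error paths are omitted), the lock nesting
in the cleanup paths now looks roughly like this:

  spin_lock(&glob->lru_lock);                      /* outer spinlock */
  ret = ttm_bo_reserve_locked(bo, false, true, false, 0);
  if (!ret) {
          spin_lock(&bdev->fence_lock);            /* now the innermost lock */
          ret = ttm_bo_wait(bo, false, false, true);
          spin_unlock(&bdev->fence_lock);
          /* in the real code the bo is taken off the LRU and unreserved here */
  }
  spin_unlock(&glob->lru_lock);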

Signed-off-by: Maarten Lankhorst <maarten.lankhorst at canonical.com>

v2:
 - fix conflict with the upstream race fix, which simplifies ttm_bo_cleanup_refs
v3:
 - instead of removing fence_lock, make it an inner lock
---
 drivers/gpu/drm/ttm/ttm_bo.c           | 95 ++++++++++++++++------------------
 drivers/gpu/drm/ttm/ttm_execbuf_util.c |  4 +-
 2 files changed, 48 insertions(+), 51 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index a3383a7..70285ff 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -478,28 +478,26 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
 {
        struct ttm_bo_device *bdev = bo->bdev;
        struct ttm_bo_global *glob = bo->glob;
-       struct ttm_bo_driver *driver;
+       struct ttm_bo_driver *driver = bdev->driver;
        void *sync_obj = NULL;
        int put_count;
        int ret;

-       spin_lock(&bdev->fence_lock);
-       (void) ttm_bo_wait(bo, false, false, true);
-       if (!bo->sync_obj) {
-
-               spin_lock(&glob->lru_lock);
-
-               /**
-                * Lock inversion between bo:reserve and bdev::fence_lock here,
-                * but that's OK, since we're only trylocking.
-                */
-
-               ret = ttm_bo_reserve_locked(bo, false, true, false, 0);
+       spin_lock(&glob->lru_lock);
+       ret = ttm_bo_reserve_locked(bo, false, true, false, 0);
+       if (!ret) {
+               spin_lock(&bdev->fence_lock);
+               ret = ttm_bo_wait(bo, false, false, true);

-               if (unlikely(ret == -EBUSY))
+               if (unlikely(ret == -EBUSY)) {
+                       sync_obj = driver->sync_obj_ref(bo->sync_obj);
+                       spin_unlock(&bdev->fence_lock);
+                       atomic_set(&bo->reserved, 0);
+                       wake_up_all(&bo->event_queue);
                        goto queue;
-
+               }
                spin_unlock(&bdev->fence_lock);
+
                put_count = ttm_bo_del_from_lru(bo);

                atomic_set(&bo->reserved, 0);
@@ -509,18 +507,11 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
                ttm_bo_list_ref_sub(bo, put_count, true);

                return;
-       } else {
-               spin_lock(&glob->lru_lock);
        }
 queue:
-       driver = bdev->driver;
-       if (bo->sync_obj)
-               sync_obj = driver->sync_obj_ref(bo->sync_obj);
-
        kref_get(&bo->list_kref);
        list_add_tail(&bo->ddestroy, &bdev->ddestroy);
        spin_unlock(&glob->lru_lock);
-       spin_unlock(&bdev->fence_lock);

        if (sync_obj) {
                driver->sync_obj_flush(sync_obj);
@@ -546,54 +537,60 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
                               bool no_wait_gpu)
 {
        struct ttm_bo_device *bdev = bo->bdev;
+       struct ttm_bo_driver *driver = bdev->driver;
        struct ttm_bo_global *glob = bo->glob;
        int put_count;
        int ret = 0;
+       void *sync_obj;

 retry:
-       spin_lock(&bdev->fence_lock);
-       ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
-       spin_unlock(&bdev->fence_lock);
+       spin_lock(&glob->lru_lock);

-       if (unlikely(ret != 0))
-               return ret;
+       ret = ttm_bo_reserve_locked(bo, interruptible,
+                                   no_wait_reserve, false, 0);

-retry_reserve:
-       spin_lock(&glob->lru_lock);
+       if (unlikely(ret)) {
+               spin_unlock(&glob->lru_lock);
+               return ret;
+       }

        if (unlikely(list_empty(&bo->ddestroy))) {
+               atomic_set(&bo->reserved, 0);
+               wake_up_all(&bo->event_queue);
                spin_unlock(&glob->lru_lock);
                return 0;
        }

-       ret = ttm_bo_reserve_locked(bo, false, true, false, 0);
-
-       if (unlikely(ret == -EBUSY)) {
-               spin_unlock(&glob->lru_lock);
-               if (likely(!no_wait_reserve))
-                       ret = ttm_bo_wait_unreserved(bo, interruptible);
-               if (unlikely(ret != 0))
+       spin_lock(&bdev->fence_lock);
+       ret = ttm_bo_wait(bo, false, false, true);
+       if (ret) {
+               if (no_wait_gpu) {
+                       spin_unlock(&bdev->fence_lock);
+                       atomic_set(&bo->reserved, 0);
+                       wake_up_all(&bo->event_queue);
+                       spin_unlock(&glob->lru_lock);
                        return ret;
+               }

-               goto retry_reserve;
-       }
-
-       BUG_ON(ret != 0);
-
-       /**
-        * We can re-check for sync object without taking
-        * the bo::lock since setting the sync object requires
-        * also bo::reserved. A busy object at this point may
-        * be caused by another thread recently starting an accelerated
-        * eviction.
-        */
+               /**
+                * Take a reference to the fence and unreserve. If the wait
+                * was successful and no new sync_obj was attached,
+                * ttm_bo_wait on retry will return 0 and end the loop.
+                */

-       if (unlikely(bo->sync_obj)) {
+               sync_obj = driver->sync_obj_ref(bo->sync_obj);
+               spin_unlock(&bdev->fence_lock);
                atomic_set(&bo->reserved, 0);
                wake_up_all(&bo->event_queue);
                spin_unlock(&glob->lru_lock);
+
+               ret = driver->sync_obj_wait(sync_obj, false, interruptible);
+               driver->sync_obj_unref(&sync_obj);
+               if (ret)
+                       return ret;
                goto retry;
        }
+       spin_unlock(&bdev->fence_lock);

        put_count = ttm_bo_del_from_lru(bo);
        list_del_init(&bo->ddestroy);
diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
index 1986d00..cd9e452 100644
--- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
+++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
@@ -213,8 +213,8 @@ void ttm_eu_fence_buffer_objects(struct list_head *list, void *sync_obj)
        driver = bdev->driver;
        glob = bo->glob;

-       spin_lock(&bdev->fence_lock);
        spin_lock(&glob->lru_lock);
+       spin_lock(&bdev->fence_lock);

        list_for_each_entry(entry, list, head) {
                bo = entry->bo;
@@ -223,8 +223,8 @@ void ttm_eu_fence_buffer_objects(struct list_head *list, void *sync_obj)
                ttm_bo_unreserve_locked(bo);
                entry->reserved = false;
        }
-       spin_unlock(&glob->lru_lock);
        spin_unlock(&bdev->fence_lock);
+       spin_unlock(&glob->lru_lock);

        list_for_each_entry(entry, list, head) {
                if (entry->old_sync_obj)
-- 
1.8.0
