On 11/19/2012 03:17 PM, Thomas Hellstrom wrote:
> Hi,
>
> This patch looks mostly good, although I think ttm_bo_cleanup_refs
> becomes overly complicated:
> Could this do, or am I missing something?
>

Actually, my version is bad, because ttm_bo_wait() is called with the lru lock held.

/Thomas

> static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>                                bool interruptible,
>                                bool no_wait_reserve,
>                                bool no_wait_gpu)
> {
>         struct ttm_bo_device *bdev = bo->bdev;
>         struct ttm_bo_global *glob = bo->glob;
>         int put_count;
>         int ret = 0;
>
>         /*
>          * First, reserve while making sure we're still on the
>          * ddestroy list.
>          */
> retry_reserve:
>         spin_lock(&glob->lru_lock);
>
>         if (unlikely(list_empty(&bo->ddestroy))) {
>                 spin_unlock(&glob->lru_lock);
>                 return 0;
>         }
>
>         ret = ttm_bo_reserve_locked(bo, false, true, false, 0);
>
>         if (unlikely(ret == -EBUSY)) {
>                 spin_unlock(&glob->lru_lock);
>                 if (likely(!no_wait_reserve))
>                         ret = ttm_bo_wait_unreserved(bo, interruptible);
>                 if (unlikely(ret != 0))
>                         return ret;
>
>                 goto retry_reserve;
>         }
>
>         BUG_ON(ret != 0);
>
>         spin_lock(&bdev->fence_lock);
>         ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
>         spin_unlock(&bdev->fence_lock);
>
>         if (unlikely(ret != 0)) {
>                 atomic_set(&bo->reserved, 0);
>                 wake_up_all(&bo->event_queue);
>                 spin_unlock(&glob->lru_lock);
>                 return ret;
>         }
>
>         put_count = ttm_bo_del_from_lru(bo);
>         list_del_init(&bo->ddestroy);
>         ++put_count;
>
>         spin_unlock(&glob->lru_lock);
>         ttm_bo_cleanup_memtype_use(bo);
>
>         atomic_set(&bo->reserved, 0);
>         wake_up_all(&bo->event_queue);
>         ttm_bo_list_ref_sub(bo, put_count, true);
>
>         return 0;
> }
>
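To make the self-correction concrete: reduced to its lock/wait skeleton, the suggested function above does the following (a sketch, not part of the thread's code; everything except the locking is elided):

    spin_lock(&glob->lru_lock);     /* spinlock taken and still held below */

    spin_lock(&bdev->fence_lock);
    ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
    /*
     * With no_wait_gpu == false, ttm_bo_wait() may sleep in
     * driver->sync_obj_wait() while waiting for the GPU, and
     * sleeping with a spinlock held is not allowed.
     */
    spin_unlock(&bdev->fence_lock);
    spin_unlock(&glob->lru_lock);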
> On 11/12/2012 03:00 PM, Maarten Lankhorst wrote:
>> I changed the hierarchy to make fence_lock the innermost lock
>> instead of the outermost lock. This will simplify things slightly, and
>> hopefully makes it easier to make fence_lock global at one point,
>> should that be needed.
>>
>> To make things clearer, I changed the order around in ttm_bo_cleanup_refs
>> and ttm_bo_cleanup_refs_or_queue.
>>
>> A reservation is taken first, then the fence lock is taken and a wait is
>> attempted.
>>
>> Signed-off-by: Maarten Lankhorst <maarten.lankhorst at canonical.com>
>>
>> v2:
>> - fix conflict with upstream race fix, simplifies ttm_bo_cleanup_refs
>> v3:
>> - change removal of fence_lock to making it an inner lock instead
>> ---
>>  drivers/gpu/drm/ttm/ttm_bo.c           | 95 ++++++++++++++++------------------
>>  drivers/gpu/drm/ttm/ttm_execbuf_util.c |  4 +-
>>  2 files changed, 48 insertions(+), 51 deletions(-)
>>
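Condensed to its skeleton, the ordering this patch establishes looks as follows (a sketch of the new ttm_bo_cleanup_refs_or_queue in the diff below; everything except the lock nesting is elided):

    spin_lock(&glob->lru_lock);             /* outer lock: LRU lock first */
    ret = ttm_bo_reserve_locked(bo, false, true, false, 0);
                                            /* then the reservation */
    if (!ret) {
            spin_lock(&bdev->fence_lock);   /* fence_lock is now innermost */
            ret = ttm_bo_wait(bo, false, false, true);
                                            /* no_wait_gpu == true, so this
                                             * cannot sleep under the locks */
            spin_unlock(&bdev->fence_lock);
    }
    spin_unlock(&glob->lru_lock);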
>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
>> index a3383a7..70285ff 100644
>> --- a/drivers/gpu/drm/ttm/ttm_bo.c
>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>> @@ -478,28 +478,26 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
>>  {
>>          struct ttm_bo_device *bdev = bo->bdev;
>>          struct ttm_bo_global *glob = bo->glob;
>> -        struct ttm_bo_driver *driver;
>> +        struct ttm_bo_driver *driver = bdev->driver;
>>          void *sync_obj = NULL;
>>          int put_count;
>>          int ret;
>> -        spin_lock(&bdev->fence_lock);
>> -        (void) ttm_bo_wait(bo, false, false, true);
>> -        if (!bo->sync_obj) {
>> -
>> -                spin_lock(&glob->lru_lock);
>> -
>> -                /**
>> -                 * Lock inversion between bo:reserve and bdev::fence_lock here,
>> -                 * but that's OK, since we're only trylocking.
>> -                 */
>> -
>> -                ret = ttm_bo_reserve_locked(bo, false, true, false, 0);
>> +        spin_lock(&glob->lru_lock);
>> +        ret = ttm_bo_reserve_locked(bo, false, true, false, 0);
>> +        if (!ret) {
>> +                spin_lock(&bdev->fence_lock);
>> +                ret = ttm_bo_wait(bo, false, false, true);
>> -                if (unlikely(ret == -EBUSY))
>> +                if (unlikely(ret == -EBUSY)) {
>> +                        sync_obj = driver->sync_obj_ref(bo->sync_obj);
>> +                        spin_unlock(&bdev->fence_lock);
>> +                        atomic_set(&bo->reserved, 0);
>> +                        wake_up_all(&bo->event_queue);
>>                          goto queue;
>> -
>> +                }
>>                  spin_unlock(&bdev->fence_lock);
>> +
>>                  put_count = ttm_bo_del_from_lru(bo);
>>                  atomic_set(&bo->reserved, 0);
>> @@ -509,18 +507,11 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
>>                  ttm_bo_list_ref_sub(bo, put_count, true);
>>                  return;
>> -        } else {
>> -                spin_lock(&glob->lru_lock);
>>          }
>>  queue:
>> -        driver = bdev->driver;
>> -        if (bo->sync_obj)
>> -                sync_obj = driver->sync_obj_ref(bo->sync_obj);
>> -
>>          kref_get(&bo->list_kref);
>>          list_add_tail(&bo->ddestroy, &bdev->ddestroy);
>>          spin_unlock(&glob->lru_lock);
>> -        spin_unlock(&bdev->fence_lock);
>>          if (sync_obj) {
>>                  driver->sync_obj_flush(sync_obj);
>> @@ -546,54 +537,60 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>>                                 bool no_wait_gpu)
>>  {
>>          struct ttm_bo_device *bdev = bo->bdev;
>> +        struct ttm_bo_driver *driver = bdev->driver;
>>          struct ttm_bo_global *glob = bo->glob;
>>          int put_count;
>>          int ret = 0;
>> +        void *sync_obj;
>>  retry:
>> -        spin_lock(&bdev->fence_lock);
>> -        ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
>> -        spin_unlock(&bdev->fence_lock);
>> +        spin_lock(&glob->lru_lock);
>> -        if (unlikely(ret != 0))
>> -                return ret;
>> +        ret = ttm_bo_reserve_locked(bo, interruptible,
>> +                                    no_wait_reserve, false, 0);
>> -retry_reserve:
>> -        spin_lock(&glob->lru_lock);
>> +        if (unlikely(ret)) {
>> +                spin_unlock(&glob->lru_lock);
>> +                return ret;
>> +        }
>>          if (unlikely(list_empty(&bo->ddestroy))) {
>> +                atomic_set(&bo->reserved, 0);
>> +                wake_up_all(&bo->event_queue);
>>                  spin_unlock(&glob->lru_lock);
>>                  return 0;
>>          }
>> -        ret = ttm_bo_reserve_locked(bo, false, true, false, 0);
>> -
>> -        if (unlikely(ret == -EBUSY)) {
>> -                spin_unlock(&glob->lru_lock);
>> -                if (likely(!no_wait_reserve))
>> -                        ret = ttm_bo_wait_unreserved(bo, interruptible);
>> -                if (unlikely(ret != 0))
>> +        spin_lock(&bdev->fence_lock);
>> +        ret = ttm_bo_wait(bo, false, false, true);
>> +        if (ret) {
>> +                if (no_wait_gpu) {
>> +                        spin_unlock(&bdev->fence_lock);
>> +                        atomic_set(&bo->reserved, 0);
>> +                        wake_up_all(&bo->event_queue);
>> +                        spin_unlock(&glob->lru_lock);
>>                          return ret;
>> +                }
>> -                goto retry_reserve;
>> -        }
>> -
>> -        BUG_ON(ret != 0);
>> -
>> -        /**
>> -         * We can re-check for sync object without taking
>> -         * the bo::lock since setting the sync object requires
>> -         * also bo::reserved. A busy object at this point may
>> -         * be caused by another thread recently starting an accelerated
>> -         * eviction.
>> -         */
>> +                /**
>> +                 * Take a reference to the fence and unreserve. If the wait
>> +                 * was successful and no new sync_obj was attached,
>> +                 * ttm_bo_wait in retry will return ret = 0, and end the loop.
>> +                 */
>> -        if (unlikely(bo->sync_obj)) {
>> +                sync_obj = driver->sync_obj_ref(bo->sync_obj);
>> +                spin_unlock(&bdev->fence_lock);
>>                  atomic_set(&bo->reserved, 0);
>>                  wake_up_all(&bo->event_queue);
>>                  spin_unlock(&glob->lru_lock);
>> +
>> +                ret = driver->sync_obj_wait(sync_obj, false, interruptible);
>> +                driver->sync_obj_unref(&sync_obj);
>> +                if (ret)
>> +                        return ret;
>>                  goto retry;
>>          }
>> +        spin_unlock(&bdev->fence_lock);
>>          put_count = ttm_bo_del_from_lru(bo);
>>          list_del_init(&bo->ddestroy);
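The retry loop above is the subtle part: the actual GPU wait now happens with no spinlocks held. Roughly (a condensed sketch of the new ttm_bo_cleanup_refs, with the reservation and list handling elided):

    retry:
            spin_lock(&glob->lru_lock);
            /* reserve the bo; return if it left the ddestroy list */

            spin_lock(&bdev->fence_lock);
            ret = ttm_bo_wait(bo, false, false, true);  /* non-blocking check */
            if (ret) {                                  /* still busy on the GPU */
                    sync_obj = driver->sync_obj_ref(bo->sync_obj);
                    spin_unlock(&bdev->fence_lock);
                    /* unreserve and drop the lru_lock, then wait lock-free */
                    ret = driver->sync_obj_wait(sync_obj, false, interruptible);
                    driver->sync_obj_unref(&sync_obj);
                    if (ret)
                            return ret;
                    goto retry;         /* idle now; next pass tears the bo down */
            }
            spin_unlock(&bdev->fence_lock);
            /* remove from LRU and ddestroy, then free the backing resources */

The same hierarchy is what forces the swap in ttm_eu_fence_buffer_objects below: lru_lock is now taken before fence_lock and released after it.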
>> diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
>> index 1986d00..cd9e452 100644
>> --- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
>> +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
>> @@ -213,8 +213,8 @@ void ttm_eu_fence_buffer_objects(struct list_head *list, void *sync_obj)
>>          driver = bdev->driver;
>>          glob = bo->glob;
>> -        spin_lock(&bdev->fence_lock);
>>          spin_lock(&glob->lru_lock);
>> +        spin_lock(&bdev->fence_lock);
>>          list_for_each_entry(entry, list, head) {
>>                  bo = entry->bo;
>> @@ -223,8 +223,8 @@ void ttm_eu_fence_buffer_objects(struct list_head *list, void *sync_obj)
>>                  ttm_bo_unreserve_locked(bo);
>>                  entry->reserved = false;
>>          }
>> -        spin_unlock(&glob->lru_lock);
>>          spin_unlock(&bdev->fence_lock);
>> +        spin_unlock(&glob->lru_lock);
>>          list_for_each_entry(entry, list, head) {
>>                  if (entry->old_sync_obj)
>