Hi,

This patch looks mostly good, although I think ttm_bo_cleanup_refs becomes overly complicated.
Could this do, or am I missing something?
static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
			       bool interruptible,
			       bool no_wait_reserve,
			       bool no_wait_gpu)
{
	struct ttm_bo_device *bdev = bo->bdev;
	struct ttm_bo_global *glob = bo->glob;
	int put_count;
	int ret = 0;

	/*
	 * First, reserve while making sure we're still on the
	 * ddestroy list.
	 */
retry_reserve:
	spin_lock(&glob->lru_lock);

	if (unlikely(list_empty(&bo->ddestroy))) {
		spin_unlock(&glob->lru_lock);
		return 0;
	}

	ret = ttm_bo_reserve_locked(bo, false, true, false, 0);

	if (unlikely(ret == -EBUSY)) {
		spin_unlock(&glob->lru_lock);
		if (likely(!no_wait_reserve))
			ret = ttm_bo_wait_unreserved(bo, interruptible);
		if (unlikely(ret != 0))
			return ret;

		goto retry_reserve;
	}

	BUG_ON(ret != 0);

	spin_lock(&bdev->fence_lock);
	ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
	spin_unlock(&bdev->fence_lock);

	if (unlikely(ret != 0)) {
		atomic_set(&bo->reserved, 0);
		wake_up_all(&bo->event_queue);
		spin_unlock(&glob->lru_lock);
		return ret;
	}

	put_count = ttm_bo_del_from_lru(bo);
	list_del_init(&bo->ddestroy);
	++put_count;

	spin_unlock(&glob->lru_lock);
	ttm_bo_cleanup_memtype_use(bo);

	atomic_set(&bo->reserved, 0);
	wake_up_all(&bo->event_queue);

	ttm_bo_list_ref_sub(bo, put_count, true);

	return 0;
}

On 11/12/2012 03:00 PM, Maarten Lankhorst wrote:
> I changed the hierarchy to make fence_lock the most inner lock,
> instead of outer lock. This will simplify things slightly, and
> hopefully makes it easier to make fence_lock global at one point
> should it be needed.
>
> To make things clearer, I change the order around in ttm_bo_cleanup_refs
> and ttm_bo_cleanup_refs_or_queue.
>
> A reservation is taken first, then fence lock is taken and a wait is
> attempted.
>
> Signed-off-by: Maarten Lankhorst <maarten.lankhorst at canonical.com>
>
> v2:
> - fix conflict with upstream race fix, simplifies ttm_bo_cleanup_refs
> v3:
> - change removal of fence_lock to making it a inner lock instead
> ---
> drivers/gpu/drm/ttm/ttm_bo.c           | 95 ++++++++++++++++------------------
> drivers/gpu/drm/ttm/ttm_execbuf_util.c |  4 +-
> 2 files changed, 48 insertions(+), 51 deletions(-)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index a3383a7..70285ff 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -478,28 +478,26 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
> {
> struct ttm_bo_device *bdev = bo->bdev;
> struct ttm_bo_global *glob = bo->glob;
> - struct ttm_bo_driver *driver;
> + struct ttm_bo_driver *driver = bdev->driver;
> void *sync_obj = NULL;
> int put_count;
> int ret;
>
> - spin_lock(&bdev->fence_lock);
> - (void) ttm_bo_wait(bo, false, false, true);
> - if (!bo->sync_obj) {
> -
> - spin_lock(&glob->lru_lock);
> -
> - /**
> - * Lock inversion between bo:reserve and bdev::fence_lock here,
> - * but that's OK, since we're only trylocking.
> - */
> -
> - ret = ttm_bo_reserve_locked(bo, false, true, false, 0);
> + spin_lock(&glob->lru_lock);
> + ret = ttm_bo_reserve_locked(bo, false, true, false, 0);
> + if (!ret) {
> + spin_lock(&bdev->fence_lock);
> + ret = ttm_bo_wait(bo, false, false, true);
>
> - if (unlikely(ret == -EBUSY))
> + if (unlikely(ret == -EBUSY)) {
> + sync_obj = driver->sync_obj_ref(bo->sync_obj);
> + spin_unlock(&bdev->fence_lock);
> + atomic_set(&bo->reserved, 0);
> + wake_up_all(&bo->event_queue);
> goto queue;
> -
> + }
> spin_unlock(&bdev->fence_lock);
> +
> put_count = ttm_bo_del_from_lru(bo);
>
> atomic_set(&bo->reserved, 0);
> @@ -509,18 +507,11 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
> ttm_bo_list_ref_sub(bo, put_count, true);
>
> return;
> - } else {
> - spin_lock(&glob->lru_lock);
> }
> queue:
> - driver = bdev->driver;
> - if (bo->sync_obj)
> - sync_obj = driver->sync_obj_ref(bo->sync_obj);
> -
> kref_get(&bo->list_kref);
> list_add_tail(&bo->ddestroy, &bdev->ddestroy);
> spin_unlock(&glob->lru_lock);
> - spin_unlock(&bdev->fence_lock);
>
> if (sync_obj) {
> driver->sync_obj_flush(sync_obj);
> @@ -546,54 +537,60 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
> bool no_wait_gpu)
> {
> struct ttm_bo_device *bdev = bo->bdev;
> + struct ttm_bo_driver *driver = bdev->driver;
> struct ttm_bo_global *glob = bo->glob;
> int put_count;
> int ret = 0;
> + void *sync_obj;
>
> retry:
> - spin_lock(&bdev->fence_lock);
> - ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
> - spin_unlock(&bdev->fence_lock);
> + spin_lock(&glob->lru_lock);
>
> - if (unlikely(ret != 0))
> - return ret;
> + ret = ttm_bo_reserve_locked(bo, interruptible,
> + no_wait_reserve, false, 0);
>
> -retry_reserve:
> - spin_lock(&glob->lru_lock);
> + if (unlikely(ret)) {
> + spin_unlock(&glob->lru_lock);
> + return ret;
> + }
>
> if (unlikely(list_empty(&bo->ddestroy))) {
> + atomic_set(&bo->reserved, 0);
> + wake_up_all(&bo->event_queue);
> spin_unlock(&glob->lru_lock);
> return 0;
> }
>
> - ret = ttm_bo_reserve_locked(bo, false, true, false, 0);
> -
> - if (unlikely(ret == -EBUSY)) {
> - spin_unlock(&glob->lru_lock);
> - if (likely(!no_wait_reserve))
> - ret = ttm_bo_wait_unreserved(bo, interruptible);
> - if (unlikely(ret != 0))
> + spin_lock(&bdev->fence_lock);
> + ret = ttm_bo_wait(bo, false, false, true);
> + if (ret) {
> + if (no_wait_gpu) {
> + spin_unlock(&bdev->fence_lock);
> + atomic_set(&bo->reserved, 0);
> + wake_up_all(&bo->event_queue);
> + spin_unlock(&glob->lru_lock);
> return ret;
> + }
>
> - goto retry_reserve;
> - }
> -
> - BUG_ON(ret != 0);
> -
> - /**
> - * We can re-check for sync object without taking
> - * the bo::lock since setting the sync object requires
> - * also bo::reserved. A busy object at this point may
> - * be caused by another thread recently starting an accelerated
> - * eviction.
> - */
> + /**
> + * Take a reference to the fence and unreserve, if the wait
> + * was succesful and no new sync_obj was attached,
> + * ttm_bo_wait in retry will return ret = 0, and end the loop.
> + */
>
> - if (unlikely(bo->sync_obj)) {
> + sync_obj = driver->sync_obj_ref(&bo->sync_obj);
> + spin_unlock(&bdev->fence_lock);
> atomic_set(&bo->reserved, 0);
> wake_up_all(&bo->event_queue);
> spin_unlock(&glob->lru_lock);
> +
> + ret = driver->sync_obj_wait(bo->sync_obj, false, interruptible);
> + driver->sync_obj_unref(&sync_obj);
> + if (ret)
> + return ret;
> goto retry;
> }
> + spin_unlock(&bdev->fence_lock);
>
> put_count = ttm_bo_del_from_lru(bo);
> list_del_init(&bo->ddestroy);
> diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
> index 1986d00..cd9e452 100644
> --- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
> +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
> @@ -213,8 +213,8 @@ void ttm_eu_fence_buffer_objects(struct list_head *list, void *sync_obj)
> driver = bdev->driver;
> glob = bo->glob;
>
> - spin_lock(&bdev->fence_lock);
> spin_lock(&glob->lru_lock);
> + spin_lock(&bdev->fence_lock);
>
> list_for_each_entry(entry, list, head) {
> bo = entry->bo;
> @@ -223,8 +223,8 @@ void ttm_eu_fence_buffer_objects(struct list_head *list, void *sync_obj)
> ttm_bo_unreserve_locked(bo);
> entry->reserved = false;
> }
> - spin_unlock(&glob->lru_lock);
> spin_unlock(&bdev->fence_lock);
> + spin_unlock(&glob->lru_lock);
>
> list_for_each_entry(entry, list, head) {
> if (entry->old_sync_obj)
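
For anyone skimming the thread, the net effect of the patch is that bdev->fence_lock nests inside both the lru_lock and the buffer reservation. Below is a minimal standalone sketch of that nesting order only; pthread mutexes stand in for glob->lru_lock, bo::reserved and bdev->fence_lock, so this is an illustration of the ordering, not actual TTM code:

/*
 * Illustration only: pthread mutexes stand in for glob->lru_lock,
 * the buffer reservation (bo::reserved) and bdev->fence_lock.
 * The point is the nesting order the patch establishes:
 * lru_lock outermost, then the reservation, then fence_lock innermost.
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lru_lock    = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t reservation = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t fence_lock  = PTHREAD_MUTEX_INITIALIZER;

static void cleanup_one_bo(void)
{
	pthread_mutex_lock(&lru_lock);		/* outermost */
	pthread_mutex_lock(&reservation);	/* reserve while holding lru_lock */
	pthread_mutex_lock(&fence_lock);	/* innermost, only around the fence check */

	/* ... inspect or wait for the fence here ... */

	pthread_mutex_unlock(&fence_lock);	/* release in reverse order */
	pthread_mutex_unlock(&reservation);
	pthread_mutex_unlock(&lru_lock);
}

int main(void)
{
	cleanup_one_bo();
	printf("lock order: lru_lock -> reservation -> fence_lock\n");
	return 0;
}

With fence_lock innermost and taken only around the fence check, the old trylock dance to avoid the reserve/fence_lock inversion is no longer needed.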