On 11/28/2012 12:25 PM, Maarten Lankhorst wrote:
> By removing the unlocking of lru and retaking it immediately, a race is
> removed where the bo is taken off the swap list or the lru list between
> the unlock and relock. As such the cleanup_refs code can be simplified,
> it will attempt to call ttm_bo_wait non-blockingly, and if it fails
> it will drop the locks and perform a blocking wait, or return an error
> if no_wait_gpu was set.
>
> The need for looping is also eliminated, since swapout and evict_mem_first
> will always follow the destruction path, so no new fence is allowed
> to be attached. As far as I can see this may already have been the case,
> but the unlocking / relocking required a complicated loop to deal with
> re-reservation.
>
> The downside is that ttm_bo_cleanup_memtype_use is no longer called with
> reservation held, so drivers must be aware that move_notify with a null
> parameter doesn't require a reservation.
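
If I read that last paragraph right, driver callbacks now need to cope
with being called without the reservation held on the destruction path.
Something roughly like this in a driver's move_notify (a sketch only,
the mydrv_* names are made up):

	/*
	 * Hypothetical driver callback illustrating the new rule:
	 * when new_mem == NULL (destruction path) the bo may no
	 * longer be reserved, so the callback must not assume or
	 * assert the reservation there.
	 */
	static void mydrv_bo_move_notify(struct ttm_buffer_object *bo,
					 struct ttm_mem_reg *new_mem)
	{
		if (!new_mem) {
			/* Destruction: may now run without bo reserved. */
			mydrv_unmap_from_vm(bo);
			return;
		}

		/* Real moves still happen with the reservation held. */
		mydrv_update_vm_mapping(bo, new_mem);
	}

Might be worth spelling that out explicitly in the commit message, since
it changes a documented assumption for drivers.
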
Why can't we unreserve *after* ttm_bo_cleanup_memtype_use? That's not
immediately clear from this patch.

>
> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>
> ---
>  drivers/gpu/drm/ttm/ttm_bo.c | 112 +++++++++++++++++++++++--------------------
>  1 file changed, 60 insertions(+), 52 deletions(-)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index 202fc20..02b275b 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -486,14 +486,6 @@ static void ttm_bo_cleanup_memtype_use(struct ttm_buffer_object *bo)
>  		bo->ttm = NULL;
>  	}
>  	ttm_bo_mem_put(bo, &bo->mem);
> -
> -	atomic_set(&bo->reserved, 0);
> -
> -	/*
> -	 * Make processes trying to reserve really pick it up.
> -	 */
> -	smp_mb__after_atomic_dec();
> -	wake_up_all(&bo->event_queue);
>  }
>
>  static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
> @@ -515,6 +507,9 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
>  		put_count = ttm_bo_del_from_lru(bo);
>
>  		spin_unlock(&glob->lru_lock);
> +		atomic_set(&bo->reserved, 0);
> +		wake_up_all(&bo->event_queue);
> +

I think (although I'm not 100% sure) that if we use atomic_set() to
unreserve, and it's not followed by a spin_unlock(), we need to insert
a memory barrier, like is done above in the removed code. Otherwise
memory operations protected by the reservation may be reordered until
after the reservation is released. (See the sketch at the end of this
mail.)

>  		ttm_bo_cleanup_memtype_use(bo);
>
>  		ttm_bo_list_ref_sub(bo, put_count, true);
> @@ -543,68 +538,72 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
>  }
>
>  /**
> - * function ttm_bo_cleanup_refs
> + * function ttm_bo_cleanup_refs_and_unlock
>   * If bo idle, remove from delayed- and lru lists, and unref.
>   * If not idle, do nothing.
>   *
> + * Must be called with lru_lock and reservation held, this function
> + * will drop both before returning.
> + *
>   * @interruptible         Any sleeps should occur interruptibly.
> - * @no_wait_reserve       Never wait for reserve. Return -EBUSY instead.
>   * @no_wait_gpu           Never wait for gpu. Return -EBUSY instead.
>   */
>
> -static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
> -			       bool interruptible,
> -			       bool no_wait_reserve,
> -			       bool no_wait_gpu)
> +static int ttm_bo_cleanup_refs_and_unlock(struct ttm_buffer_object *bo,
> +					  bool interruptible,
> +					  bool no_wait_gpu)
>  {
>  	struct ttm_bo_device *bdev = bo->bdev;
> +	struct ttm_bo_driver *driver = bdev->driver;
>  	struct ttm_bo_global *glob = bo->glob;
>  	int put_count;
>  	int ret = 0;
>
> -retry:
>  	spin_lock(&bdev->fence_lock);
> -	ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
> -	spin_unlock(&bdev->fence_lock);
> +	ret = ttm_bo_wait(bo, false, false, true);
>
> -	if (unlikely(ret != 0))
> +	if (ret && no_wait_gpu) {
> +		spin_unlock(&bdev->fence_lock);
> +		atomic_set(&bo->reserved, 0);
> +		wake_up_all(&bo->event_queue);
> +		spin_unlock(&glob->lru_lock);
>  		return ret;
> +	} else if (ret) {
> +		void *sync_obj;
>
> -retry_reserve:
> -	spin_lock(&glob->lru_lock);
> -
> -	if (unlikely(list_empty(&bo->ddestroy))) {
> +		/**
> +		 * Take a reference to the fence and unreserve,
> +		 * at this point the buffer should be dead, so
> +		 * no new sync objects can be attached.
> +		 */
> +		sync_obj = driver->sync_obj_ref(&bo->sync_obj);
> +		spin_unlock(&bdev->fence_lock);
> +		atomic_set(&bo->reserved, 0);
> +		wake_up_all(&bo->event_queue);
>  		spin_unlock(&glob->lru_lock);
> -		return 0;
> -	}
> -
> -	ret = ttm_bo_reserve_locked(bo, false, true, false, 0);
>
> -	if (unlikely(ret == -EBUSY)) {
> -		spin_unlock(&glob->lru_lock);
> -		if (likely(!no_wait_reserve))
> -			ret = ttm_bo_wait_unreserved(bo, interruptible);
> -		if (unlikely(ret != 0))
> +		ret = driver->sync_obj_wait(sync_obj, false, interruptible);
> +		driver->sync_obj_unref(&sync_obj);
> +		if (ret)
>  			return ret;
>
> -		goto retry_reserve;
> -	}
> -
> -	BUG_ON(ret != 0);
> +		/* remove sync_obj with ttm_bo_wait */
> +		spin_lock(&bdev->fence_lock);
> +		ret = ttm_bo_wait(bo, false, false, true);
> +		spin_unlock(&bdev->fence_lock);
>
> -	/**
> -	 * We can re-check for sync object without taking
> -	 * the bo::lock since setting the sync object requires
> -	 * also bo::reserved. A busy object at this point may
> -	 * be caused by another thread recently starting an accelerated
> -	 * eviction.
> -	 */
> +		WARN_ON(ret);
>
> -	if (unlikely(bo->sync_obj)) {
> +		spin_lock(&glob->lru_lock);
> +	} else {
> +		spin_unlock(&bdev->fence_lock);
>  		atomic_set(&bo->reserved, 0);
>  		wake_up_all(&bo->event_queue);
> +	}
> +
> +	if (unlikely(list_empty(&bo->ddestroy))) {
>  		spin_unlock(&glob->lru_lock);
> -		goto retry;
> +		return 0;
>  	}
>
>  	put_count = ttm_bo_del_from_lru(bo);
> @@ -647,9 +646,13 @@ static int ttm_bo_delayed_delete(struct ttm_bo_device *bdev, bool remove_all)
>  			kref_get(&nentry->list_kref);
>  		}
>
> -		spin_unlock(&glob->lru_lock);
> -		ret = ttm_bo_cleanup_refs(entry, false, !remove_all,
> -					  !remove_all);
> +		ret = ttm_bo_reserve_locked(entry, false, !remove_all, false, 0);
> +		if (!ret)
> +			ret = ttm_bo_cleanup_refs_and_unlock(entry, false,
> +							     !remove_all);
> +		else
> +			spin_unlock(&glob->lru_lock);
> +
>  		kref_put(&entry->list_kref, ttm_bo_release_list);
>  		entry = nentry;
>
> @@ -803,9 +806,13 @@ retry:
>  	kref_get(&bo->list_kref);
>
>  	if (!list_empty(&bo->ddestroy)) {
> -		spin_unlock(&glob->lru_lock);
> -		ret = ttm_bo_cleanup_refs(bo, interruptible,
> -					  no_wait_reserve, no_wait_gpu);
> +		ret = ttm_bo_reserve_locked(bo, interruptible, no_wait_reserve, false, 0);
> +		if (!ret)
> +			ret = ttm_bo_cleanup_refs_and_unlock(bo, interruptible,
> +							     no_wait_gpu);
> +		else
> +			spin_unlock(&glob->lru_lock);
> +
>  		kref_put(&bo->list_kref, ttm_bo_release_list);
>
>  		return ret;
> @@ -1799,8 +1806,9 @@ static int ttm_bo_swapout(struct ttm_mem_shrink *shrink)
>  		kref_get(&bo->list_kref);
>
>  		if (!list_empty(&bo->ddestroy)) {
> -			spin_unlock(&glob->lru_lock);
> -			(void) ttm_bo_cleanup_refs(bo, false, false, false);
> +			ttm_bo_reserve_locked(bo, false, false, false, 0);
> +			ttm_bo_cleanup_refs_and_unlock(bo, false, false);
> +
>  			kref_put(&bo->list_kref, ttm_bo_release_list);
>  			spin_lock(&glob->lru_lock);
>  			continue;
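
To be concrete about the barrier comment above: where the unreserve is
not followed by a spin_unlock(), I'd expect the pattern the removed
code used. Something like this helper (just a sketch to illustrate the
ordering concern; the helper name is made up):

	/*
	 * Sketch of an unreserve that is not followed by a
	 * spin_unlock(). atomic_set() on its own gives no ordering
	 * guarantees, so without the barrier, stores done while the
	 * reservation was held could be reordered past the point
	 * where another thread can successfully reserve the bo.
	 */
	static inline void ttm_bo_unreserve_nolock(struct ttm_buffer_object *bo)
	{
		atomic_set(&bo->reserved, 0);
		smp_mb__after_atomic_dec();	/* as in the removed code */
		wake_up_all(&bo->event_queue);
	}

I assume the paths where the atomic_set() *is* followed by a
spin_unlock(&glob->lru_lock) are relying on the unlock's barrier
semantics instead, which is why they get away without an explicit
barrier.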