On 11/19/2012 03:17 PM, Thomas Hellstrom wrote:
> Hi,
>
> This patch looks mostly good, although I think ttm_bo_cleanup_refs
> becomes overly complicated:
> Could this do, or am I missing something?
>

Actually, my version is bad, because ttm_bo_wait() is called with the lru lock held.

/Thomas

> static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>                                bool interruptible,
>                                bool no_wait_reserve,
>                                bool no_wait_gpu)
> {
>         struct ttm_bo_device *bdev = bo->bdev;
>         struct ttm_bo_global *glob = bo->glob;
>         int put_count;
>         int ret = 0;
>
>         /*
>          * First, reserve while making sure we're still on the
>          * ddestroy list.
>          */
> retry_reserve:
>         spin_lock(&glob->lru_lock);
>
>         if (unlikely(list_empty(&bo->ddestroy))) {
>                 spin_unlock(&glob->lru_lock);
>                 return 0;
>         }
>
>         ret = ttm_bo_reserve_locked(bo, false, true, false, 0);
>
>         if (unlikely(ret == -EBUSY)) {
>                 spin_unlock(&glob->lru_lock);
>                 if (likely(!no_wait_reserve))
>                         ret = ttm_bo_wait_unreserved(bo, interruptible);
>                 if (unlikely(ret != 0))
>                         return ret;
>
>                 goto retry_reserve;
>         }
>
>         BUG_ON(ret != 0);
>
>         spin_lock(&bdev->fence_lock);
>         ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
>         spin_unlock(&bdev->fence_lock);
>
>         if (unlikely(ret != 0)) {
>                 atomic_set(&bo->reserved, 0);
>                 wake_up_all(&bo->event_queue);
>                 spin_unlock(&glob->lru_lock);
>                 return ret;
>         }
>
>         put_count = ttm_bo_del_from_lru(bo);
>         list_del_init(&bo->ddestroy);
>         ++put_count;
>
>         spin_unlock(&glob->lru_lock);
>         ttm_bo_cleanup_memtype_use(bo);
>
>         atomic_set(&bo->reserved, 0);
>         wake_up_all(&bo->event_queue);
>         ttm_bo_list_ref_sub(bo, put_count, true);
>
>         return 0;
> }
>
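To make the self-correction concrete: reduced to its lock/wait skeleton, the suggested function above does the following (a sketch, not part of the thread's code; everything except the locking is elided):

    spin_lock(&glob->lru_lock);     /* spinlock taken and still held below */

    spin_lock(&bdev->fence_lock);
    ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
    /*
     * With no_wait_gpu == false, ttm_bo_wait() may sleep in
     * driver->sync_obj_wait() while waiting for the GPU, and
     * sleeping with a spinlock held is not allowed.
     */
    spin_unlock(&bdev->fence_lock);
    spin_unlock(&glob->lru_lock);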
> On 11/12/2012 03:00 PM, Maarten Lankhorst wrote:
>> I changed the hierarchy to make fence_lock the innermost lock
>> instead of the outermost lock. This will simplify things slightly, and
>> hopefully makes it easier to make fence_lock global at one point,
>> should that be needed.
>>
>> To make things clearer, I changed the order around in ttm_bo_cleanup_refs
>> and ttm_bo_cleanup_refs_or_queue.
>>
>> A reservation is taken first, then the fence lock is taken and a wait is
>> attempted.
>>
>> Signed-off-by: Maarten Lankhorst <maarten.lankhorst at canonical.com>
>>
>> v2:
>> - fix conflict with upstream race fix, simplifies ttm_bo_cleanup_refs
>> v3:
>> - change removal of fence_lock to making it an inner lock instead
>> ---
>>  drivers/gpu/drm/ttm/ttm_bo.c           | 95 ++++++++++++++++------------------
>>  drivers/gpu/drm/ttm/ttm_execbuf_util.c |  4 +-
>>  2 files changed, 48 insertions(+), 51 deletions(-)
>>
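Condensed to its skeleton, the ordering this patch establishes looks as follows (a sketch of the new ttm_bo_cleanup_refs_or_queue in the diff below; everything except the lock nesting is elided):

    spin_lock(&glob->lru_lock);             /* outer lock: LRU lock first */
    ret = ttm_bo_reserve_locked(bo, false, true, false, 0);
                                            /* then the reservation */
    if (!ret) {
            spin_lock(&bdev->fence_lock);   /* fence_lock is now innermost */
            ret = ttm_bo_wait(bo, false, false, true);
                                            /* no_wait_gpu == true, so this
                                             * cannot sleep under the locks */
            spin_unlock(&bdev->fence_lock);
    }
    spin_unlock(&glob->lru_lock);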
>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
>> index a3383a7..70285ff 100644
>> --- a/drivers/gpu/drm/ttm/ttm_bo.c
>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>> @@ -478,28 +478,26 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
>>  {
>>          struct ttm_bo_device *bdev = bo->bdev;
>>          struct ttm_bo_global *glob = bo->glob;
>> -        struct ttm_bo_driver *driver;
>> +        struct ttm_bo_driver *driver = bdev->driver;
>>          void *sync_obj = NULL;
>>          int put_count;
>>          int ret;
>> -        spin_lock(&bdev->fence_lock);
>> -        (void) ttm_bo_wait(bo, false, false, true);
>> -        if (!bo->sync_obj) {
>> -
>> -                spin_lock(&glob->lru_lock);
>> -
>> -                /**
>> -                 * Lock inversion between bo:reserve and bdev::fence_lock here,
>> -                 * but that's OK, since we're only trylocking.
>> -                 */
>> -
>> -                ret = ttm_bo_reserve_locked(bo, false, true, false, 0);
>> +        spin_lock(&glob->lru_lock);
>> +        ret = ttm_bo_reserve_locked(bo, false, true, false, 0);
>> +        if (!ret) {
>> +                spin_lock(&bdev->fence_lock);
>> +                ret = ttm_bo_wait(bo, false, false, true);
>> -                if (unlikely(ret == -EBUSY))
>> +                if (unlikely(ret == -EBUSY)) {
>> +                        sync_obj = driver->sync_obj_ref(bo->sync_obj);
>> +                        spin_unlock(&bdev->fence_lock);
>> +                        atomic_set(&bo->reserved, 0);
>> +                        wake_up_all(&bo->event_queue);
>>                          goto queue;
>> -
>> +                }
>>                  spin_unlock(&bdev->fence_lock);
>> +
>>                  put_count = ttm_bo_del_from_lru(bo);
>>                  atomic_set(&bo->reserved, 0);
>> @@ -509,18 +507,11 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
>>                  ttm_bo_list_ref_sub(bo, put_count, true);
>>                  return;
>> -        } else {
>> -                spin_lock(&glob->lru_lock);
>>          }
>>  queue:
>> -        driver = bdev->driver;
>> -        if (bo->sync_obj)
>> -                sync_obj = driver->sync_obj_ref(bo->sync_obj);
>> -
>>          kref_get(&bo->list_kref);
>>          list_add_tail(&bo->ddestroy, &bdev->ddestroy);
>>          spin_unlock(&glob->lru_lock);
>> -        spin_unlock(&bdev->fence_lock);
>>          if (sync_obj) {
>>                  driver->sync_obj_flush(sync_obj);
>> @@ -546,54 +537,60 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>>                                 bool no_wait_gpu)
>>  {
>>          struct ttm_bo_device *bdev = bo->bdev;
>> +        struct ttm_bo_driver *driver = bdev->driver;
>>          struct ttm_bo_global *glob = bo->glob;
>>          int put_count;
>>          int ret = 0;
>> +        void *sync_obj;
>>  retry:
>> -        spin_lock(&bdev->fence_lock);
>> -        ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
>> -        spin_unlock(&bdev->fence_lock);
>> +        spin_lock(&glob->lru_lock);
>> -        if (unlikely(ret != 0))
>> -                return ret;
>> +        ret = ttm_bo_reserve_locked(bo, interruptible,
>> +                                    no_wait_reserve, false, 0);
>> -retry_reserve:
>> -        spin_lock(&glob->lru_lock);
>> +        if (unlikely(ret)) {
>> +                spin_unlock(&glob->lru_lock);
>> +                return ret;
>> +        }
>>          if (unlikely(list_empty(&bo->ddestroy))) {
>> +                atomic_set(&bo->reserved, 0);
>> +                wake_up_all(&bo->event_queue);
>>                  spin_unlock(&glob->lru_lock);
>>                  return 0;
>>          }
>> -        ret = ttm_bo_reserve_locked(bo, false, true, false, 0);
>> -
>> -        if (unlikely(ret == -EBUSY)) {
>> -                spin_unlock(&glob->lru_lock);
>> -                if (likely(!no_wait_reserve))
>> -                        ret = ttm_bo_wait_unreserved(bo, interruptible);
>> -                if (unlikely(ret != 0))
>> +        spin_lock(&bdev->fence_lock);
>> +        ret = ttm_bo_wait(bo, false, false, true);
>> +        if (ret) {
>> +                if (no_wait_gpu) {
>> +                        spin_unlock(&bdev->fence_lock);
>> +                        atomic_set(&bo->reserved, 0);
>> +                        wake_up_all(&bo->event_queue);
>> +                        spin_unlock(&glob->lru_lock);
>>                          return ret;
>> +                }
>> -                goto retry_reserve;
>> -        }
>> -
>> -        BUG_ON(ret != 0);
>> -
>> -        /**
>> -         * We can re-check for sync object without taking
>> -         * the bo::lock since setting the sync object requires
>> -         * also bo::reserved. A busy object at this point may
>> -         * be caused by another thread recently starting an accelerated
>> -         * eviction.
>> -         */
>> +                /**
>> +                 * Take a reference to the fence and unreserve. If the wait
>> +                 * was successful and no new sync_obj was attached,
>> +                 * ttm_bo_wait in retry will return ret = 0, and end the loop.
>> +                 */
>> -        if (unlikely(bo->sync_obj)) {
>> +                sync_obj = driver->sync_obj_ref(bo->sync_obj);
>> +                spin_unlock(&bdev->fence_lock);
>>                  atomic_set(&bo->reserved, 0);
>>                  wake_up_all(&bo->event_queue);
>>                  spin_unlock(&glob->lru_lock);
>> +
>> +                ret = driver->sync_obj_wait(sync_obj, false, interruptible);
>> +                driver->sync_obj_unref(&sync_obj);
>> +                if (ret)
>> +                        return ret;
>>                  goto retry;
>>          }
>> +        spin_unlock(&bdev->fence_lock);
>>          put_count = ttm_bo_del_from_lru(bo);
>>          list_del_init(&bo->ddestroy);
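The retry loop above is the subtle part: the actual GPU wait now happens with no spinlocks held. Roughly (a condensed sketch of the new ttm_bo_cleanup_refs, with the reservation and list handling elided):

    retry:
            spin_lock(&glob->lru_lock);
            /* reserve the bo; return if it left the ddestroy list */

            spin_lock(&bdev->fence_lock);
            ret = ttm_bo_wait(bo, false, false, true);  /* non-blocking check */
            if (ret) {                                  /* still busy on the GPU */
                    sync_obj = driver->sync_obj_ref(bo->sync_obj);
                    spin_unlock(&bdev->fence_lock);
                    /* unreserve and drop the lru_lock, then wait lock-free */
                    ret = driver->sync_obj_wait(sync_obj, false, interruptible);
                    driver->sync_obj_unref(&sync_obj);
                    if (ret)
                            return ret;
                    goto retry;         /* idle now; next pass tears the bo down */
            }
            spin_unlock(&bdev->fence_lock);
            /* remove from LRU and ddestroy, then free the backing resources */

The same hierarchy is what forces the swap in ttm_eu_fence_buffer_objects below: lru_lock is now taken before fence_lock and released after it.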
>> diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
>> index 1986d00..cd9e452 100644
>> --- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
>> +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
>> @@ -213,8 +213,8 @@ void ttm_eu_fence_buffer_objects(struct list_head *list, void *sync_obj)
>>          driver = bdev->driver;
>>          glob = bo->glob;
>> -        spin_lock(&bdev->fence_lock);
>>          spin_lock(&glob->lru_lock);
>> +        spin_lock(&bdev->fence_lock);
>>          list_for_each_entry(entry, list, head) {
>>                  bo = entry->bo;
>> @@ -223,8 +223,8 @@ void ttm_eu_fence_buffer_objects(struct list_head *list, void *sync_obj)
>>                  ttm_bo_unreserve_locked(bo);
>>                  entry->reserved = false;
>>          }
>> -        spin_unlock(&glob->lru_lock);
>>          spin_unlock(&bdev->fence_lock);
>> +        spin_unlock(&glob->lru_lock);
>>          list_for_each_entry(entry, list, head) {
>>                  if (entry->old_sync_obj)
>