Hi, Maarten,

As you know, I have been having my doubts about this change. To me it
seems insane to be forced to read the fence pointer under the reserve
lock, simply because when you take the reserve lock, another process
may already hold it, and there is a substantial chance that that
process will also be waiting for idle while holding the reserve lock.
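To illustrate the difference, here is a minimal userspace sketch of the
two access patterns; pthread locks stand in for the kernel primitives
and every name in it is made up for the example, so don't read it as
actual TTM code:

/*
 * Userspace model only: "reserve" models the long-held bo reservation,
 * "fence_lock" models the short-held dedicated spinlock.
 */
#include <pthread.h>
#include <stdio.h>

struct fence;                          /* opaque, stands in for a sync_obj */

struct bo {
        pthread_mutex_t reserve;       /* long-held: holder may wait for idle */
        pthread_spinlock_t fence_lock; /* short-held: holder never sleeps */
        struct fence *sync_obj;
};

/* Old scheme: snapshot the fence pointer under the dedicated spinlock.
 * The critical section is a single load, so readers never wait long. */
static struct fence *read_fence_spinlock(struct bo *bo)
{
        struct fence *f;

        pthread_spin_lock(&bo->fence_lock);
        f = bo->sync_obj;
        pthread_spin_unlock(&bo->fence_lock);
        return f;
}

/* New scheme: the fence pointer is only valid under the reservation.
 * If another thread reserved the bo and is waiting for idle, this read
 * blocks for the whole GPU wait just to load one pointer. */
static struct fence *read_fence_reserved(struct bo *bo)
{
        struct fence *f;

        pthread_mutex_lock(&bo->reserve);
        f = bo->sync_obj;
        pthread_mutex_unlock(&bo->reserve);
        return f;
}

int main(void)
{
        struct bo bo = { .sync_obj = NULL };

        pthread_mutex_init(&bo.reserve, NULL);
        pthread_spin_init(&bo.fence_lock, PTHREAD_PROCESS_PRIVATE);

        printf("spinlock read:    %p\n", (void *)read_fence_spinlock(&bo));
        printf("reservation read: %p\n", (void *)read_fence_reserved(&bo));
        return 0;
}

The point is that in the second variant a reader inherits the full
latency of whatever the reservation holder is doing, rather than a few
cycles of spinlock hold time.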
In essence, to read the fence pointer, there is a large chance you will
end up waiting for idle just to be able to access it. That's why it's
protected by a separate spinlock in the first place. So even if this
change might not affect the current drivers much, it's a change to the
TTM API that leads to an IMHO very poor design.

/Thomas

On 10/12/2012 05:23 PM, Maarten Lankhorst wrote:
> With the nouveau calls fixed there were only 2 places left that used
> fence_lock without a reservation, those are fixed in this patch:
>
> ttm_bo_cleanup_refs_or_queue is fixed by simply doing things the other
> way around.
>
> ttm_bo_cleanup_refs is fixed by taking a reservation first, then a pointer
> to the fence object and backs off from the reservation if waiting has to
> be performed.
>
> Signed-off-by: Maarten Lankhorst <maarten.lankhorst at canonical.com>
> ---
> This patch should be applied only after all the previous patches I sent
> are applied, since it depends on sync_obj_arg removal (kinda, probably
> fails to apply otherwise), uses ttm_bo_is_reserved, and depends on
> nouveau wait behavior being fixed.
>
>  drivers/gpu/drm/nouveau/nouveau_bo.c   |    4 -
>  drivers/gpu/drm/nouveau/nouveau_gem.c  |   15 +---
>  drivers/gpu/drm/radeon/radeon_object.c |    2 -
>  drivers/gpu/drm/ttm/ttm_bo.c           |  124 ++++++++++++--------------------
>  drivers/gpu/drm/ttm/ttm_bo_util.c      |    3 -
>  drivers/gpu/drm/ttm/ttm_bo_vm.c        |    5 -
>  drivers/gpu/drm/ttm/ttm_execbuf_util.c |    2 -
>  drivers/gpu/drm/vmwgfx/vmwgfx_drv.c    |    3 -
>  include/drm/ttm/ttm_bo_api.h           |   12 +--
>  include/drm/ttm/ttm_bo_driver.h        |    3 -
>  10 files changed, 52 insertions(+), 121 deletions(-)
>
> diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
> index cee7448..9749c45 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_bo.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
> @@ -1449,10 +1449,8 @@ nouveau_bo_fence(struct nouveau_bo *nvbo, struct nouveau_fence *fence)
>          if (likely(fence))
>                  nouveau_fence_ref(fence);
>
> -        spin_lock(&nvbo->bo.bdev->fence_lock);
>          old_fence = nvbo->bo.sync_obj;
>          nvbo->bo.sync_obj = fence;
> -        spin_unlock(&nvbo->bo.bdev->fence_lock);
>
>          nouveau_fence_unref(&old_fence);
>  }
> @@ -1552,9 +1550,7 @@ nouveau_bo_vma_del(struct nouveau_bo *nvbo, struct nouveau_vma *vma)
>          if (vma->node) {
>                  if (nvbo->bo.mem.mem_type != TTM_PL_SYSTEM) {
>                          ttm_bo_reserve(&nvbo->bo, false, false, false, 0);
> -                        spin_lock(&nvbo->bo.bdev->fence_lock);
>                          ttm_bo_wait(&nvbo->bo, false, false, false);
> -                        spin_unlock(&nvbo->bo.bdev->fence_lock);
>                          ttm_bo_unreserve(&nvbo->bo);
>                          nouveau_vm_unmap(vma);
>                  }
> diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c
> index 6d8391d..eaa700a 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_gem.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
> @@ -391,18 +391,11 @@ retry:
>  static int
>  validate_sync(struct nouveau_channel *chan, struct nouveau_bo *nvbo)
>  {
> -        struct nouveau_fence *fence = NULL;
> +        struct nouveau_fence *fence = nvbo->bo.sync_obj;
>          int ret = 0;
>
> -        spin_lock(&nvbo->bo.bdev->fence_lock);
> -        if (nvbo->bo.sync_obj)
> -                fence = nouveau_fence_ref(nvbo->bo.sync_obj);
> -        spin_unlock(&nvbo->bo.bdev->fence_lock);
> -
> -        if (fence) {
> +        if (fence)
>                  ret = nouveau_fence_sync(fence, chan);
> -                nouveau_fence_unref(&fence);
> -        }
>
>          return ret;
>  }
> @@ -614,9 +607,7 @@ nouveau_gem_pushbuf_reloc_apply(struct drm_device *dev,
>                          data |= r->vor;
>                  }
>
> -                spin_lock(&nvbo->bo.bdev->fence_lock);
>                  ret = ttm_bo_wait(&nvbo->bo, false, false, false);
> -                spin_unlock(&nvbo->bo.bdev->fence_lock);
>                  if (ret) {
>                          NV_ERROR(drm, "reloc wait_idle failed: %d\n", ret);
>                          break;
> @@ -848,11 +839,9 @@ nouveau_gem_ioctl_cpu_prep(struct drm_device *dev, void *data,
>
>          ret = ttm_bo_reserve(&nvbo->bo, true, false, false, 0);
>          if (!ret) {
> -                spin_lock(&nvbo->bo.bdev->fence_lock);
>                  ret = ttm_bo_wait(&nvbo->bo, true, true, true);
>                  if (!no_wait && ret)
>                          fence = nouveau_fence_ref(nvbo->bo.sync_obj);
> -                spin_unlock(&nvbo->bo.bdev->fence_lock);
>
>                  ttm_bo_unreserve(&nvbo->bo);
>          }
> diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
> index 808c444..df430e4 100644
> --- a/drivers/gpu/drm/radeon/radeon_object.c
> +++ b/drivers/gpu/drm/radeon/radeon_object.c
> @@ -613,12 +613,10 @@ int radeon_bo_wait(struct radeon_bo *bo, u32 *mem_type, bool no_wait)
>          r = ttm_bo_reserve(&bo->tbo, true, no_wait, false, 0);
>          if (unlikely(r != 0))
>                  return r;
> -        spin_lock(&bo->tbo.bdev->fence_lock);
>          if (mem_type)
>                  *mem_type = bo->tbo.mem.mem_type;
>          if (bo->tbo.sync_obj)
>                  r = ttm_bo_wait(&bo->tbo, true, true, no_wait);
> -        spin_unlock(&bo->tbo.bdev->fence_lock);
>          ttm_bo_unreserve(&bo->tbo);
>          return r;
>  }
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index f6d7026..69fc29b 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -498,28 +498,23 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
>  {
>          struct ttm_bo_device *bdev = bo->bdev;
>          struct ttm_bo_global *glob = bo->glob;
> -        struct ttm_bo_driver *driver;
> +        struct ttm_bo_driver *driver = bdev->driver;
>          void *sync_obj = NULL;
>          int put_count;
>          int ret;
>
> -        spin_lock(&bdev->fence_lock);
> -        (void) ttm_bo_wait(bo, false, false, true);
> -        if (!bo->sync_obj) {
> -
> -                spin_lock(&glob->lru_lock);
> -
> -                /**
> -                 * Lock inversion between bo:reserve and bdev::fence_lock here,
> -                 * but that's OK, since we're only trylocking.
> -                 */
> -
> -                ret = ttm_bo_reserve_locked(bo, false, true, false, 0);
> +        spin_lock(&glob->lru_lock);
> +        ret = ttm_bo_reserve_locked(bo, false, true, false, 0);
> +        if (!ret) {
> +                ret = ttm_bo_wait(bo, false, false, true);
>
> -                if (unlikely(ret == -EBUSY))
> +                if (unlikely(ret == -EBUSY)) {
> +                        sync_obj = driver->sync_obj_ref(bo->sync_obj);
> +                        atomic_set(&bo->reserved, 0);
> +                        wake_up_all(&bo->event_queue);
>                          goto queue;
> +                }
>
> -                spin_unlock(&bdev->fence_lock);
>                  put_count = ttm_bo_del_from_lru(bo);
>
>                  spin_unlock(&glob->lru_lock);
> @@ -528,18 +523,11 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
>                  ttm_bo_list_ref_sub(bo, put_count, true);
>
>                  return;
> -        } else {
> -                spin_lock(&glob->lru_lock);
>          }
>  queue:
> -        driver = bdev->driver;
> -        if (bo->sync_obj)
> -                sync_obj = driver->sync_obj_ref(bo->sync_obj);
> -
>          kref_get(&bo->list_kref);
>          list_add_tail(&bo->ddestroy, &bdev->ddestroy);
>          spin_unlock(&glob->lru_lock);
> -        spin_unlock(&bdev->fence_lock);
>
>          if (sync_obj) {
>                  driver->sync_obj_flush(sync_obj);
> @@ -565,25 +553,15 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>                                 bool no_wait_gpu)
>  {
>          struct ttm_bo_device *bdev = bo->bdev;
> +        struct ttm_bo_driver *driver = bdev->driver;
>          struct ttm_bo_global *glob = bo->glob;
>          int put_count;
>          int ret = 0;
> +        void *sync_obj;
>
>  retry:
> -        spin_lock(&bdev->fence_lock);
> -        ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
> -        spin_unlock(&bdev->fence_lock);
> -
> -        if (unlikely(ret != 0))
> -                return ret;
> -
>          spin_lock(&glob->lru_lock);
>
> -        if (unlikely(list_empty(&bo->ddestroy))) {
> -                spin_unlock(&glob->lru_lock);
> -                return 0;
> -        }
> -
>          ret = ttm_bo_reserve_locked(bo, interruptible,
>                                      no_wait_reserve, false, 0);
>
> @@ -592,18 +570,37 @@ retry:
>                  return ret;
>          }
>
> -        /**
> -         * We can re-check for sync object without taking
> -         * the bo::lock since setting the sync object requires
> -         * also bo::reserved. A busy object at this point may
> -         * be caused by another thread recently starting an accelerated
> -         * eviction.
> -         */
> +        if (unlikely(list_empty(&bo->ddestroy))) {
> +                atomic_set(&bo->reserved, 0);
> +                wake_up_all(&bo->event_queue);
> +                spin_unlock(&glob->lru_lock);
> +                return 0;
> +        }
> +        ret = ttm_bo_wait(bo, false, false, true);
> +
> +        if (ret) {
> +                if (no_wait_gpu) {
> +                        atomic_set(&bo->reserved, 0);
> +                        wake_up_all(&bo->event_queue);
> +                        spin_unlock(&glob->lru_lock);
> +                        return ret;
> +                }
>
> -        if (unlikely(bo->sync_obj)) {
> +                /**
> +                 * Take a reference to the fence and unreserve, if the wait
> +                 * was succesful and no new sync_obj was attached,
> +                 * ttm_bo_wait in retry will return ret = 0, and end the loop.
> +                 */
> +
> +                sync_obj = driver->sync_obj_ref(&bo->sync_obj);
>                  atomic_set(&bo->reserved, 0);
>                  wake_up_all(&bo->event_queue);
>                  spin_unlock(&glob->lru_lock);
> +
> +                ret = driver->sync_obj_wait(bo->sync_obj, false, interruptible);
> +                driver->sync_obj_unref(&sync_obj);
> +                if (ret)
> +                        return ret;
>                  goto retry;
>          }
>
> @@ -735,9 +732,7 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo, bool interruptible,
>          struct ttm_placement placement;
>          int ret = 0;
>
> -        spin_lock(&bdev->fence_lock);
>          ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
> -        spin_unlock(&bdev->fence_lock);
>
>          if (unlikely(ret != 0)) {
>                  if (ret != -ERESTARTSYS) {
> @@ -1054,7 +1049,6 @@ int ttm_bo_move_buffer(struct ttm_buffer_object *bo,
>  {
>          int ret = 0;
>          struct ttm_mem_reg mem;
> -        struct ttm_bo_device *bdev = bo->bdev;
>
>          BUG_ON(!ttm_bo_is_reserved(bo));
>
> @@ -1063,9 +1057,7 @@ int ttm_bo_move_buffer(struct ttm_buffer_object *bo,
>           * Have the driver move function wait for idle when necessary,
>           * instead of doing it here.
>           */
> -        spin_lock(&bdev->fence_lock);
>          ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
> -        spin_unlock(&bdev->fence_lock);
>          if (ret)
>                  return ret;
>          mem.num_pages = bo->num_pages;
> @@ -1554,7 +1546,6 @@ int ttm_bo_device_init(struct ttm_bo_device *bdev,
>          bdev->glob = glob;
>          bdev->need_dma32 = need_dma32;
>          bdev->val_seq = 0;
> -        spin_lock_init(&bdev->fence_lock);
>          mutex_lock(&glob->device_list_mutex);
>          list_add_tail(&bdev->device_list, &glob->device_list);
>          mutex_unlock(&glob->device_list_mutex);
> @@ -1690,52 +1681,33 @@ int ttm_bo_wait(struct ttm_buffer_object *bo,
>                  bool lazy, bool interruptible, bool no_wait)
>  {
>          struct ttm_bo_driver *driver = bo->bdev->driver;
> -        struct ttm_bo_device *bdev = bo->bdev;
> -        void *sync_obj;
>          int ret = 0;
>
> +        WARN_ON_ONCE(!ttm_bo_is_reserved(bo));
> +
>          if (likely(bo->sync_obj == NULL))
>                  return 0;
>
> -        while (bo->sync_obj) {
> -
> +        if (bo->sync_obj) {
>                  if (driver->sync_obj_signaled(bo->sync_obj)) {
>                          void *tmp_obj = bo->sync_obj;
>                          bo->sync_obj = NULL;
>                          clear_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags);
> -                        spin_unlock(&bdev->fence_lock);
>                          driver->sync_obj_unref(&tmp_obj);
> -                        spin_lock(&bdev->fence_lock);
> -                        continue;
> +                        return 0;
>                  }
>
>                  if (no_wait)
>                          return -EBUSY;
>
> -                sync_obj = driver->sync_obj_ref(bo->sync_obj);
> -                spin_unlock(&bdev->fence_lock);
> -                ret = driver->sync_obj_wait(sync_obj,
> +                ret = driver->sync_obj_wait(bo->sync_obj,
>                                              lazy, interruptible);
>                  if (unlikely(ret != 0)) {
> -                        driver->sync_obj_unref(&sync_obj);
> -                        spin_lock(&bdev->fence_lock);
>                          return ret;
>                  }
> -                spin_lock(&bdev->fence_lock);
> -                if (likely(bo->sync_obj == sync_obj)) {
> -                        void *tmp_obj = bo->sync_obj;
> -                        bo->sync_obj = NULL;
> -                        clear_bit(TTM_BO_PRIV_FLAG_MOVING,
> -                                  &bo->priv_flags);
> -                        spin_unlock(&bdev->fence_lock);
> -                        driver->sync_obj_unref(&sync_obj);
> -                        driver->sync_obj_unref(&tmp_obj);
> -                        spin_lock(&bdev->fence_lock);
> -                } else {
> -                        spin_unlock(&bdev->fence_lock);
> -                        driver->sync_obj_unref(&sync_obj);
> -                        spin_lock(&bdev->fence_lock);
> -                }
> +                driver->sync_obj_unref(&bo->sync_obj);
> +                clear_bit(TTM_BO_PRIV_FLAG_MOVING,
> +                          &bo->priv_flags);
>          }
>          return 0;
>  }
> @@ -1799,9 +1771,7 @@ static int ttm_bo_swapout(struct ttm_mem_shrink *shrink)
>           * Wait for GPU, then move to system cached.
> */ > > - spin_lock(&bo->bdev->fence_lock); > ret = ttm_bo_wait(bo, false, false, false); > - spin_unlock(&bo->bdev->fence_lock); > > if (unlikely(ret != 0)) > goto out; > diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c > b/drivers/gpu/drm/ttm/ttm_bo_util.c > index d80d1e8..84d6e01 100644 > --- a/drivers/gpu/drm/ttm/ttm_bo_util.c > +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c > @@ -622,7 +622,6 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object > *bo, > struct ttm_buffer_object *ghost_obj; > void *tmp_obj = NULL; > > - spin_lock(&bdev->fence_lock); > if (bo->sync_obj) { > tmp_obj = bo->sync_obj; > bo->sync_obj = NULL; > @@ -630,7 +629,6 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object > *bo, > bo->sync_obj = driver->sync_obj_ref(sync_obj); > if (evict) { > ret = ttm_bo_wait(bo, false, false, false); > - spin_unlock(&bdev->fence_lock); > if (tmp_obj) > driver->sync_obj_unref(&tmp_obj); > if (ret) > @@ -653,7 +651,6 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object > *bo, > */ > > set_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags); > - spin_unlock(&bdev->fence_lock); > if (tmp_obj) > driver->sync_obj_unref(&tmp_obj); > > diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c > index a877813..55718c2 100644 > --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c > +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c > @@ -122,17 +122,14 @@ static int ttm_bo_vm_fault(struct vm_area_struct *vma, > struct vm_fault *vmf) > * move. > */ > > - spin_lock(&bdev->fence_lock); > if (test_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags)) { > ret = ttm_bo_wait(bo, false, true, false); > - spin_unlock(&bdev->fence_lock); > if (unlikely(ret != 0)) { > retval = (ret != -ERESTARTSYS) ? > VM_FAULT_SIGBUS : VM_FAULT_NOPAGE; > goto out_unlock; > } > - } else > - spin_unlock(&bdev->fence_lock); > + } > > ret = ttm_mem_io_lock(man, true); > if (unlikely(ret != 0)) { > diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c > b/drivers/gpu/drm/ttm/ttm_execbuf_util.c > index 721886e..51b5e97 100644 > --- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c > +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c > @@ -207,7 +207,6 @@ void ttm_eu_fence_buffer_objects(struct list_head *list, > void *sync_obj) > driver = bdev->driver; > glob = bo->glob; > > - spin_lock(&bdev->fence_lock); > spin_lock(&glob->lru_lock); > > list_for_each_entry(entry, list, head) { > @@ -218,7 +217,6 @@ void ttm_eu_fence_buffer_objects(struct list_head *list, > void *sync_obj) > entry->reserved = false; > } > spin_unlock(&glob->lru_lock); > - spin_unlock(&bdev->fence_lock); > > list_for_each_entry(entry, list, head) { > if (entry->old_sync_obj) > diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c > b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c > index ed3c1e7..e038c9a 100644 > --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c > +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c > @@ -251,13 +251,10 @@ static void vmw_dummy_query_bo_prepare(struct > vmw_private *dev_priv) > volatile SVGA3dQueryResult *result; > bool dummy; > int ret; > - struct ttm_bo_device *bdev = &dev_priv->bdev; > struct ttm_buffer_object *bo = dev_priv->dummy_query_bo; > > ttm_bo_reserve(bo, false, false, false, 0); > - spin_lock(&bdev->fence_lock); > ret = ttm_bo_wait(bo, false, false, false); > - spin_unlock(&bdev->fence_lock); > if (unlikely(ret != 0)) > (void) vmw_fallback_wait(dev_priv, false, true, 0, false, > 10*HZ); > diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h > index 816d9b1..6c69224 100644 > --- a/include/drm/ttm/ttm_bo_api.h > +++ b/include/drm/ttm/ttm_bo_api.h > @@ 
-222,6 +222,8 @@ struct ttm_buffer_object { > struct file *persistent_swap_storage; > struct ttm_tt *ttm; > bool evicted; > + void *sync_obj; > + unsigned long priv_flags; > > /** > * Members protected by the bdev::lru_lock. > @@ -242,16 +244,6 @@ struct ttm_buffer_object { > atomic_t reserved; > > /** > - * Members protected by struct buffer_object_device::fence_lock > - * In addition, setting sync_obj to anything else > - * than NULL requires bo::reserved to be held. This allows for > - * checking NULL while reserved but not holding the mentioned lock. > - */ > - > - void *sync_obj; > - unsigned long priv_flags; > - > - /** > * Members protected by the bdev::vm_lock > */ > > diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h > index 0c8c3b5..aac101b 100644 > --- a/include/drm/ttm/ttm_bo_driver.h > +++ b/include/drm/ttm/ttm_bo_driver.h > @@ -515,8 +515,6 @@ struct ttm_bo_global { > * > * @driver: Pointer to a struct ttm_bo_driver struct setup by the driver. > * @man: An array of mem_type_managers. > - * @fence_lock: Protects the synchronizing members on *all* bos belonging > - * to this device. > * @addr_space_mm: Range manager for the device address space. > * lru_lock: Spinlock that protects the buffer+device lru lists and > * ddestroy lists. > @@ -539,7 +537,6 @@ struct ttm_bo_device { > struct ttm_bo_driver *driver; > rwlock_t vm_lock; > struct ttm_mem_type_manager man[TTM_NUM_MEM_TYPES]; > - spinlock_t fence_lock; > /* > * Protected by the vm lock. > */ > > _______________________________________________ > dri-devel mailing list > dri-devel at lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/dri-devel