Hi, Christian,

While this has probably already been committed, and it looks like a nice cleanup, there are two things below I think need fixing.

On 11/15/2017 01:31 PM, Christian König wrote:
There is no guarantee that the next entry on the ddestroy list stays on
the list when we drop the locks.

Completely rework this mess by moving processed entries onto a temporary
list.

Signed-off-by: Christian König <christian.koe...@amd.com>
---
  drivers/gpu/drm/ttm/ttm_bo.c | 77 ++++++++++++++------------------------------
  1 file changed, 25 insertions(+), 52 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 7c1eac4f4b4b..ad0afdd71f21 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -572,71 +572,47 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
   * Traverse the delayed list, and call ttm_bo_cleanup_refs on all
   * encountered buffers.
   */
-
-static int ttm_bo_delayed_delete(struct ttm_bo_device *bdev, bool remove_all)
+static bool ttm_bo_delayed_delete(struct ttm_bo_device *bdev, bool remove_all)
  {
        struct ttm_bo_global *glob = bdev->glob;
-       struct ttm_buffer_object *entry = NULL;
-       int ret = 0;
-
-       spin_lock(&glob->lru_lock);
-       if (list_empty(&bdev->ddestroy))
-               goto out_unlock;
+       struct list_head removed;
+       bool empty;
-       entry = list_first_entry(&bdev->ddestroy,
-               struct ttm_buffer_object, ddestroy);
-       kref_get(&entry->list_kref);
+       INIT_LIST_HEAD(&removed);
-       for (;;) {
-               struct ttm_buffer_object *nentry = NULL;
-
-               if (entry->ddestroy.next != &bdev->ddestroy) {
-                       nentry = list_first_entry(&entry->ddestroy,
-                               struct ttm_buffer_object, ddestroy);
-                       kref_get(&nentry->list_kref);
-               }
-
-               ret = reservation_object_trylock(entry->resv) ? 0 : -EBUSY;
-               if (remove_all && ret) {
-                       spin_unlock(&glob->lru_lock);
-                       ret = reservation_object_lock(entry->resv, NULL);
-                       spin_lock(&glob->lru_lock);
-               }
+       spin_lock(&glob->lru_lock);
+       while (!list_empty(&bdev->ddestroy)) {
+               struct ttm_buffer_object *bo;
-               if (!ret)
-                       ret = ttm_bo_cleanup_refs(entry, false, !remove_all,
-                                                 true);
-               else
-                       spin_unlock(&glob->lru_lock);
+               bo = list_first_entry(&bdev->ddestroy, struct ttm_buffer_object,
+                                     ddestroy);
+               kref_get(&bo->list_kref);
+               list_move_tail(&bo->ddestroy, &removed);
+               spin_unlock(&glob->lru_lock);
-               kref_put(&entry->list_kref, ttm_bo_release_list);
-               entry = nentry;
+               reservation_object_lock(bo->resv, NULL);

Reservation may be a long-lived lock, and typically if the object is reserved here, it's being evicted somewhere, so there might be a substantial stall, which isn't really acceptable in the global workqueue. Better to move on to the next bo. This function was really intended to be non-blocking unless remove_all == true. I even think it's safe to keep the spinlock held on tryreserve?
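Something along these lines (just a rough sketch against the names in this patch, not the committed code, and assuming the "removed" list gets spliced back onto ddestroy at the end of the function) would keep the common case non-blocking and only sleep on the reservation when remove_all is set:

        if (reservation_object_trylock(bo->resv)) {
                /* Uncontended: clean up without ever dropping the spinlock. */
                ttm_bo_cleanup_refs(bo, false, !remove_all, true);
        } else if (remove_all) {
                /* Only block on a contended reservation when the caller
                 * insists on removing everything. */
                spin_unlock(&glob->lru_lock);
                reservation_object_lock(bo->resv, NULL);
                spin_lock(&glob->lru_lock);
                ttm_bo_cleanup_refs(bo, false, !remove_all, true);
        } else {
                /* Busy and we're allowed to skip it: leave it for a later
                 * run of the delayed work and move on to the next bo. */
                spin_unlock(&glob->lru_lock);
        }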

-               if (ret || !entry)
-                       goto out;
+               spin_lock(&glob->lru_lock);
+               ttm_bo_cleanup_refs(bo, false, !remove_all, true);
+               kref_put(&bo->list_kref, ttm_bo_release_list);

Calling a release function in atomic context is a bad thing. Nobody knows what locks need to be taken in the release function, and such code is prone to lock inversion and sleep-while-atomic bugs. Not long ago even vfree() was forbidden from atomic context. But here it's easily avoidable.
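For example (again only a sketch, and assuming ttm_bo_cleanup_refs() returns here with the lru_lock still held), dropping the spinlock before the final kref_put() keeps ttm_bo_release_list() out of atomic context:

        spin_lock(&glob->lru_lock);
        ttm_bo_cleanup_refs(bo, false, !remove_all, true);
        spin_unlock(&glob->lru_lock);

        kref_put(&bo->list_kref, ttm_bo_release_list);

        spin_lock(&glob->lru_lock);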

/Thomas

