Allow specifying a drm_exec object in TTM's operation context which is
used to lock objects during eviction.

This allows deadlocks to be handled much more gracefully, which in turn
avoids returning -ENOMEM on heavily contended domains.

v2: rebased on top of Thomas' work

TODO: This still doesn't correctly handle BOs which are about to be
torn down.

Signed-off-by: Christian König <christian.koe...@amd.com>
---
 drivers/gpu/drm/ttm/ttm_bo_util.c | 45 +++++++++++++++++++++++++------
 drivers/gpu/drm/ttm/ttm_bo_util.h |  2 ++
 include/drm/ttm/ttm_bo.h          |  3 +++
 3 files changed, 42 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c 
b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 7a4bc7e9950b..850e329ab5a5 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -36,6 +36,7 @@
 #include <drm/ttm/ttm_tt.h>
 
 #include <drm/drm_cache.h>
+#include <drm/drm_exec.h>
 
 #include "ttm_bo_util.h"
 
@@ -776,15 +777,22 @@ static bool ttm_lru_walk_trylock(struct ttm_lru_walk 
*walk,
 {
        struct ttm_operation_ctx *ctx = walk->ctx;
 
+       walk->needs_drop = false;
        walk->needs_unlock = false;
 
-       if (dma_resv_trylock(bo->base.resv)) {
-               walk->needs_unlock = true;
+       if (bo->base.resv == ctx->resv && ctx->allow_res_evict) {
+               dma_resv_assert_held(bo->base.resv);
                return true;
        }
 
-       if (bo->base.resv == ctx->resv && ctx->allow_res_evict) {
-               dma_resv_assert_held(bo->base.resv);
+       if (walk->ctx->exec) {
+               if (drm_exec_trylock_obj(walk->ctx->exec, &bo->base)) {
+                       walk->needs_drop = true;
+                       return true;
+               }
+
+       } else if (dma_resv_trylock(bo->base.resv)) {
+               walk->needs_unlock = true;
                return true;
        }
 
@@ -797,7 +805,9 @@ static int ttm_lru_walk_ticketlock(struct ttm_lru_walk 
*walk,
        struct dma_resv *resv = bo->base.resv;
        int ret;
 
-       if (walk->ctx->interruptible)
+       if (walk->ctx->exec)
+               ret = drm_exec_lock_obj(walk->ctx->exec, &bo->base);
+       else if (walk->ctx->interruptible)
                ret = dma_resv_lock_interruptible(resv, walk->ticket);
        else
                ret = dma_resv_lock(resv, walk->ticket);
@@ -811,7 +821,8 @@ static int ttm_lru_walk_ticketlock(struct ttm_lru_walk 
*walk,
                 * trylocking for this walk.
                 */
                walk->ticket = NULL;
-       } else if (ret == -EDEADLK) {
+
+       } else if (!walk->ctx->exec && ret == -EDEADLK) {
                /* Caller needs to exit the ww transaction. */
                ret = -ENOSPC;
        }
@@ -822,7 +833,15 @@ static int ttm_lru_walk_ticketlock(struct ttm_lru_walk 
*walk,
 static void ttm_lru_walk_unlock(struct ttm_lru_walk *walk,
                                struct ttm_buffer_object *bo)
 {
-       if (walk->needs_unlock)
+       if (walk->needs_drop)
+               drm_exec_drop_trylocked_obj(walk->ctx->exec, &bo->base);
+
+       if (!walk->needs_unlock)
+               return;
+
+       if (walk->ctx->exec)
+               drm_exec_unlock_obj(walk->ctx->exec, &bo->base);
+       else
                dma_resv_unlock(bo->base.resv);
 }
 
@@ -891,8 +910,18 @@ s64 ttm_lru_walk_for_evict(struct ttm_lru_walk *walk, 
struct ttm_device *bdev,
                spin_unlock(&bdev->lru_lock);
 
                lret = 0;
-               if (!bo_locked)
+               if (!bo_locked) {
                        lret = ttm_lru_walk_ticketlock(walk, bo);
+               } else if (walk->ctx->exec && !bo->deleted) {
+                       lret = drm_exec_keep_trylocked_obj(walk->ctx->exec,
+                                                          &bo->base);
+                       if (!lret) {
+                               walk->needs_drop = false;
+                               walk->needs_unlock = true;
+                       }
+               } else {
+                       lret = 0;
+               }
 
                /*
                 * Note that in between the release of the lru lock and the
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.h 
b/drivers/gpu/drm/ttm/ttm_bo_util.h
index c653e16ccb76..5e1bb156837f 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.h
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.h
@@ -59,6 +59,8 @@ struct ttm_lru_walk {
        struct ww_acquire_ctx *ticket;
        /** @tryock_only: Only use trylock for locking. */
        bool trylock_only;
+       /** @needs_drop: If the current BO needs a drm_exec trylock drop */
+       bool needs_drop;
        /** @needs_unlock: If the current BO needs unlocking */
        bool needs_unlock;
 };
diff --git a/include/drm/ttm/ttm_bo.h b/include/drm/ttm/ttm_bo.h
index 5f7c967222a2..5bee917e01e2 100644
--- a/include/drm/ttm/ttm_bo.h
+++ b/include/drm/ttm/ttm_bo.h
@@ -180,6 +180,8 @@ struct ttm_bo_kmap_obj {
  * faults. Should only be used by TTM internally.
  * @resv: Reservation object to allow reserved evictions with.
  * @bytes_moved: Statistics on how many bytes have been moved.
+ * @exec: optional drm_exec object to use for locking BOs and tracking which are
+ * locked.
  *
  * Context for TTM operations like changing buffer placement or general memory
  * allocation.
@@ -192,6 +194,7 @@ struct ttm_operation_ctx {
        bool force_alloc;
        struct dma_resv *resv;
        uint64_t bytes_moved;
+       struct drm_exec *exec;
 };
 
 /**
-- 
2.34.1

Reply via email to