Add ttm_bo_evict_cgroup() to evict buffer objects charged to a specific
dmem cgroup pool from a resource manager's LRU until a byte target is
met.  Add ttm_resource_manager_set_dmem_region() to register the TTM
eviction path as the reclaim callback for a dmem cgroup region.

The eviction context is interruptible; signals abort the operation and
propagate back through the write() syscall.

Introduce a new mode for the bo LRU walker so that sleeping locks
can be taken. This mode can be used when the caller does not already
hold any dma_resv locks and intends to hold at most one lock at a
time.

Like the rest of the TTM eviction code, this should be converted to
full WW transactions sooner rather than later.

Assisted-by: GitHub Copilot:claude-sonnet-4.6
Signed-off-by: Thomas Hellström <[email protected]>
---
 drivers/gpu/drm/ttm/ttm_bo.c       | 95 +++++++++++++++++++++++++++++-
 drivers/gpu/drm/ttm/ttm_bo_util.c  |  3 +-
 drivers/gpu/drm/ttm/ttm_resource.c | 36 +++++++++++
 include/drm/ttm/ttm_bo.h           | 10 ++++
 include/drm/ttm/ttm_resource.h     |  4 ++
 5 files changed, 144 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index d85f0a37ac35..1745557c184c 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -515,12 +515,20 @@ static s64 ttm_bo_evict_cb(struct ttm_lru_walk *walk, 
struct ttm_buffer_object *
 {
        struct ttm_bo_evict_walk *evict_walk =
                container_of(walk, typeof(*evict_walk), walk);
+       /* Capture size before eviction in case res is cleared. */
+       s64 bo_size = bo->base.size;
        s64 lret;
 
        if (!dmem_cgroup_state_evict_valuable(evict_walk->limit_pool, 
bo->resource->css,
                                              evict_walk->try_low, 
&evict_walk->hit_low))
                return 0;
 
+       /*
+        * evict_walk->place is NULL in cgroup drain mode.  Drivers'
+        * eviction_valuable() callbacks must handle a NULL place, treating it
+        * as "any placement": the TTM base implementation already does so via
+        * ttm_resource_intersects().
+        */
        if (bo->pin_count || !bo->bdev->funcs->eviction_valuable(bo, 
evict_walk->place))
                return 0;
 
@@ -536,11 +544,15 @@ static s64 ttm_bo_evict_cb(struct ttm_lru_walk *walk, 
struct ttm_buffer_object *
                goto out;
 
        evict_walk->evicted++;
-       if (evict_walk->res)
+       if (evict_walk->res) {
                lret = ttm_resource_alloc(evict_walk->evictor, 
evict_walk->place,
                                          evict_walk->res, NULL);
-       if (lret == 0)
-               return 1;
+               if (lret == 0)
+                       return 1;
+       } else {
+               /* Cgroup drain: return bytes freed for byte-denominated 
progress. */
+               return bo_size;
+       }
 out:
        /* Errors that should terminate the walk. */
        if (lret == -ENOSPC)
@@ -614,6 +626,83 @@ static int ttm_bo_evict_alloc(struct ttm_device *bdev,
        return 0;
 }
 
+/**
+ * ttm_bo_evict_cgroup - Evict buffer objects charged to a specific cgroup.
+ * @bdev: The TTM device.
+ * @man: The resource manager whose LRU to walk.
+ * @limit_pool: The cgroup pool state whose members should be evicted.
+ * @target_bytes: Number of bytes to free.
+ * @ctx: The TTM operation context.
+ *
+ * Walk the LRU of @man and evict buffer objects that are charged to the
+ * cgroup identified by @limit_pool, until at least @target_bytes have been
+ * freed.  Mirrors the two-pass (trylock -> sleeping-lock, low-watermark)
+ * strategy used by ttm_bo_evict_alloc().
+ *
+ * Return: >= @target_bytes on full success, 0..target_bytes-1 if partial,
+ *         negative error code on fatal error.
+ */
+s64 ttm_bo_evict_cgroup(struct ttm_device *bdev,
+                       struct ttm_resource_manager *man,
+                       struct dmem_cgroup_pool_state *limit_pool,
+                       s64 target_bytes,
+                       struct ttm_operation_ctx *ctx)
+{
+       struct ttm_bo_evict_walk evict_walk = {
+               .walk = {
+                       .ops = &ttm_evict_walk_ops,
+                       .arg = { .ctx = ctx },
+               },
+               .limit_pool = limit_pool,
+               /* place, evictor, res left NULL: selects cgroup drain mode */
+       };
+       s64 lret, pass;
+
+       evict_walk.walk.arg.trylock_only = true;
+       lret = ttm_lru_walk_for_evict(&evict_walk.walk, bdev, man, 
target_bytes);
+       if (lret < 0 || lret >= target_bytes)
+               return lret;
+
+       /* Second pass: also evict BOs at the low watermark. */
+       if (evict_walk.hit_low) {
+               evict_walk.try_low = true;
+               pass = ttm_lru_walk_for_evict(&evict_walk.walk, bdev, man,
+                                             target_bytes - lret);
+               if (pass < 0)
+                       return pass;
+               lret += pass;
+               if (lret >= target_bytes)
+                       return lret;
+       }
+
+       /* Full sleeping-lock pass for remaining target. */
+       evict_walk.try_low = evict_walk.hit_low = false;
+       evict_walk.walk.arg.trylock_only = false;
+
+retry:
+       evict_walk.walk.arg.sleeping_lock = true;
+       do {
+               evict_walk.evicted = 0;
+               pass = ttm_lru_walk_for_evict(&evict_walk.walk, bdev, man,
+                                             target_bytes - lret);
+               if (pass < 0) {
+                       lret = pass;
+                       goto out;
+               }
+               lret += pass;
+       } while (lret < target_bytes && evict_walk.evicted);
+
+       /* One more attempt if we hit the low limit during sleeping-lock pass. 
*/
+       if (lret < target_bytes && evict_walk.hit_low && !evict_walk.try_low) {
+               evict_walk.try_low = true;
+               goto retry;
+       }
+
+out:
+       return lret;
+}
+EXPORT_SYMBOL(ttm_bo_evict_cgroup);
+
 /**
  * ttm_bo_pin - Pin the buffer object.
  * @bo: The buffer object to pin
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c 
b/drivers/gpu/drm/ttm/ttm_bo_util.c
index f83b7d5ec6c6..81c6a674c462 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -999,7 +999,8 @@ __ttm_bo_lru_cursor_next(struct ttm_bo_lru_cursor *curs)
                bo = res->bo;
                if (ttm_lru_walk_trylock(curs, bo))
                        bo_locked = true;
-               else if (!arg->ticket || arg->ctx->no_wait_gpu || 
arg->trylock_only)
+               else if ((!arg->ticket && !arg->sleeping_lock) || 
arg->ctx->no_wait_gpu ||
+                        arg->trylock_only)
                        continue;
 
                if (!ttm_bo_get_unless_zero(bo)) {
diff --git a/drivers/gpu/drm/ttm/ttm_resource.c 
b/drivers/gpu/drm/ttm/ttm_resource.c
index 9f36631d48b6..936552f426a7 100644
--- a/drivers/gpu/drm/ttm/ttm_resource.c
+++ b/drivers/gpu/drm/ttm/ttm_resource.c
@@ -937,3 +937,39 @@ void ttm_resource_manager_create_debugfs(struct 
ttm_resource_manager *man,
 #endif
 }
 EXPORT_SYMBOL(ttm_resource_manager_create_debugfs);
+
+static int ttm_resource_manager_dmem_reclaim(struct dmem_cgroup_pool_state 
*pool,
+                                            u64 target_bytes, void *priv)
+{
+       struct ttm_resource_manager *man = priv;
+       struct ttm_operation_ctx ctx = { .interruptible = true };
+       s64 freed;
+
+       freed = ttm_bo_evict_cgroup(man->bdev, man, pool, target_bytes, &ctx);
+       if (freed < 0)
+               return freed;
+
+       return freed >= (s64)target_bytes ? 0 : -ENOSPC;
+}
+
+/**
+ * ttm_resource_manager_set_dmem_region - Associate a dmem cgroup region with a
+ *                                        resource manager and register a 
reclaim
+ *                                        callback.
+ * @man: The resource manager.
+ * @region: The dmem cgroup region to associate, may be NULL or IS_ERR().
+ *
+ * Sets @man->cg and registers ttm_resource_manager_dmem_reclaim() so that
+ * writing to dmem.max below current usage triggers TTM eviction rather than
+ * returning -EBUSY to userspace.
+ */
+void ttm_resource_manager_set_dmem_region(struct ttm_resource_manager *man,
+                                         struct dmem_cgroup_region *region)
+{
+       man->cg = region;
+       if (!IS_ERR_OR_NULL(region))
+               dmem_cgroup_region_set_reclaim(region,
+                                              
ttm_resource_manager_dmem_reclaim,
+                                              man);
+}
+EXPORT_SYMBOL(ttm_resource_manager_set_dmem_region);
diff --git a/include/drm/ttm/ttm_bo.h b/include/drm/ttm/ttm_bo.h
index 8310bc3d55f9..32791c4db2a9 100644
--- a/include/drm/ttm/ttm_bo.h
+++ b/include/drm/ttm/ttm_bo.h
@@ -226,6 +226,11 @@ struct ttm_lru_walk_arg {
        struct ww_acquire_ctx *ticket;
        /** @trylock_only: Only use trylock for locking. */
        bool trylock_only;
+       /**
+        * @sleeping_lock: Use sleeping locks even with %NULL @ticket.
+        * @trylock_only has precedence over this field.
+        */
+       bool sleeping_lock;
 };
 
 /**
@@ -431,6 +436,11 @@ void ttm_bo_unpin(struct ttm_buffer_object *bo);
 int ttm_bo_evict_first(struct ttm_device *bdev,
                       struct ttm_resource_manager *man,
                       struct ttm_operation_ctx *ctx);
+s64 ttm_bo_evict_cgroup(struct ttm_device *bdev,
+                       struct ttm_resource_manager *man,
+                       struct dmem_cgroup_pool_state *limit_pool,
+                       s64 target_bytes,
+                       struct ttm_operation_ctx *ctx);
 int ttm_bo_access(struct ttm_buffer_object *bo, unsigned long offset,
                  void *buf, int len, int write);
 vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo,
diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h
index 33e80f30b8b8..c187e6c8b871 100644
--- a/include/drm/ttm/ttm_resource.h
+++ b/include/drm/ttm/ttm_resource.h
@@ -39,6 +39,7 @@
 
 struct dentry;
 struct dmem_cgroup_device;
+struct dmem_cgroup_region;
 struct drm_printer;
 struct ttm_device;
 struct ttm_resource_manager;
@@ -475,6 +476,9 @@ void ttm_resource_manager_init(struct ttm_resource_manager 
*man,
                               struct ttm_device *bdev,
                               uint64_t size);
 
+void ttm_resource_manager_set_dmem_region(struct ttm_resource_manager *man,
+                                         struct dmem_cgroup_region *region);
+
 int ttm_resource_manager_evict_all(struct ttm_device *bdev,
                                   struct ttm_resource_manager *man);
 
-- 
2.53.0

Reply via email to