Add ttm_bo_evict_cgroup() to evict buffer objects charged to a specific dmem cgroup pool from a resource manager's LRU until a byte target is met. Add ttm_resource_manager_set_dmem_region() to register the TTM eviction path as the reclaim callback for a dmem cgroup region.
The eviction context is interruptible; signals abort the operation and propagate back through the write() syscall. Introduce a new mode for the bo LRU walker so that sleeping locks can be taken. This can be used when the caller doesn't hold any previous dma_resv locks, and where it intends to hold at most one lock at a time. Like the rest of the TTM eviction code, this should sooner rather than later be converted to full WW transactions. Assisted-by: GitHub Copilot:claude-sonnet-4.6 Signed-off-by: Thomas Hellström <[email protected]> --- drivers/gpu/drm/ttm/ttm_bo.c | 95 +++++++++++++++++++++++++++++- drivers/gpu/drm/ttm/ttm_bo_util.c | 3 +- drivers/gpu/drm/ttm/ttm_resource.c | 36 +++++++++++ include/drm/ttm/ttm_bo.h | 10 ++++ include/drm/ttm/ttm_resource.h | 4 ++ 5 files changed, 144 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index d85f0a37ac35..1745557c184c 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -515,12 +515,20 @@ static s64 ttm_bo_evict_cb(struct ttm_lru_walk *walk, struct ttm_buffer_object * { struct ttm_bo_evict_walk *evict_walk = container_of(walk, typeof(*evict_walk), walk); + /* Capture size before eviction in case res is cleared. */ + s64 bo_size = bo->base.size; s64 lret; if (!dmem_cgroup_state_evict_valuable(evict_walk->limit_pool, bo->resource->css, evict_walk->try_low, &evict_walk->hit_low)) return 0; + /* + * evict_walk->place is NULL in cgroup drain mode. Drivers' + * eviction_valuable() callbacks must handle a NULL place, treating it + * as "any placement": the TTM base implementation already does so via + * ttm_resource_intersects(). 
+ */ if (bo->pin_count || !bo->bdev->funcs->eviction_valuable(bo, evict_walk->place)) return 0; @@ -536,11 +544,15 @@ static s64 ttm_bo_evict_cb(struct ttm_lru_walk *walk, struct ttm_buffer_object * goto out; evict_walk->evicted++; - if (evict_walk->res) + if (evict_walk->res) { lret = ttm_resource_alloc(evict_walk->evictor, evict_walk->place, evict_walk->res, NULL); - if (lret == 0) - return 1; + if (lret == 0) + return 1; + } else { + /* Cgroup drain: return bytes freed for byte-denominated progress. */ + return bo_size; + } out: /* Errors that should terminate the walk. */ if (lret == -ENOSPC) @@ -614,6 +626,83 @@ static int ttm_bo_evict_alloc(struct ttm_device *bdev, return 0; } +/** + * ttm_bo_evict_cgroup - Evict buffer objects charged to a specific cgroup. + * @bdev: The TTM device. + * @man: The resource manager whose LRU to walk. + * @limit_pool: The cgroup pool state whose members should be evicted. + * @target_bytes: Number of bytes to free. + * @ctx: The TTM operation context. + * + * Walk the LRU of @man and evict buffer objects that are charged to the + * cgroup identified by @limit_pool, until at least @target_bytes have been + * freed. Mirrors the two-pass (trylock -> sleeping-lock, low-watermark) + * strategy used by ttm_bo_evict_alloc(). + * + * Return: >= @target_bytes on full success, 0..target_bytes-1 if partial, + * negative error code on fatal error. 
+ */ +s64 ttm_bo_evict_cgroup(struct ttm_device *bdev, + struct ttm_resource_manager *man, + struct dmem_cgroup_pool_state *limit_pool, + s64 target_bytes, + struct ttm_operation_ctx *ctx) +{ + struct ttm_bo_evict_walk evict_walk = { + .walk = { + .ops = &ttm_evict_walk_ops, + .arg = { .ctx = ctx }, + }, + .limit_pool = limit_pool, + /* place, evictor, res left NULL: selects cgroup drain mode */ + }; + s64 lret, pass; + + evict_walk.walk.arg.trylock_only = true; + lret = ttm_lru_walk_for_evict(&evict_walk.walk, bdev, man, target_bytes); + if (lret < 0 || lret >= target_bytes) + return lret; + + /* Second pass: also evict BOs at the low watermark. */ + if (evict_walk.hit_low) { + evict_walk.try_low = true; + pass = ttm_lru_walk_for_evict(&evict_walk.walk, bdev, man, + target_bytes - lret); + if (pass < 0) + return pass; + lret += pass; + if (lret >= target_bytes) + return lret; + } + + /* Full sleeping-lock pass for remaining target. */ + evict_walk.try_low = evict_walk.hit_low = false; + evict_walk.walk.arg.trylock_only = false; + +retry: + evict_walk.walk.arg.sleeping_lock = true; + do { + evict_walk.evicted = 0; + pass = ttm_lru_walk_for_evict(&evict_walk.walk, bdev, man, + target_bytes - lret); + if (pass < 0) { + lret = pass; + goto out; + } + lret += pass; + } while (lret < target_bytes && evict_walk.evicted); + + /* One more attempt if we hit the low limit during sleeping-lock pass. */ + if (lret < target_bytes && evict_walk.hit_low && !evict_walk.try_low) { + evict_walk.try_low = true; + goto retry; + } + +out: + return lret; +} +EXPORT_SYMBOL(ttm_bo_evict_cgroup); + /** * ttm_bo_pin - Pin the buffer object. 
* @bo: The buffer object to pin diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index f83b7d5ec6c6..81c6a674c462 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -999,7 +999,8 @@ __ttm_bo_lru_cursor_next(struct ttm_bo_lru_cursor *curs) bo = res->bo; if (ttm_lru_walk_trylock(curs, bo)) bo_locked = true; - else if (!arg->ticket || arg->ctx->no_wait_gpu || arg->trylock_only) + else if ((!arg->ticket && !arg->sleeping_lock) || arg->ctx->no_wait_gpu || + arg->trylock_only) continue; if (!ttm_bo_get_unless_zero(bo)) { diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c index 9f36631d48b6..936552f426a7 100644 --- a/drivers/gpu/drm/ttm/ttm_resource.c +++ b/drivers/gpu/drm/ttm/ttm_resource.c @@ -937,3 +937,39 @@ void ttm_resource_manager_create_debugfs(struct ttm_resource_manager *man, #endif } EXPORT_SYMBOL(ttm_resource_manager_create_debugfs); + +static int ttm_resource_manager_dmem_reclaim(struct dmem_cgroup_pool_state *pool, + u64 target_bytes, void *priv) +{ + struct ttm_resource_manager *man = priv; + struct ttm_operation_ctx ctx = { .interruptible = true }; + s64 freed; + + freed = ttm_bo_evict_cgroup(man->bdev, man, pool, target_bytes, &ctx); + if (freed < 0) + return freed; + + return freed >= (s64)target_bytes ? 0 : -ENOSPC; +} + +/** + * ttm_resource_manager_set_dmem_region - Associate a dmem cgroup region with a + * resource manager and register a reclaim + * callback. + * @man: The resource manager. + * @region: The dmem cgroup region to associate, may be NULL or IS_ERR(). + * + * Sets @man->cg and registers ttm_resource_manager_dmem_reclaim() so that + * writing to dmem.max below current usage triggers TTM eviction rather than + * returning -EBUSY to userspace. 
+ */ +void ttm_resource_manager_set_dmem_region(struct ttm_resource_manager *man, + struct dmem_cgroup_region *region) +{ + man->cg = region; + if (!IS_ERR_OR_NULL(region)) + dmem_cgroup_region_set_reclaim(region, + ttm_resource_manager_dmem_reclaim, + man); +} +EXPORT_SYMBOL(ttm_resource_manager_set_dmem_region); diff --git a/include/drm/ttm/ttm_bo.h b/include/drm/ttm/ttm_bo.h index 8310bc3d55f9..32791c4db2a9 100644 --- a/include/drm/ttm/ttm_bo.h +++ b/include/drm/ttm/ttm_bo.h @@ -226,6 +226,11 @@ struct ttm_lru_walk_arg { struct ww_acquire_ctx *ticket; /** @trylock_only: Only use trylock for locking. */ bool trylock_only; + /** + * @sleeping_lock: Use sleeping locks even with %NULL @ticket. + * @trylock_only has precedence over this field. + */ + bool sleeping_lock; }; /** @@ -431,6 +436,11 @@ void ttm_bo_unpin(struct ttm_buffer_object *bo); int ttm_bo_evict_first(struct ttm_device *bdev, struct ttm_resource_manager *man, struct ttm_operation_ctx *ctx); +s64 ttm_bo_evict_cgroup(struct ttm_device *bdev, + struct ttm_resource_manager *man, + struct dmem_cgroup_pool_state *limit_pool, + s64 target_bytes, + struct ttm_operation_ctx *ctx); int ttm_bo_access(struct ttm_buffer_object *bo, unsigned long offset, void *buf, int len, int write); vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo, diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h index 33e80f30b8b8..c187e6c8b871 100644 --- a/include/drm/ttm/ttm_resource.h +++ b/include/drm/ttm/ttm_resource.h @@ -39,6 +39,7 @@ struct dentry; struct dmem_cgroup_device; +struct dmem_cgroup_region; struct drm_printer; struct ttm_device; struct ttm_resource_manager; @@ -475,6 +476,9 @@ void ttm_resource_manager_init(struct ttm_resource_manager *man, struct ttm_device *bdev, uint64_t size); +void ttm_resource_manager_set_dmem_region(struct ttm_resource_manager *man, + struct dmem_cgroup_region *region); + int ttm_resource_manager_evict_all(struct ttm_device *bdev, struct ttm_resource_manager 
*man); -- 2.53.0
