In other words, it should not matter whether a BO is evicted, moved or freed.
Whenever a piece of memory becomes available again we keep around a fence which
marks the end of the last use of that piece of memory.
When somebody then asks for new memory we work through the LRU and test whether
using a certain piece of memory makes sense or not. If we find that a BO needs
to be evicted for this we return a reference to the BO in question to the upper
level handling.
If we find that we can do the allocation, but only with recently freed up
memory, we gather the fences and say: you can only use the newly allocated
memory after waiting for those.
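A minimal user-space sketch of that idea could look like the following. None of
this is TTM code; every name here (range, alloc_result, try_alloc, ...) is made
up purely for illustration:

#include <stdbool.h>
#include <stddef.h>

struct fence;                   /* opaque sync object, signals when the GPU is done */
struct bo;                      /* opaque buffer object */

struct range {
	struct range *lru_next; /* LRU of memory ranges, oldest first */
	size_t size;
	struct bo *owner;       /* NULL if the range is currently free */
	struct fence *last_use; /* set while the previous user may still be busy */
};

struct alloc_result {
	struct range *range;    /* range to use, or NULL */
	struct bo *must_evict;  /* BO the caller has to evict first, or NULL */
	struct fence *wait_for; /* fence to wait for before first use, or NULL */
};

/* Walk the LRU and decide how a request of @size could be satisfied. */
static bool try_alloc(struct range *lru, size_t size, struct alloc_result *res)
{
	struct range *r;

	res->range = NULL;
	res->must_evict = NULL;
	res->wait_for = NULL;

	for (r = lru; r; r = r->lru_next) {
		if (r->size < size)
			continue;

		if (r->owner) {
			/* Still in use: return the BO to the upper level,
			 * which decides whether to evict it.
			 */
			res->must_evict = r->owner;
			return false;
		}

		/* Recently freed: usable, but only after the fence of the
		 * previous user has signaled. An allocator combining several
		 * freed ranges would gather all of their fences here.
		 */
		res->wait_for = r->last_use;
		res->range = r;
		return true;
	}

	return false;
}

The point of the sketch is only the split of responsibilities: the allocator
itself never blocks, it either hands back a BO to evict or the fences the
caller has to wait for before touching the memory.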
HEY! Wait a second! Did I just outline what a potential replacement for TTM
would look like?
Yes, that is a good picture. Looks like we could do more work then. :)
thanks
xinhui
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index e795d5b5f8af..ac826a09b4ec 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -405,6 +405,9 @@ static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo)
if (bo->type != ttm_bo_type_sg) {
spin_lock(&ttm_bo_glob.lru_lock);
+ /* It is very likely that the bo is released successfully.
+ * If not, just add it back.
+ */
ttm_bo_del_from_lru(bo);
spin_unlock(&ttm_bo_glob.lru_lock);
bo->base.resv = &bo->base._resv;
@@ -466,18 +469,20 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
if (unlock_resv)
dma_resv_unlock(bo->base.resv);
- spin_unlock(&ttm_bo_glob.lru_lock);
lret = dma_resv_wait_timeout_rcu(resv, true, interruptible,
30 * HZ);
if (lret < 0)
- return lret;
- else if (lret == 0)
- return -EBUSY;
+ goto busy;
+ else if (lret == 0) {
+ ret = -EBUSY;
+ goto busy;
+ }
- spin_lock(&ttm_bo_glob.lru_lock);
if (unlock_resv && !dma_resv_trylock(bo->base.resv)) {
+ /* there should be no race on it now */
+ BUG();
/*
* We raced, and lost, someone else holds the reservation now,
* and is probably busy in ttm_bo_cleanup_memtype_use.
@@ -486,20 +491,18 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
* delayed destruction would succeed, so just return success
* here.
*/
- spin_unlock(&ttm_bo_glob.lru_lock);
return 0;
}
ret = 0;
}
- if (ret || unlikely(list_empty(&bo->ddestroy))) {
+ if (ret) {
if (unlock_resv)
dma_resv_unlock(bo->base.resv);
- spin_unlock(&ttm_bo_glob.lru_lock);
- return ret;
+ goto busy;
}
- ttm_bo_del_from_lru(bo);
+ spin_lock(&ttm_bo_glob.lru_lock);
list_del_init(&bo->ddestroy);
spin_unlock(&ttm_bo_glob.lru_lock);
ttm_bo_cleanup_memtype_use(bo);
@@ -510,11 +513,20 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
ttm_bo_put(bo);
return 0;
+
+busy:
+ spin_lock(&ttm_bo_glob.lru_lock);
+ ttm_bo_add_mem_to_lru(bo, &bo->mem);
+ spin_unlock(&ttm_bo_glob.lru_lock);
+
+ return ret;
}
/**
* Traverse the delayed list, and call ttm_bo_cleanup_refs on all
* encountered buffers.
+ *
+ * Only called at bo device release.
*/
static bool ttm_bo_delayed_delete(struct ttm_bo_device *bdev, bool remove_all)
{
@@ -533,17 +545,17 @@ static bool ttm_bo_delayed_delete(struct ttm_bo_device *bdev, bool remove_all)
list_move_tail(&bo->ddestroy, &removed);
if (!ttm_bo_get_unless_zero(bo))
continue;
+ ttm_bo_del_from_lru(bo);
+ spin_unlock(&glob->lru_lock);
if (remove_all || bo->base.resv != &bo->base._resv) {
- spin_unlock(&glob->lru_lock);
dma_resv_lock(bo->base.resv, NULL);
-
- spin_lock(&glob->lru_lock);
ttm_bo_cleanup_refs(bo, false, !remove_all, true);
-
} else if (dma_resv_trylock(bo->base.resv)) {
ttm_bo_cleanup_refs(bo, false, !remove_all, true);
} else {
+ spin_lock(&glob->lru_lock);
+ ttm_bo_add_mem_to_lru(bo, &bo->mem);
spin_unlock(&glob->lru_lock);
}
@@ -559,12 +571,8 @@ static bool ttm_bo_delayed_delete(struct ttm_bo_device *bdev, bool remove_all)
static void ttm_bo_delayed_workqueue(struct work_struct *work)
{
- struct ttm_bo_device *bdev =
- container_of(work, struct ttm_bo_device, wq.work);
-
- if (!ttm_bo_delayed_delete(bdev, false))
- schedule_delayed_work(&bdev->wq,
- ((HZ / 100) < 1) ? 1 : HZ / 100);
+ WARN(true, "do not schedule it");
+ return;
}
static void ttm_bo_release(struct kref *kref)
@@ -595,6 +603,7 @@ static void ttm_bo_release(struct kref *kref)
ttm_mem_io_unlock(man);
}
+ /* If the bo is busy, spend a little more time to add it to the lru and ddestroy lists. */
if (!dma_resv_test_signaled_rcu(bo->base.resv, true)) {
/* The BO is not idle, resurrect it for delayed destroy */
ttm_bo_flush_all_fences(bo);
@@ -615,8 +624,6 @@ static void ttm_bo_release(struct kref *kref)
list_add_tail(&bo->ddestroy, &bdev->ddestroy);
spin_unlock(&ttm_bo_glob.lru_lock);
- schedule_delayed_work(&bdev->wq,
- ((HZ / 100) < 1) ? 1 : HZ / 100);
return;
}
@@ -842,6 +849,9 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
return ret;
}
+ ttm_bo_del_from_lru(bo);
+ spin_unlock(&ttm_bo_glob.lru_lock);
+
if (bo->deleted) {
ret = ttm_bo_cleanup_refs(bo, ctx->interruptible,
ctx->no_wait_gpu, locked);
@@ -849,8 +859,6 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
return ret;
}
- spin_unlock(&ttm_bo_glob.lru_lock);
-
ret = ttm_bo_evict(bo, ctx);
if (locked)
ttm_bo_unreserve(bo);
@@ -1809,14 +1817,15 @@ int ttm_bo_swapout(struct ttm_bo_global *glob, struct ttm_operation_ctx *ctx)
return ret;
}
+ ttm_bo_del_from_lru(bo);
+ spin_unlock(&glob->lru_lock);
+
if (bo->deleted) {
ret = ttm_bo_cleanup_refs(bo, false, false, locked);
ttm_bo_put(bo);
return ret;
}
- ttm_bo_del_from_lru(bo);
- spin_unlock(&glob->lru_lock);
/**
* Move to system cached