From: Dave Airlie <airl...@redhat.com>

This is an initial port of the TTM pools for write combined and
uncached pages to use the list_lru.
This makes the pools more NUMA aware and avoids needing separate
NUMA pools (a later commit enables this).

Cc: Christian Koenig <christian.koe...@amd.com>
Cc: Johannes Weiner <han...@cmpxchg.org>
Cc: Dave Chinner <da...@fromorbit.com>
Signed-off-by: Dave Airlie <airl...@redhat.com>
---
v2: drop the pt->lock, the lru list has its own lock, which is sufficient.
    rearrange the list isolates to fix bad locking orders.
---
 drivers/gpu/drm/ttm/tests/ttm_device_test.c |  2 +-
 drivers/gpu/drm/ttm/tests/ttm_pool_test.c   | 32 ++++----
 drivers/gpu/drm/ttm/ttm_pool.c              | 91 ++++++++++++++-------
 include/drm/ttm/ttm_pool.h                  |  6 +-
 4 files changed, 80 insertions(+), 51 deletions(-)

diff --git a/drivers/gpu/drm/ttm/tests/ttm_device_test.c b/drivers/gpu/drm/ttm/tests/ttm_device_test.c
index 1621903818e5..1f207fd222bc 100644
--- a/drivers/gpu/drm/ttm/tests/ttm_device_test.c
+++ b/drivers/gpu/drm/ttm/tests/ttm_device_test.c
@@ -183,7 +183,7 @@ static void ttm_device_init_pools(struct kunit *test)
 
 			if (params->use_dma_alloc)
 				KUNIT_ASSERT_FALSE(test,
-						   list_empty(&pt.pages));
+						   !list_lru_count(&pt.pages));
 		}
 	}
 }
diff --git a/drivers/gpu/drm/ttm/tests/ttm_pool_test.c b/drivers/gpu/drm/ttm/tests/ttm_pool_test.c
index 8ade53371f72..39234a3e98c4 100644
--- a/drivers/gpu/drm/ttm/tests/ttm_pool_test.c
+++ b/drivers/gpu/drm/ttm/tests/ttm_pool_test.c
@@ -248,7 +248,7 @@ static void ttm_pool_alloc_order_caching_match(struct kunit *test)
 	pool = ttm_pool_pre_populated(test, size, caching);
 
 	pt = &pool->caching[caching].orders[order];
-	KUNIT_ASSERT_FALSE(test, list_empty(&pt->pages));
+	KUNIT_ASSERT_FALSE(test, !list_lru_count(&pt->pages));
 
 	tt = ttm_tt_kunit_init(test, 0, caching, size);
 	KUNIT_ASSERT_NOT_NULL(test, tt);
@@ -256,7 +256,7 @@ static void ttm_pool_alloc_order_caching_match(struct kunit *test)
 	err = ttm_pool_alloc(pool, tt, &simple_ctx);
 	KUNIT_ASSERT_EQ(test, err, 0);
 
-	KUNIT_ASSERT_TRUE(test, list_empty(&pt->pages));
+	KUNIT_ASSERT_TRUE(test, !list_lru_count(&pt->pages));
 
 	ttm_pool_free(pool, tt);
 	ttm_tt_fini(tt);
@@ -282,8 +282,8 @@ static void ttm_pool_alloc_caching_mismatch(struct kunit *test)
 	tt = ttm_tt_kunit_init(test, 0, tt_caching, size);
 	KUNIT_ASSERT_NOT_NULL(test, tt);
 
-	KUNIT_ASSERT_FALSE(test, list_empty(&pt_pool->pages));
-	KUNIT_ASSERT_TRUE(test, list_empty(&pt_tt->pages));
+	KUNIT_ASSERT_FALSE(test, !list_lru_count(&pt_pool->pages));
+	KUNIT_ASSERT_TRUE(test, !list_lru_count(&pt_tt->pages));
 
 	err = ttm_pool_alloc(pool, tt, &simple_ctx);
 	KUNIT_ASSERT_EQ(test, err, 0);
@@ -291,8 +291,8 @@ static void ttm_pool_alloc_caching_mismatch(struct kunit *test)
 	ttm_pool_free(pool, tt);
 	ttm_tt_fini(tt);
 
-	KUNIT_ASSERT_FALSE(test, list_empty(&pt_pool->pages));
-	KUNIT_ASSERT_FALSE(test, list_empty(&pt_tt->pages));
+	KUNIT_ASSERT_FALSE(test, !list_lru_count(&pt_pool->pages));
+	KUNIT_ASSERT_FALSE(test, !list_lru_count(&pt_tt->pages));
 
 	ttm_pool_fini(pool);
 }
@@ -316,8 +316,8 @@ static void ttm_pool_alloc_order_mismatch(struct kunit *test)
 	tt = ttm_tt_kunit_init(test, 0, caching, snd_size);
 	KUNIT_ASSERT_NOT_NULL(test, tt);
 
-	KUNIT_ASSERT_FALSE(test, list_empty(&pt_pool->pages));
-	KUNIT_ASSERT_TRUE(test, list_empty(&pt_tt->pages));
+	KUNIT_ASSERT_FALSE(test, !list_lru_count(&pt_pool->pages));
+	KUNIT_ASSERT_TRUE(test, !list_lru_count(&pt_tt->pages));
 
 	err = ttm_pool_alloc(pool, tt, &simple_ctx);
 	KUNIT_ASSERT_EQ(test, err, 0);
@@ -325,8 +325,8 @@ static void ttm_pool_alloc_order_mismatch(struct kunit *test)
 	ttm_pool_free(pool, tt);
 	ttm_tt_fini(tt);
 
-	KUNIT_ASSERT_FALSE(test, list_empty(&pt_pool->pages));
-	KUNIT_ASSERT_FALSE(test, list_empty(&pt_tt->pages));
+	KUNIT_ASSERT_FALSE(test, !list_lru_count(&pt_pool->pages));
+	KUNIT_ASSERT_FALSE(test, !list_lru_count(&pt_tt->pages));
 
 	ttm_pool_fini(pool);
 }
@@ -352,12 +352,12 @@ static void ttm_pool_free_dma_alloc(struct kunit *test)
 	ttm_pool_alloc(pool, tt, &simple_ctx);
 
 	pt = &pool->caching[caching].orders[order];
-	KUNIT_ASSERT_TRUE(test, list_empty(&pt->pages));
+	KUNIT_ASSERT_TRUE(test, !list_lru_count(&pt->pages));
 
 	ttm_pool_free(pool, tt);
 	ttm_tt_fini(tt);
 
-	KUNIT_ASSERT_FALSE(test, list_empty(&pt->pages));
+	KUNIT_ASSERT_FALSE(test, !list_lru_count(&pt->pages));
 
 	ttm_pool_fini(pool);
 }
@@ -383,12 +383,12 @@ static void ttm_pool_free_no_dma_alloc(struct kunit *test)
 	ttm_pool_alloc(pool, tt, &simple_ctx);
 
 	pt = &pool->caching[caching].orders[order];
-	KUNIT_ASSERT_TRUE(test, list_is_singular(&pt->pages));
+	KUNIT_ASSERT_TRUE(test, list_lru_count(&pt->pages) == 1);
 
 	ttm_pool_free(pool, tt);
 	ttm_tt_fini(tt);
 
-	KUNIT_ASSERT_TRUE(test, list_is_singular(&pt->pages));
+	KUNIT_ASSERT_TRUE(test, list_lru_count(&pt->pages) == 1);
 
 	ttm_pool_fini(pool);
 }
@@ -404,11 +404,11 @@ static void ttm_pool_fini_basic(struct kunit *test)
 	pool = ttm_pool_pre_populated(test, size, caching);
 
 	pt = &pool->caching[caching].orders[order];
-	KUNIT_ASSERT_FALSE(test, list_empty(&pt->pages));
+	KUNIT_ASSERT_FALSE(test, !list_lru_count(&pt->pages));
 
 	ttm_pool_fini(pool);
 
-	KUNIT_ASSERT_TRUE(test, list_empty(&pt->pages));
+	KUNIT_ASSERT_TRUE(test, !list_lru_count(&pt->pages));
 }
 
 static struct kunit_case ttm_pool_test_cases[] = {
diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
index ee2344089d47..df6b81a43893 100644
--- a/drivers/gpu/drm/ttm/ttm_pool.c
+++ b/drivers/gpu/drm/ttm/ttm_pool.c
@@ -131,6 +131,15 @@ static struct list_head shrinker_list;
 static struct shrinker *mm_shrinker;
 static DECLARE_RWSEM(pool_shrink_rwsem);
 
+static int ttm_pool_nid(struct ttm_pool *pool) {
+	int nid = NUMA_NO_NODE;
+	if (pool)
+		nid = pool->nid;
+	if (nid == NUMA_NO_NODE)
+		nid = numa_node_id();
+	return nid;
+}
+
 /* Allocate pages of size 1 << order with the given gfp_flags */
 static struct page *ttm_pool_alloc_page(struct ttm_pool *pool, gfp_t gfp_flags,
 					unsigned int order)
@@ -290,32 +299,41 @@ static void ttm_pool_type_give(struct ttm_pool_type *pt, struct page *p)
 			clear_page(page_address(p + i));
 	}
 
-	spin_lock(&pt->lock);
-	list_add(&p->lru, &pt->pages);
-	spin_unlock(&pt->lock);
+	INIT_LIST_HEAD(&p->lru);
+	rcu_read_lock();
+	list_lru_add(&pt->pages, &p->lru, nid, NULL);
+	rcu_read_unlock();
 	atomic_long_add(1 << pt->order, &allocated_pages);
 	mod_node_page_state(NODE_DATA(nid), NR_GPU_ACTIVE, -num_pages);
 	mod_node_page_state(NODE_DATA(nid), NR_GPU_RECLAIM, num_pages);
 }
 
+static enum lru_status take_one_from_lru(struct list_head *item,
+					 struct list_lru_one *list,
+					 void *cb_arg)
+{
+	struct page **out_page = cb_arg;
+	struct page *p = container_of(item, struct page, lru);
+	list_lru_isolate(list, item);
+
+	*out_page = p;
+	return LRU_REMOVED;
+}
+
 /* Take pages from a specific pool_type, return NULL when nothing available */
-static struct page *ttm_pool_type_take(struct ttm_pool_type *pt)
+static struct page *ttm_pool_type_take(struct ttm_pool_type *pt, int nid)
 {
-	struct page *p;
-	int nid;
+	int ret;
+	struct page *p = NULL;
+	unsigned long nr_to_walk = 1;
 
-	spin_lock(&pt->lock);
-	p = list_first_entry_or_null(&pt->pages, typeof(*p), lru);
-	if (p) {
-		nid = page_to_nid(p);
+	ret = list_lru_walk_node(&pt->pages, nid, take_one_from_lru, (void *)&p, &nr_to_walk);
+	if (ret == 1 && p) {
 		atomic_long_sub(1 << pt->order, &allocated_pages);
 		mod_node_page_state(NODE_DATA(nid), NR_GPU_ACTIVE, (1 << pt->order));
 		mod_node_page_state(NODE_DATA(nid), NR_GPU_RECLAIM, -(1 << pt->order));
-		list_del(&p->lru);
 	}
-	spin_unlock(&pt->lock);
-
 	return p;
 }
 
@@ -326,25 +344,47 @@ static void ttm_pool_type_init(struct ttm_pool_type *pt, struct ttm_pool *pool,
 	pt->pool = pool;
 	pt->caching = caching;
 	pt->order = order;
-	spin_lock_init(&pt->lock);
-	INIT_LIST_HEAD(&pt->pages);
+	list_lru_init(&pt->pages);
 
 	spin_lock(&shrinker_lock);
 	list_add_tail(&pt->shrinker_list, &shrinker_list);
 	spin_unlock(&shrinker_lock);
 }
 
+static enum lru_status pool_move_to_dispose_list(struct list_head *item,
+						 struct list_lru_one *list,
+						 void *cb_arg)
+{
+	struct list_head *dispose = cb_arg;
+
+	list_lru_isolate_move(list, item, dispose);
+
+	return LRU_REMOVED;
+}
+
+static void ttm_pool_dispose_list(struct ttm_pool_type *pt,
+				  struct list_head *dispose)
+{
+	while (!list_empty(dispose)) {
+		struct page *p;
+		p = list_first_entry(dispose, struct page, lru);
+		list_del_init(&p->lru);
+		atomic_long_sub(1 << pt->order, &allocated_pages);
+		ttm_pool_free_page(pt->pool, pt->caching, pt->order, p, true);
+	}
+}
+
 /* Remove a pool_type from the global shrinker list and free all pages */
 static void ttm_pool_type_fini(struct ttm_pool_type *pt)
 {
-	struct page *p;
+	LIST_HEAD(dispose);
 
 	spin_lock(&shrinker_lock);
 	list_del(&pt->shrinker_list);
 	spin_unlock(&shrinker_lock);
 
-	while ((p = ttm_pool_type_take(pt)))
-		ttm_pool_free_page(pt->pool, pt->caching, pt->order, p, true);
+	list_lru_walk(&pt->pages, pool_move_to_dispose_list, &dispose, LONG_MAX);
+	ttm_pool_dispose_list(pt, &dispose);
 }
 
 /* Return the pool_type to use for the given caching and order */
@@ -394,7 +434,7 @@ static unsigned int ttm_pool_shrink(void)
 		list_move_tail(&pt->shrinker_list, &shrinker_list);
 	spin_unlock(&shrinker_lock);
 
-	p = ttm_pool_type_take(pt);
+	p = ttm_pool_type_take(pt, ttm_pool_nid(pt->pool));
 	if (p) {
 		ttm_pool_free_page(pt->pool, pt->caching, pt->order, p, true);
 		num_pages = 1 << pt->order;
@@ -748,7 +788,7 @@ static int __ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
 		p = NULL;
 		pt = ttm_pool_select_type(pool, page_caching, order);
 		if (pt && allow_pools)
-			p = ttm_pool_type_take(pt);
+			p = ttm_pool_type_take(pt, ttm_pool_nid(pool));
 		/*
 		 * If that fails or previously failed, allocate from system.
 		 * Note that this also disallows additional pool allocations using
@@ -1177,16 +1217,7 @@ static unsigned long ttm_pool_shrinker_count(struct shrinker *shrink,
 /* Count the number of pages available in a pool_type */
 static unsigned int ttm_pool_type_count(struct ttm_pool_type *pt)
 {
-	unsigned int count = 0;
-	struct page *p;
-
-	spin_lock(&pt->lock);
-	/* Only used for debugfs, the overhead doesn't matter */
-	list_for_each_entry(p, &pt->pages, lru)
-		++count;
-	spin_unlock(&pt->lock);
-
-	return count;
+	return list_lru_count(&pt->pages);
 }
 
 /* Print a nice header for the order */
diff --git a/include/drm/ttm/ttm_pool.h b/include/drm/ttm/ttm_pool.h
index 54cd34a6e4c0..df56527c4853 100644
--- a/include/drm/ttm/ttm_pool.h
+++ b/include/drm/ttm/ttm_pool.h
@@ -45,8 +45,7 @@ struct ttm_tt;
  * @order: the allocation order our pages have
  * @caching: the caching type our pages have
  * @shrinker_list: our place on the global shrinker list
- * @lock: protection of the page list
- * @pages: the list of pages in the pool
+ * @pages: the lru_list of pages in the pool
  */
 struct ttm_pool_type {
 	struct ttm_pool *pool;
@@ -55,8 +54,7 @@ struct ttm_pool_type {
 
 	struct list_head shrinker_list;
 
-	spinlock_t lock;
-	struct list_head pages;
+	struct list_lru pages;
 };
 
 /**
-- 
2.49.0
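
A note for reviewers less familiar with list_lru: the sketch below is not
part of the patch, it just shows the add/isolate pattern the pool code
follows after this change. The names demo_obj, demo_lru, demo_put() and
demo_take() are made up for illustration; the list_lru_add() arguments and
the three-argument walk callback match the forms used in the diff above,
and it assumes list_lru_init(&demo_lru) has been called at init time.

/* Illustrative only, hypothetical names, not TTM code. */
#include <linux/list_lru.h>
#include <linux/rcupdate.h>

struct demo_obj {
	struct list_head lru;		/* linkage used by the list_lru */
};

static struct list_lru demo_lru;	/* list_lru_init(&demo_lru) elsewhere */

/* Walk callback: detach a single entry and hand it back through cb_arg. */
static enum lru_status demo_take_one(struct list_head *item,
				     struct list_lru_one *list,
				     void *cb_arg)
{
	struct demo_obj **out = cb_arg;

	list_lru_isolate(list, item);
	*out = container_of(item, struct demo_obj, lru);
	return LRU_REMOVED;
}

/* Give an object to the per-node list; the list_lru takes its own lock. */
static void demo_put(struct demo_obj *obj, int nid)
{
	INIT_LIST_HEAD(&obj->lru);
	rcu_read_lock();
	list_lru_add(&demo_lru, &obj->lru, nid, NULL);	/* NULL: no memcg */
	rcu_read_unlock();
}

/* Take at most one object from the given node, NULL when it is empty. */
static struct demo_obj *demo_take(int nid)
{
	struct demo_obj *obj = NULL;
	unsigned long nr_to_walk = 1;

	list_lru_walk_node(&demo_lru, nid, demo_take_one, &obj, &nr_to_walk);
	return obj;
}

Because the walk isolates under the list_lru's internal per-node lock, the
caller no longer needs an external spinlock around the list, which is why
the pt->lock can be dropped in v2.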