Allocating small BOs from one end of VRAM and large ones from the other
end helps reduce fragmentation.
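
For illustration, the decision boils down to the following (a minimal
sketch; pick_alloc_flags() is a made-up helper, the real check is open-coded
in ttm_bo_man_get_node() in the patch below):

	static enum drm_mm_allocator_flags
	pick_alloc_flags(uint32_t alloc_threshold, unsigned long num_pages)
	{
		/* BOs larger than the byte threshold allocate top-down. */
		if (alloc_threshold && num_pages * PAGE_SIZE > alloc_threshold)
			return DRM_MM_CREATE_TOP;
		/* Everything else keeps the default bottom-up placement. */
		return DRM_MM_CREATE_DEFAULT;
	}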

I measured a suitable threshold: it reduces evictions by up to 20%, and
causes no harm in the normal cases that fit fully in VRAM (Phoronix Test
Suite gaming tests). In some cases even the VRAM-fitting workloads improved
slightly (openarena, urban terror).

This depends on "drm: Optionally create mm blocks from top-to-bottom" by
Chris Wilson, and so is an RFC for now.
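
A driver would opt in by passing a byte threshold as the new last argument
to ttm_bo_device_init(). A sketch against radeon (the call site is
abbreviated, and the 512 KiB value is only an example, not a tuned number):

	/* BOs larger than 512 KiB will be placed top-down. */
	r = ttm_bo_device_init(&rdev->mman.bdev,
			       rdev->mman.bo_global_ref.ref.object,
			       &radeon_bo_driver,
			       DRM_FILE_PAGE_OFFSET,
			       rdev->need_dma32,
			       512 * 1024 /* alloc_threshold, in bytes */);

Passing 0 keeps the current behaviour, since the threshold check is then
skipped entirely.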
---
 drivers/gpu/drm/ttm/ttm_bo.c         |  4 +++-
 drivers/gpu/drm/ttm/ttm_bo_manager.c | 12 +++++++++---
 include/drm/ttm/ttm_bo_driver.h      |  5 ++++-
 3 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index a066513..7f30c2d 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -1450,7 +1450,8 @@ int ttm_bo_device_init(struct ttm_bo_device *bdev,
                       struct ttm_bo_global *glob,
                       struct ttm_bo_driver *driver,
                       uint64_t file_page_offset,
-                      bool need_dma32)
+                      bool need_dma32,
+                      uint32_t alloc_threshold)
 {
        int ret = -EINVAL;

@@ -1473,6 +1474,7 @@ int ttm_bo_device_init(struct ttm_bo_device *bdev,
        bdev->dev_mapping = NULL;
        bdev->glob = glob;
        bdev->need_dma32 = need_dma32;
+       bdev->alloc_threshold = alloc_threshold;
        bdev->val_seq = 0;
        spin_lock_init(&bdev->fence_lock);
        mutex_lock(&glob->device_list_mutex);
diff --git a/drivers/gpu/drm/ttm/ttm_bo_manager.c b/drivers/gpu/drm/ttm/ttm_bo_manager.c
index c58eba33..bd1cbf1 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_manager.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_manager.c
@@ -55,6 +55,7 @@ static int ttm_bo_man_get_node(struct ttm_mem_type_manager *man,
        struct ttm_range_manager *rman = (struct ttm_range_manager *) man->priv;
        struct drm_mm *mm = &rman->mm;
        struct drm_mm_node *node = NULL;
+       enum drm_mm_allocator_flags aflags = DRM_MM_CREATE_DEFAULT;
        unsigned long lpfn;
        int ret;

@@ -66,11 +67,16 @@ static int ttm_bo_man_get_node(struct ttm_mem_type_manager *man,
        if (!node)
                return -ENOMEM;

+       if (man->bdev->alloc_threshold &&
+               man->bdev->alloc_threshold < (mem->num_pages * PAGE_SIZE))
+               aflags = DRM_MM_CREATE_TOP;
+
        spin_lock(&rman->lock);
-       ret = drm_mm_insert_node_in_range(mm, node, mem->num_pages,
-                                         mem->page_alignment,
+       ret = drm_mm_insert_node_in_range_generic(mm, node, mem->num_pages,
+                                         mem->page_alignment, 0,
                                          placement->fpfn, lpfn,
-                                         DRM_MM_SEARCH_BEST);
+                                         DRM_MM_SEARCH_BEST,
+                                         aflags);
        spin_unlock(&rman->lock);

        if (unlikely(ret)) {
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index 32d34eb..831320e 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -565,6 +565,7 @@ struct ttm_bo_device {
        struct delayed_work wq;

        bool need_dma32;
+       uint32_t alloc_threshold;
 };

 /**
@@ -758,7 +759,9 @@ extern int ttm_bo_device_release(struct ttm_bo_device *bdev);
 extern int ttm_bo_device_init(struct ttm_bo_device *bdev,
                              struct ttm_bo_global *glob,
                              struct ttm_bo_driver *driver,
-                             uint64_t file_page_offset, bool need_dma32);
+                             uint64_t file_page_offset,
+                             bool need_dma32,
+                             uint32_t alloc_threshold);

 /**
  * ttm_bo_unmap_virtual
-- 
1.8.3.1
