From: Christian König <christian.koe...@amd.com>

Use an interval tree to manage the virtual address ranges of a VM; this scales much better than scanning the address range linearly.

Signed-off-by: Christian König <christian.koenig at amd.com>
---
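For reference, the lookup side of this change reduces to the stock <linux/interval_tree.h> helpers. Below is a minimal sketch of a range-conflict check with that API; the wrapper va_range_is_free() is purely illustrative (it does not exist in the patch), while the interval_tree_iter_first() call and the struct rb_root root mirror what the code below uses:

#include <linux/interval_tree.h>
#include <linux/rbtree.h>

/* Illustrative helper (not part of the patch): returns true if the
 * closed range [start, last] does not overlap any node already
 * inserted into the tree.  interval_tree_iter_first() returns the
 * first node intersecting the interval, or NULL if there is none. */
static bool va_range_is_free(struct rb_root *root, unsigned long start,
			     unsigned long last)
{
	return interval_tree_iter_first(root, start, last) == NULL;
}

Note that the interval tree works on closed intervals, which is why the patch stores it.last = eoffset - 1 and queries the tree with eoffset - 1.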
 drivers/gpu/drm/Kconfig               |  1 +
 drivers/gpu/drm/radeon/radeon.h       |  7 ++-
 drivers/gpu/drm/radeon/radeon_gem.c   |  4 +-
 drivers/gpu/drm/radeon/radeon_trace.h |  4 +-
 drivers/gpu/drm/radeon/radeon_vm.c    | 84 +++++++++++++++++------------------
 5 files changed, 48 insertions(+), 52 deletions(-)

diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index f512004..9b2eedc 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -114,6 +114,7 @@ config DRM_RADEON
        select POWER_SUPPLY
        select HWMON
        select BACKLIGHT_CLASS_DEVICE
+       select INTERVAL_TREE
        help
          Choose this option if you have an ATI Radeon graphics card.  There
          are both PCI and AGP versions.  You don't need to choose this to
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 0ec7864..aa8721b 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -64,6 +64,7 @@
 #include <linux/wait.h>
 #include <linux/list.h>
 #include <linux/kref.h>
+#include <linux/interval_tree.h>

 #include <ttm/ttm_bo_api.h>
 #include <ttm/ttm_bo_driver.h>
@@ -444,14 +445,12 @@ struct radeon_mman {
 struct radeon_bo_va {
        /* protected by bo being reserved */
        struct list_head                bo_list;
-       uint64_t                        soffset;
-       uint64_t                        eoffset;
        uint32_t                        flags;
        uint64_t                        addr;
        unsigned                        ref_count;

        /* protected by vm mutex */
-       struct list_head                vm_list;
+       struct interval_tree_node       it;
        struct list_head                vm_status;

        /* constant after initialization */
@@ -868,7 +867,7 @@ struct radeon_vm_pt {
 };

 struct radeon_vm {
-       struct list_head                va;
+       struct rb_root                  va;
        unsigned                        id;

        /* BOs moved, but not yet updated in the PT */
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index 07a13c9..b2f2ba2 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -502,9 +502,9 @@ int radeon_gem_va_ioctl(struct drm_device *dev, void *data,

        switch (args->operation) {
        case RADEON_VA_MAP:
-               if (bo_va->soffset) {
+               if (bo_va->it.start) {
                        args->operation = RADEON_VA_RESULT_VA_EXIST;
-                       args->offset = bo_va->soffset;
+                       args->offset = bo_va->it.start;
                        goto out;
                }
                r = radeon_vm_bo_set_addr(rdev, bo_va, args->offset, args->flags);
diff --git a/drivers/gpu/drm/radeon/radeon_trace.h b/drivers/gpu/drm/radeon/radeon_trace.h
index f749f2c..2ae4eaa 100644
--- a/drivers/gpu/drm/radeon/radeon_trace.h
+++ b/drivers/gpu/drm/radeon/radeon_trace.h
@@ -72,8 +72,8 @@ TRACE_EVENT(radeon_vm_bo_update,
                             ),

            TP_fast_assign(
-                          __entry->soffset = bo_va->soffset;
-                          __entry->eoffset = bo_va->eoffset;
+                          __entry->soffset = bo_va->it.start;
+                          __entry->eoffset = bo_va->it.last + 1;
                           __entry->flags = bo_va->flags;
                           ),
            TP_printk("soffs=%010llx, eoffs=%010llx, flags=%08x",
diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c
index f1030fc..dcbcfbf 100644
--- a/drivers/gpu/drm/radeon/radeon_vm.c
+++ b/drivers/gpu/drm/radeon/radeon_vm.c
@@ -325,17 +325,15 @@ struct radeon_bo_va *radeon_vm_bo_add(struct radeon_device *rdev,
        }
        bo_va->vm = vm;
        bo_va->bo = bo;
-       bo_va->soffset = 0;
-       bo_va->eoffset = 0;
+       bo_va->it.start = 0;
+       bo_va->it.last = 0;
        bo_va->flags = 0;
        bo_va->addr = 0;
        bo_va->ref_count = 1;
        INIT_LIST_HEAD(&bo_va->bo_list);
-       INIT_LIST_HEAD(&bo_va->vm_list);
        INIT_LIST_HEAD(&bo_va->vm_status);

        mutex_lock(&vm->mutex);
-       list_add(&bo_va->vm_list, &vm->va);
        list_add_tail(&bo_va->bo_list, &bo->va);
        mutex_unlock(&vm->mutex);

@@ -419,11 +417,9 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev,
                          uint32_t flags)
 {
        uint64_t size = radeon_bo_size(bo_va->bo);
-       uint64_t eoffset, last_offset = 0;
        struct radeon_vm *vm = bo_va->vm;
-       struct radeon_bo_va *tmp;
-       struct list_head *head;
        unsigned last_pfn, pt_idx;
+       uint64_t eoffset;
        int r;

        if (soffset) {
@@ -445,44 +441,43 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev,
        }

        mutex_lock(&vm->mutex);
-       head = &vm->va;
-       last_offset = 0;
-       list_for_each_entry(tmp, &vm->va, vm_list) {
-               if (bo_va == tmp) {
-                       /* skip over currently modified bo */
-                       continue;
+       if (bo_va->it.start || bo_va->it.last) {
+               if (bo_va->addr) {
+                       /* add a clone of the bo_va to clear the old address */
+                       struct radeon_bo_va *tmp;
+                       tmp = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL);
+                       tmp->it.start = bo_va->it.start;
+                       tmp->it.last = bo_va->it.last;
+                       tmp->vm = vm;
+                       tmp->addr = bo_va->addr;
+                       list_add(&tmp->vm_status, &vm->freed);
                }

-               if (soffset >= last_offset && eoffset <= tmp->soffset) {
-                       /* bo can be added before this one */
-                       break;
-               }
-               if (eoffset > tmp->soffset && soffset < tmp->eoffset) {
+               interval_tree_remove(&bo_va->it, &vm->va);
+               bo_va->it.start = 0;
+               bo_va->it.last = 0;
+       }
+
+       if (soffset || eoffset) {
+               struct interval_tree_node *it;
+               it = interval_tree_iter_first(&vm->va, soffset, eoffset - 1);
+               if (it) {
+                       struct radeon_bo_va *tmp;
+                       tmp = container_of(it, struct radeon_bo_va, it);
                        /* bo and tmp overlap, invalid offset */
-                       dev_err(rdev->dev, "bo %p va 0x%08X conflict with (bo %p 0x%08X 0x%08X)\n",
-                               bo_va->bo, (unsigned)bo_va->soffset, tmp->bo,
-                               (unsigned)tmp->soffset, (unsigned)tmp->eoffset);
+                       dev_err(rdev->dev, "bo %p va 0x%010Lx conflict with "
+                               "(bo %p 0x%010lx 0x%010lx)\n", bo_va->bo,
+                               soffset, tmp->bo, tmp->it.start, tmp->it.last);
                        mutex_unlock(&vm->mutex);
                        return -EINVAL;
                }
-               last_offset = tmp->eoffset;
-               head = &tmp->vm_list;
-       }
-
-       if (bo_va->soffset) {
-               /* add a clone of the bo_va to clear the old address */
-               tmp = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL);
-               tmp->soffset = bo_va->soffset;
-               tmp->eoffset = bo_va->eoffset;
-               tmp->vm = vm;
-               list_add(&tmp->vm_status, &vm->freed);
+               bo_va->it.start = soffset;
+               bo_va->it.last = eoffset - 1;
+               interval_tree_insert(&bo_va->it, &vm->va);
        }

-       bo_va->soffset = soffset;
-       bo_va->eoffset = eoffset;
        bo_va->flags = flags;
        bo_va->addr = 0;
-       list_move(&bo_va->vm_list, head);

        soffset = (soffset / RADEON_GPU_PAGE_SIZE) >> radeon_vm_block_size;
        eoffset = (eoffset / RADEON_GPU_PAGE_SIZE) >> radeon_vm_block_size;
@@ -842,7 +837,7 @@ int radeon_vm_bo_update(struct radeon_device *rdev,
        uint64_t addr;
        int r;

-       if (!bo_va->soffset) {
+       if (!bo_va->it.start) {
                dev_err(rdev->dev, "bo %p don't has a mapping in vm %p\n",
                        bo_va->bo, vm);
                return -EINVAL;
@@ -872,7 +867,7 @@ int radeon_vm_bo_update(struct radeon_device *rdev,

        trace_radeon_vm_bo_update(bo_va);

-       nptes = (bo_va->eoffset - bo_va->soffset) / RADEON_GPU_PAGE_SIZE;
+       nptes = (bo_va->it.last - bo_va->it.start + 1) / RADEON_GPU_PAGE_SIZE;

        /* padding, etc. */
        ndw = 64;
@@ -897,8 +892,9 @@ int radeon_vm_bo_update(struct radeon_device *rdev,
                return r;
        ib.length_dw = 0;

-       radeon_vm_update_ptes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset,
-                             addr, radeon_vm_page_flags(bo_va->flags));
+       radeon_vm_update_ptes(rdev, vm, &ib, bo_va->it.start,
+                             bo_va->it.last + 1, addr,
+                             radeon_vm_page_flags(bo_va->flags));

        radeon_semaphore_sync_to(ib.semaphore, vm->fence);
        r = radeon_ib_schedule(rdev, &ib, NULL);
@@ -984,7 +980,7 @@ void radeon_vm_bo_rmv(struct radeon_device *rdev,
        list_del(&bo_va->bo_list);

        mutex_lock(&vm->mutex);
-       list_del(&bo_va->vm_list);
+       interval_tree_remove(&bo_va->it, &vm->va);
        list_del(&bo_va->vm_status);

        if (bo_va->addr) {
@@ -1041,7 +1037,7 @@ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
        vm->last_flush = NULL;
        vm->last_id_use = NULL;
        mutex_init(&vm->mutex);
-       INIT_LIST_HEAD(&vm->va);
+       vm->va = RB_ROOT;
        INIT_LIST_HEAD(&vm->invalidated);
        INIT_LIST_HEAD(&vm->freed);

@@ -1086,11 +1082,11 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm)
        struct radeon_bo_va *bo_va, *tmp;
        int i, r;

-       if (!list_empty(&vm->va)) {
+       if (!RB_EMPTY_ROOT(&vm->va)) {
                dev_err(rdev->dev, "still active bo inside vm\n");
        }
-       list_for_each_entry_safe(bo_va, tmp, &vm->va, vm_list) {
-               list_del_init(&bo_va->vm_list);
+       rbtree_postorder_for_each_entry_safe(bo_va, tmp, &vm->va, it.rb) {
+               interval_tree_remove(&bo_va->it, &vm->va);
                r = radeon_bo_reserve(bo_va->bo, false);
                if (!r) {
                        list_del_init(&bo_va->bo_list);
-- 
1.9.1
