Wrap the active tracking for GPU references in a slabcache for faster
allocations, and keep track of inflight nodes so we can reap the
stale entries upon idling (thereby trimming our memory usage).

v2: Automatically discard the trees every time the tracker idles; they
should be rarely used and fast to allocate as required.

Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_active.c            | 48 +++++++++++++++----
 drivers/gpu/drm/i915/i915_active.h            | 19 ++++++--
 drivers/gpu/drm/i915/i915_active_types.h      | 10 +++-
 drivers/gpu/drm/i915/i915_drv.h               |  2 +
 drivers/gpu/drm/i915/i915_gem.c               | 15 ++++--
 drivers/gpu/drm/i915/i915_gem_gtt.c           |  2 +-
 drivers/gpu/drm/i915/i915_vma.c               |  3 +-
 drivers/gpu/drm/i915/selftests/i915_active.c  |  3 +-
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  6 +++
 9 files changed, 86 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index e0182e19cb8b..fe475015221d 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -7,7 +7,9 @@
 #include "i915_drv.h"
 #include "i915_active.h"
 
-#define BKL(ref) (&(ref)->i915->drm.struct_mutex)
+#define i915_from_gt(x) \
+       container_of(x, struct drm_i915_private, gt.active_refs)
+#define BKL(ref) (&i915_from_gt((ref)->gt)->drm.struct_mutex)
 
 struct active_node {
        struct i915_gem_active base;
@@ -20,8 +22,13 @@ static void
 __active_retire(struct i915_active *ref)
 {
        GEM_BUG_ON(!ref->count);
-       if (!--ref->count)
-               ref->retire(ref);
+       if (--ref->count)
+               return;
+
+       /* return the unused nodes to our slabcache */
+       i915_active_fini(ref);
+
+       ref->retire(ref);
 }
 
 static void
@@ -79,11 +86,11 @@ active_instance(struct i915_active *ref, u64 idx)
                        p = &parent->rb_left;
        }
 
-       node = kmalloc(sizeof(*node), GFP_KERNEL);
+       node = kmem_cache_alloc(ref->gt->slab_cache, GFP_KERNEL);
 
        /* kmalloc may retire the ref->last (thanks shrinker)! */
        if (unlikely(!i915_gem_active_raw(&ref->last, BKL(ref)))) {
-               kfree(node);
+               kmem_cache_free(ref->gt->slab_cache, node);
                goto out;
        }
 
@@ -119,11 +126,11 @@ active_instance(struct i915_active *ref, u64 idx)
        return &ref->last;
 }
 
-void i915_active_init(struct drm_i915_private *i915,
+void i915_active_init(struct i915_gt_active *gt,
                      struct i915_active *ref,
                      void (*retire)(struct i915_active *ref))
 {
-       ref->i915 = i915;
+       ref->gt = gt;
        ref->retire = retire;
        ref->tree = RB_ROOT;
        init_request_active(&ref->last, last_retire);
@@ -161,6 +168,7 @@ void i915_active_release(struct i915_active *ref)
 
 int i915_active_wait(struct i915_active *ref)
 {
+       struct kmem_cache *slab = ref->gt->slab_cache;
        struct active_node *it, *n;
        int ret;
 
@@ -168,13 +176,16 @@ int i915_active_wait(struct i915_active *ref)
        if (ret)
                return ret;
 
+       if (RB_EMPTY_ROOT(&ref->tree))
+               return 0;
+
        rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
                ret = i915_gem_active_retire(&it->base, BKL(ref));
                if (ret)
                        return ret;
 
                GEM_BUG_ON(i915_gem_active_isset(&it->base));
-               kfree(it);
+               kmem_cache_free(slab, it);
        }
        ref->tree = RB_ROOT;
 
@@ -210,17 +221,36 @@ int i915_request_await_active(struct i915_request *rq, struct i915_active *ref)
 
 void i915_active_fini(struct i915_active *ref)
 {
+       struct kmem_cache *slab = ref->gt->slab_cache;
        struct active_node *it, *n;
 
+       lockdep_assert_held(BKL(ref));
        GEM_BUG_ON(i915_gem_active_isset(&ref->last));
 
+       if (RB_EMPTY_ROOT(&ref->tree))
+               return;
+
        rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
                GEM_BUG_ON(i915_gem_active_isset(&it->base));
-               kfree(it);
+               kmem_cache_free(slab, it);
        }
        ref->tree = RB_ROOT;
 }
 
+int i915_gt_active_init(struct i915_gt_active *gt)
+{
+       gt->slab_cache = KMEM_CACHE(active_node, SLAB_HWCACHE_ALIGN);
+       if (!gt->slab_cache)
+               return -ENOMEM;
+
+       return 0;
+}
+
+void i915_gt_active_fini(struct i915_gt_active *gt)
+{
+       kmem_cache_destroy(gt->slab_cache);
+}
+
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftests/i915_active.c"
 #endif
diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
index c0729a046f98..13d644d7e689 100644
--- a/drivers/gpu/drm/i915/i915_active.h
+++ b/drivers/gpu/drm/i915/i915_active.h
@@ -9,10 +9,6 @@
 
 #include "i915_active_types.h"
 
-#include <linux/rbtree.h>
-
-#include "i915_request.h"
-
 /*
  * GPU activity tracking
  *
@@ -39,7 +35,7 @@
  * synchronisation.
  */
 
-void i915_active_init(struct drm_i915_private *i915,
+void i915_active_init(struct i915_gt_active *gt,
                      struct i915_active *ref,
                      void (*retire)(struct i915_active *ref));
 
@@ -63,4 +59,17 @@ i915_active_is_idle(const struct i915_active *ref)
 
 void i915_active_fini(struct i915_active *ref);
 
+/*
+ * Active refs memory management
+ *
+ * To be more economical with memory, we reap all the i915_active trees as
+ * they idle (when we know the active requests are inactive) and allocate the
+ * nodes from a local slab cache to hopefully reduce the fragmentation.
+ */
+
+int i915_gt_active_init(struct i915_gt_active *gt);
+void i915_gt_active_fini(struct i915_gt_active *gt);
+
+#define i915_gt_active(i915) (&(i915)->gt.active_refs)
+
 #endif /* _I915_ACTIVE_H_ */
diff --git a/drivers/gpu/drm/i915/i915_active_types.h b/drivers/gpu/drm/i915/i915_active_types.h
index 411e502ed8dd..817cc8c530f1 100644
--- a/drivers/gpu/drm/i915/i915_active_types.h
+++ b/drivers/gpu/drm/i915/i915_active_types.h
@@ -7,14 +7,16 @@
 #ifndef _I915_ACTIVE_TYPES_H_
 #define _I915_ACTIVE_TYPES_H_
 
+#include <linux/list.h>
 #include <linux/rbtree.h>
 
 #include "i915_request.h"
 
-struct drm_i915_private;
+struct i915_gt_active;
+struct kmem_cache;
 
 struct i915_active {
-       struct drm_i915_private *i915;
+       struct i915_gt_active *gt;
 
        struct rb_root tree;
        struct i915_gem_active last;
@@ -23,4 +25,8 @@ struct i915_active {
        void (*retire)(struct i915_active *ref);
 };
 
+struct i915_gt_active {
+       struct kmem_cache *slab_cache;
+};
+
 #endif /* _I915_ACTIVE_TYPES_H_ */
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 7946c271ab24..135be159091b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1985,6 +1985,8 @@ struct drm_i915_private {
                        struct list_head hwsp_free_list;
                } timelines;
 
+               struct i915_gt_active active_refs;
+
                struct list_head active_rings;
                struct list_head closed_vma;
                u32 active_requests;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index caccff87a2a1..157079cec01d 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4998,15 +4998,19 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
                dev_priv->gt.cleanup_engine = intel_engine_cleanup;
        }
 
+       ret = i915_gt_active_init(i915_gt_active(dev_priv));
+       if (ret)
+               return ret;
+
        i915_timelines_init(dev_priv);
 
        ret = i915_gem_init_userptr(dev_priv);
        if (ret)
-               return ret;
+               goto err_timelines;
 
        ret = intel_uc_init_misc(dev_priv);
        if (ret)
-               return ret;
+               goto err_userptr;
 
        ret = intel_wopcm_init(&dev_priv->wopcm);
        if (ret)
@@ -5122,9 +5126,13 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 err_uc_misc:
        intel_uc_fini_misc(dev_priv);
 
-       if (ret != -EIO) {
+err_userptr:
+       if (ret != -EIO)
                i915_gem_cleanup_userptr(dev_priv);
+err_timelines:
+       if (ret != -EIO) {
                i915_timelines_fini(dev_priv);
+               i915_gt_active_fini(i915_gt_active(dev_priv));
        }
 
        if (ret == -EIO) {
@@ -5177,6 +5185,7 @@ void i915_gem_fini(struct drm_i915_private *dev_priv)
        intel_uc_fini_misc(dev_priv);
        i915_gem_cleanup_userptr(dev_priv);
        i915_timelines_fini(dev_priv);
+       i915_gt_active_fini(i915_gt_active(dev_priv));
 
        i915_gem_drain_freed_objects(dev_priv);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index e625659c03a2..d8819de0d6ee 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1917,7 +1917,7 @@ static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size)
        if (!vma)
                return ERR_PTR(-ENOMEM);
 
-       i915_active_init(i915, &vma->active, NULL);
+       i915_active_init(i915_gt_active(i915), &vma->active, NULL);
        init_request_active(&vma->last_fence, NULL);
 
        vma->vm = &ggtt->vm;
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index d4772061e642..2456bfb4877b 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -119,7 +119,8 @@ vma_create(struct drm_i915_gem_object *obj,
        if (vma == NULL)
                return ERR_PTR(-ENOMEM);
 
-       i915_active_init(vm->i915, &vma->active, __i915_vma_retire);
+       i915_active_init(i915_gt_active(vm->i915),
+                        &vma->active, __i915_vma_retire);
        init_request_active(&vma->last_fence, NULL);
 
        vma->vm = vm;
diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c
index 7c5c3068565b..0e923476920e 100644
--- a/drivers/gpu/drm/i915/selftests/i915_active.c
+++ b/drivers/gpu/drm/i915/selftests/i915_active.c
@@ -30,7 +30,8 @@ static int __live_active_setup(struct drm_i915_private *i915,
        unsigned int count = 0;
        int err = 0;
 
-       i915_active_init(i915, &active->base, __live_active_retire);
+       i915_active_init(i915_gt_active(i915),
+                        &active->base, __live_active_retire);
        active->retired = false;
 
        if (!i915_active_acquire(&active->base)) {
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index 074a0d9cbf26..5b88f74c1677 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -69,6 +69,7 @@ static void mock_device_release(struct drm_device *dev)
        mutex_unlock(&i915->drm.struct_mutex);
 
        i915_timelines_fini(i915);
+       i915_gt_active_fini(i915_gt_active(i915));
 
        drain_workqueue(i915->wq);
        i915_gem_drain_freed_objects(i915);
@@ -228,6 +229,9 @@ struct drm_i915_private *mock_gem_device(void)
        if (!i915->priorities)
                goto err_dependencies;
 
+       if (i915_gt_active_init(i915_gt_active(i915)))
+               goto err_priorities;
+
        i915_timelines_init(i915);
 
        INIT_LIST_HEAD(&i915->gt.active_rings);
@@ -257,6 +261,8 @@ struct drm_i915_private *mock_gem_device(void)
 err_unlock:
        mutex_unlock(&i915->drm.struct_mutex);
        i915_timelines_fini(i915);
+       i915_gt_active_fini(i915_gt_active(i915));
+err_priorities:
        kmem_cache_destroy(i915->priorities);
 err_dependencies:
        kmem_cache_destroy(i915->dependencies);
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to