Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
---
 tests/Makefile.am         |   1 +
 tests/Makefile.sources    |   1 +
 tests/gem_exec_balancer.c | 446 ++++++++++++++++++++++++++++++++++++++
 tests/meson.build         |   1 +
 4 files changed, 449 insertions(+)
 create mode 100644 tests/gem_exec_balancer.c

diff --git a/tests/Makefile.am b/tests/Makefile.am
index 408c8b423..a255f1c1a 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -107,6 +107,7 @@ gem_close_race_LDADD = $(LDADD) -lpthread
 gem_ctx_freq_LDADD = $(LDADD) $(top_builddir)/lib/libigt_perf.la
 gem_ctx_thrash_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS)
 gem_ctx_thrash_LDADD = $(LDADD) -lpthread
+gem_exec_balancer_LDADD = $(LDADD) $(top_builddir)/lib/libigt_perf.la
 gem_exec_parallel_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS)
 gem_exec_parallel_LDADD = $(LDADD) -lpthread
 gem_fence_thrash_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS)
diff --git a/tests/Makefile.sources b/tests/Makefile.sources
index 179b709a2..49547a501 100644
--- a/tests/Makefile.sources
+++ b/tests/Makefile.sources
@@ -73,6 +73,7 @@ TESTS_progs = \
        gem_exec_async \
        gem_exec_await \
        gem_exec_bad_domains \
+       gem_exec_balancer \
        gem_exec_basic \
        gem_exec_big \
        gem_exec_blt \
diff --git a/tests/gem_exec_balancer.c b/tests/gem_exec_balancer.c
new file mode 100644
index 000000000..8a76e1008
--- /dev/null
+++ b/tests/gem_exec_balancer.c
@@ -0,0 +1,446 @@
+/*
+ * Copyright © 2018 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <sched.h>
+
+#include "igt.h"
+#include "igt_perf.h"
+#include "i915/gem_ring.h"
+#include "sw_sync.h"
+
+IGT_TEST_DESCRIPTION("Exercise in-kernel load-balancing");
+
+#define I915_CONTEXT_PARAM_ENGINES 0x9
+
+struct class_instance { /* identifies one engine as a (class, instance) pair; copied verbatim into 64-bit engine-map slots by __set_load_balancer() */
+       uint32_t class; /* engine class, as passed to I915_PMU_ENGINE_BUSY() */
+       uint32_t instance; /* index of the engine within its class, counted from 0 by list_engines() */
+};
+
+/*
+ * Probe for an engine by trying to open its PMU busyness counter.
+ *
+ * Returns true when the counter could be opened (the fd is closed again
+ * straight away) and false otherwise. The i915 argument is not used.
+ */
+static bool has_class_instance(int i915, uint32_t class, uint32_t instance)
+{
+       int pmu = perf_i915_open(I915_PMU_ENGINE_BUSY(class, instance));
+
+       if (pmu == -1)
+               return false;
+
+       close(pmu);
+       return true;
+}
+
+/*
+ * Enumerate the engines selected by @class_mask.
+ *
+ * Bit N of @class_mask selects engine class I915_ENGINE_CLASS_RENDER + N.
+ * For each selected class, instances are probed from 0 upwards with
+ * has_class_instance() until the first one that is missing.
+ *
+ * Returns a malloc()ed array that the caller must free(), with the number
+ * of entries stored in *out. NULL (and *out == 0) is returned when nothing
+ * was found or the initial allocation failed. If growing the array fails,
+ * the entries collected so far are returned.
+ */
+static struct class_instance *
+list_engines(int i915, uint32_t class_mask, unsigned int *out)
+{
+       enum drm_i915_gem_engine_class class = I915_ENGINE_CLASS_RENDER;
+       unsigned int used = 0, capacity = 64;
+       struct class_instance *list;
+
+       list = malloc(capacity * sizeof(*list));
+       if (list == NULL) {
+               *out = 0;
+               return NULL;
+       }
+
+       while (class_mask) {
+               if (class_mask & 1) {
+                       unsigned int instance = 0;
+
+                       while (has_class_instance(i915, class, instance)) {
+                               if (used == capacity) {
+                                       struct class_instance *grown;
+
+                                       /* Double the storage; keep the old array on failure. */
+                                       capacity *= 2;
+                                       grown = realloc(list,
+                                                       capacity * sizeof(*list));
+                                       if (grown == NULL) {
+                                               *out = used;
+                                               return list;
+                                       }
+
+                                       list = grown;
+                               }
+
+                               list[used].class = class;
+                               list[used].instance = instance;
+                               used++;
+
+                               instance++;
+                       }
+               }
+
+               class++;
+               class_mask >>= 1;
+       }
+
+       /* An empty result is reported as NULL rather than an empty array. */
+       if (used == 0) {
+               free(list);
+               list = NULL;
+       }
+
+       *out = used;
+       return list;
+}
+
+/*
+ * Install @ci[0..count-1] as the engine map of context @ctx through
+ * I915_CONTEXT_PARAM_ENGINES, chaining a load-balance extension.
+ *
+ * The parameter payload is one 64-bit extension pointer followed by
+ * @count 64-bit slots; the class_instance array is copied into those
+ * slots byte-for-byte. Returns the result of __gem_context_set_param()
+ * (set_load_balancer() treats 0 as success).
+ */
+static int __set_load_balancer(int i915, uint32_t ctx,
+                              const struct class_instance *ci,
+                              unsigned int count)
+{
+       struct {
+               uint64_t next_extension;
+               uint64_t name;
+               uint64_t flags;
+               uint64_t mbz[3];
+       } ext;
+       struct {
+               uint64_t extension;
+               uint64_t class_instance[count];
+       } map;
+       struct drm_i915_gem_context_param arg;
+
+       /* Everything except the extension name stays zero. */
+       memset(&ext, 0, sizeof(ext));
+       ext.name = 1; /* I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE */
+
+       map.extension = to_user_pointer(&ext);
+       memcpy(map.class_instance, ci, sizeof(map.class_instance));
+
+       memset(&arg, 0, sizeof(arg));
+       arg.ctx_id = ctx;
+       arg.param = I915_CONTEXT_PARAM_ENGINES;
+       arg.size = sizeof(map);
+       arg.value = to_user_pointer(&map);
+
+       return __gem_context_set_param(i915, &arg);
+}
+
+static void set_load_balancer(int i915, uint32_t ctx,
+                             const struct class_instance *ci,
+                             unsigned int count) /* asserting wrapper around __set_load_balancer() */
+{
+       igt_assert_eq(__set_load_balancer(i915, ctx, ci, count), 0); /* any non-zero result fails the test */
+}
+
+/*
+ * Create a new context with gem_queue_create() and install @ci[0..count-1]
+ * as its load-balanced engine map. Returns the context id; the caller is
+ * responsible for destroying it.
+ */
+static uint32_t load_balancer_create(int i915,
+                                    const struct class_instance *ci,
+                                    unsigned int count)
+{
+       uint32_t ctx = gem_queue_create(i915);
+
+       set_load_balancer(i915, ctx, ci, count);
+       return ctx;
+}
+
+static void kick_kthreads(int period_us) /* yield the CPU, then sleep for period_us microseconds */
+{
+       sched_yield(); /* presumably gives pending kernel threads a chance to run before sampling -- TODO confirm */
+       usleep(period_us);
+}
+
+/*
+ * Sample the counter behind @pmu twice, @period_us apart, and return the
+ * growth of the counter value divided by the growth of the accompanying
+ * time value, i.e. the fraction of the interval the engine was busy.
+ *
+ * Each read must return exactly two 64-bit words ({ value, time });
+ * anything else fails the test.
+ */
+static double measure_load(int pmu, int period_us)
+{
+       uint64_t data[2];
+       uint64_t busy_start, time_start;
+       uint64_t busy, elapsed;
+
+       kick_kthreads(period_us);
+
+       igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
+       busy_start = data[0];
+       time_start = data[1];
+
+       usleep(period_us);
+
+       igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
+       /* Unsigned subtraction, so a wrapped counter still yields the delta. */
+       busy = data[0] - busy_start;
+       elapsed = data[1] - time_start;
+
+       return (double)busy / (double)elapsed;
+}
+
+/*
+ * Sample a group of @num busyness counters twice, @period_us apart, and
+ * return the load of the least busy one as a fraction of the elapsed time.
+ *
+ * Each read of the group fd @pmu must return 2 + @num 64-bit words: word 1
+ * is the time base and words 2.. are the per-counter values (word 0 is not
+ * used here). The per-engine loads and the [min, max] range are logged
+ * with igt_debug().
+ */
+static double measure_min_load(int pmu, unsigned int num, int period_us)
+{
+       uint64_t data[2 + num];
+       uint64_t start[num];
+       uint64_t time_start, elapsed;
+       uint64_t lowest = ~(uint64_t)0, highest = 0;
+       unsigned int n;
+
+       kick_kthreads(period_us);
+
+       igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
+       time_start = data[1];
+       for (n = 0; n < num; n++)
+               start[n] = data[2 + n];
+
+       usleep(period_us);
+
+       igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
+       elapsed = data[1] - time_start;
+
+       for (n = 0; n < num; n++) {
+               uint64_t busy = data[2 + n] - start[n];
+
+               igt_debug("engine[%d]: %.1f%%\n",
+                         n, busy / (double)elapsed * 100);
+
+               lowest = busy < lowest ? busy : lowest;
+               highest = busy > highest ? busy : highest;
+       }
+
+       igt_debug("elapsed: %"PRIu64"ns, load [%.1f, %.1f]%%\n",
+                 elapsed,
+                 lowest / (double)elapsed * 100,
+                 highest / (double)elapsed * 100);
+
+       return lowest / (double)elapsed;
+}
+
+/*
+ * Run a spinner on @ctx using engine selector @idx + 1 and check, through
+ * the PMU busyness counter of ci[idx], that this is the engine which
+ * became busy. (Selector 0 is presumably left for the load balancer
+ * itself -- confirm against the engine-map uAPI.)
+ *
+ * The test fails if the counter cannot be opened or if the engine was
+ * busy for 90% or less of the sampling period.
+ */
+static void check_individual_engine(int i915,
+                                   uint32_t ctx,
+                                   const struct class_instance *ci,
+                                   int idx)
+{
+       igt_spin_t *spin;
+       double load;
+       int pmu;
+
+       pmu = perf_i915_open(I915_PMU_ENGINE_BUSY(ci[idx].class,
+                                                 ci[idx].instance));
+       /* Bail out before a spinner is started if the counter is missing. */
+       igt_assert(pmu >= 0);
+
+       spin = igt_spin_batch_new(i915, ctx, idx + 1, 0);
+       load = measure_load(pmu, 10000);
+       igt_spin_batch_free(i915, spin);
+
+       close(pmu);
+
+       /* Checked only after cleanup so a failure leaves nothing running. */
+       igt_assert_f(load > 0.90,
+                    "engine %d (class:instance %d:%d) was found to be only %.1f%% busy\n",
+                    idx, ci[idx].class, ci[idx].instance, load*100);
+}
+
+/*
+ * Subtest body: for every engine class that has engines, repeatedly
+ * permute the engine list, install it as the context's engine map and
+ * verify that each map index runs on the engine it names.
+ */
+static void individual(int i915)
+{
+       uint32_t ctx = gem_queue_create(i915);
+       int mask;
+
+       /*
+        * I915_CONTEXT_PARAM_ENGINES allows us to index into the user
+        * supplied array from gem_execbuf(). Our check is to build the
+        * ctx->engine[] with various different engine orderings, feed in
+        * a spinner and then ask the pmu to confirm that the expected
+        * engine was busy.
+        */
+
+       for (mask = 0; mask < 32; mask++) {
+               struct class_instance *ci;
+               unsigned int count, pass, n;
+
+               ci = list_engines(i915, 1u << mask, &count);
+               if (ci == NULL)
+                       continue;
+
+               igt_debug("Found %d engines of class %d\n", count, mask);
+
+               /* count passes, each with a new permutation (approx. count!) */
+               for (pass = 0; pass < count; pass++) {
+                       igt_permute_array(ci, count, igt_exchange_int64);
+                       set_load_balancer(i915, ctx, ci, count);
+
+                       for (n = 0; n < count; n++)
+                               check_individual_engine(i915, ctx, ci, n);
+               }
+
+               free(ci);
+       }
+
+       gem_context_destroy(i915, ctx);
+}
+
+/*
+ * Open the busyness counter of engine @ci as a member of the perf group
+ * @pmu (full() passes -1 for the first counter). Returns the result of
+ * perf_i915_open_group().
+ */
+static int add_pmu(int pmu, const struct class_instance *ci)
+{
+       uint64_t config = I915_PMU_ENGINE_BUSY(ci->class, ci->instance);
+
+       return perf_i915_open_group(config, pmu);
+}
+
+/*
+ * Create a 4096-byte buffer object whose first dword is
+ * MI_BATCH_BUFFER_END. Returns the handle; the caller closes it.
+ */
+static uint32_t batch_create(int i915)
+{
+       const uint32_t terminator = MI_BATCH_BUFFER_END;
+       uint32_t bo = gem_create(i915, 4096);
+
+       gem_write(i915, bo, 0, &terminator, sizeof(terminator));
+       return bo;
+}
+
+/*
+ * Subtest body: for every engine class that has engines, submit one
+ * spinner per engine, each from its own load-balanced context, and check
+ * with a PMU counter group that even the least busy engine of the class
+ * was more than 90% busy.
+ *
+ * @flags:
+ *   PULSE - submit a trivial batch on the default context before each
+ *           spinner.
+ *   LATE  - gate the extra submissions on a cork fence that is only
+ *           released once everything has been queued (needs sw_sync).
+ */
+static void full(int i915, unsigned int flags)
+#define PULSE 0x1
+#define LATE 0x2
+{
+       struct drm_i915_gem_exec_object2 batch = {
+               .handle = batch_create(i915),
+       };
+
+       if (flags & LATE)
+               igt_require_sw_sync();
+
+       /*
+        * I915_CONTEXT_PARAM_ENGINES changes the meaning of I915_EXEC_DEFAULT
+        * to provide an automatic selection from the ctx->engine[]. It
+        * employs load-balancing to evenly distribute the workload across
+        * the array. If we submit N spinners, we expect them to be
+        * simultaneously running across N engines and use PMU to confirm
+        * that the entire set of engines are busy.
+        *
+        * We complicate matters by interspersing shortlived tasks to
+        * challenge the kernel to search for space in which to insert new
+        * batches.
+        */
+
+       for (int mask = 0; mask < 32; mask++) {
+               struct class_instance *ci;
+               igt_spin_t *spin = NULL;
+               unsigned int count;
+               IGT_CORK_FENCE(cork);
+               double load;
+               int fence = -1;
+               int *pmu;
+
+               ci = list_engines(i915, 1u << mask, &count);
+               if (!ci)
+                       continue;
+
+               igt_debug("Found %d engines of class %d\n", count, mask);
+
+               pmu = malloc(sizeof(*pmu) * count);
+               igt_assert(pmu);
+
+               if (flags & LATE)
+                       fence = igt_cork_plug(&cork, i915);
+
+               /* The first counter opened (group == -1) becomes the leader. */
+               pmu[0] = -1;
+               for (unsigned int n = 0; n < count; n++) {
+                       uint32_t ctx;
+
+                       pmu[n] = add_pmu(pmu[0], &ci[n]);
+                       /* A missing member would short the group read later. */
+                       igt_assert(pmu[n] >= 0);
+
+                       if (flags & PULSE) {
+                               struct drm_i915_gem_execbuffer2 eb = {
+                                       .buffers_ptr = to_user_pointer(&batch),
+                                       .buffer_count = 1,
+                                       .rsvd2 = fence,
+                                       .flags = flags & LATE ? I915_EXEC_FENCE_IN : 0,
+                               };
+
+                               gem_execbuf(i915, &eb);
+                       }
+
+                       /*
+                        * Each spinner needs to be on a new timeline,
+                        * otherwise they will just sit in the single queue
+                        * and not run concurrently.
+                        */
+                       ctx = load_balancer_create(i915, ci, count);
+
+                       if (spin == NULL) {
+                               spin = __igt_spin_batch_new(i915, ctx, 0, 0);
+                       } else {
+                               /* Resubmit the same spinning batch on the new context. */
+                               struct drm_i915_gem_exec_object2 obj = {
+                                       .handle = spin->handle,
+                               };
+                               struct drm_i915_gem_execbuffer2 eb = {
+                                       .buffers_ptr = to_user_pointer(&obj),
+                                       .buffer_count = 1,
+                                       .rsvd1 = ctx,
+                                       .rsvd2 = fence,
+                                       .flags = flags & LATE ? I915_EXEC_FENCE_IN : 0,
+                               };
+
+                               gem_execbuf(i915, &eb);
+                       }
+
+                       gem_context_destroy(i915, ctx);
+               }
+
+               if (flags & LATE) {
+                       igt_cork_unplug(&cork);
+                       close(fence);
+               }
+
+               load = measure_min_load(pmu[0], count, 10000);
+               igt_spin_batch_free(i915, spin);
+
+               /*
+                * Every group member holds its own fd; closing only the
+                * leader would leak count - 1 descriptors per class.
+                */
+               for (unsigned int n = 0; n < count; n++)
+                       close(pmu[n]);
+               free(pmu);
+
+               free(ci);
+
+               /* Checked only after cleanup so a failure leaves nothing running. */
+               igt_assert_f(load > 0.90,
+                            "minimum load for %d x class:%d was found to be only %.1f%% busy\n",
+                            count, mask, load*100);
+       }
+
+       gem_close(i915, batch.handle);
+}
+
+/*
+ * Check whether I915_CONTEXT_PARAM_ENGINES is accepted, by setting it
+ * with an otherwise zeroed request (ctx_id, size and value all 0).
+ * Returns true when __gem_context_set_param() reports 0.
+ */
+static bool has_context_engines(int i915)
+{
+       struct drm_i915_gem_context_param probe;
+
+       memset(&probe, 0, sizeof(probe));
+       probe.param = I915_CONTEXT_PARAM_ENGINES;
+
+       return !__gem_context_set_param(i915, &probe);
+}
+
+igt_main /* test entry point: registers the "individual" and "full*" subtests */
+{
+       int i915 = -1; /* DRM fd, opened in the first fixture */
+
+       igt_skip_on_simulation();
+
+       igt_fixture { /* shared setup */
+               i915 = drm_open_driver(DRIVER_INTEL);
+               igt_require_gem(i915);
+
+               gem_require_contexts(i915);
+               igt_require(has_context_engines(i915)); /* requires I915_CONTEXT_PARAM_ENGINES to be accepted */
+
+               igt_fork_hang_detector(i915);
+       }
+
+       igt_subtest("individual")
+               individual(i915);
+
+       igt_subtest_group {
+               static const struct {
+                       const char *name;
+                       unsigned int flags;
+               } phases[] = { /* subtest name suffix and the full() flags it runs with */
+                       { "", 0 },
+                       { "-pulse", PULSE },
+                       { "-late", LATE },
+                       { "-late-pulse", PULSE | LATE },
+                       { } /* sentinel: the NULL name terminates the loop below */
+               };
+               for (typeof(*phases) *p = phases; p->name; p++)
+                       igt_subtest_f("full%s", p->name)
+                               full(i915, p->flags);
+       }
+
+       igt_fixture { /* shared teardown */
+               igt_stop_hang_detector();
+       }
+}
diff --git a/tests/meson.build b/tests/meson.build
index a5beb2e42..b8819266c 100644
--- a/tests/meson.build
+++ b/tests/meson.build
@@ -53,6 +53,7 @@ test_progs = [
        'gem_exec_async',
        'gem_exec_await',
        'gem_exec_bad_domains',
+       'gem_exec_balancer',
        'gem_exec_basic',
        'gem_exec_big',
        'gem_exec_blt',
-- 
2.17.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to