Add a selftest to validate the NUMA-aware scheduler functionality,
including idle CPU selection within nodes, per-node DSQs and CPU-to-node
mapping.

Signed-off-by: Andrea Righi <ari...@nvidia.com>
---
 tools/testing/selftests/sched_ext/Makefile   |   1 +
 tools/testing/selftests/sched_ext/numa.bpf.c | 100 +++++++++++++++++++
 tools/testing/selftests/sched_ext/numa.c     |  59 +++++++++++
 3 files changed, 160 insertions(+)
 create mode 100644 tools/testing/selftests/sched_ext/numa.bpf.c
 create mode 100644 tools/testing/selftests/sched_ext/numa.c

diff --git a/tools/testing/selftests/sched_ext/Makefile 
b/tools/testing/selftests/sched_ext/Makefile
index 0117622246007..f4531327b8e76 100644
--- a/tools/testing/selftests/sched_ext/Makefile
+++ b/tools/testing/selftests/sched_ext/Makefile
@@ -172,6 +172,7 @@ auto-test-targets :=                        \
        maximal                         \
        maybe_null                      \
        minimal                         \
+       numa                            \
        prog_run                        \
        reload_loop                     \
        select_cpu_dfl                  \
diff --git a/tools/testing/selftests/sched_ext/numa.bpf.c 
b/tools/testing/selftests/sched_ext/numa.bpf.c
new file mode 100644
index 0000000000000..a79d86ed54a1b
--- /dev/null
+++ b/tools/testing/selftests/sched_ext/numa.bpf.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * A scheduler that validates the behavior of the NUMA-aware
+ * functionalities.
+ *
+ * The scheduler creates a separate DSQ for each NUMA node, ensuring tasks
+ * are exclusively processed by CPUs within their respective nodes. Idle
+ * CPUs are selected only within the same node, so task migration can only
+ * occur between CPUs belonging to the same node.
+ *
+ * Copyright (c) 2025 Andrea Righi <ari...@nvidia.com>
+ */
+
+#include <scx/common.bpf.h>
+
+char _license[] SEC("license") = "GPL";
+
+UEI_DEFINE(uei);
+
+/*
+ * Flag value passed to the pick-CPU helpers in numa_select_cpu(). It is
+ * filled in by the userspace side of the test through skel->rodata before
+ * the skeleton is loaded.
+ */
+const volatile unsigned int __COMPAT_SCX_PICK_IDLE_IN_NODE;
+
+/*
+ * Return true if @cpu is set in the idle cpumask obtained for @node.
+ *
+ * The cpumask returned by the getter is released again with
+ * scx_bpf_put_cpumask() before returning.
+ */
+static bool is_cpu_idle(s32 cpu, int node)
+{
+       const struct cpumask *idle_cpumask;
+       bool idle;
+
+       idle_cpumask = __COMPAT_scx_bpf_get_idle_cpumask_node(node);
+       idle = bpf_cpumask_test_cpu(cpu, idle_cpumask);
+       scx_bpf_put_cpumask(idle_cpumask);
+
+       return idle;
+}
+
+/*
+ * ops.select_cpu(): pick a CPU for @p, restricted to the node of the CPU
+ * reported by scx_bpf_task_cpu(@p).
+ *
+ * __COMPAT_scx_bpf_pick_idle_cpu_node() is tried first; on a negative
+ * result __COMPAT_scx_bpf_pick_any_cpu_node() is used instead. The chosen
+ * CPU is then validated: it must not be set in the node's idle cpumask and
+ * it must map back to the same node, otherwise scx_bpf_error() is raised.
+ *
+ * NOTE(review): @cpu is used below without re-checking for a negative
+ * value after the second pick - confirm the helpers tolerate that.
+ */
+s32 BPF_STRUCT_OPS(numa_select_cpu,
+                  struct task_struct *p, s32 prev_cpu, u64 wake_flags)
+{
+       int node = __COMPAT_scx_bpf_cpu_node(scx_bpf_task_cpu(p));
+       s32 cpu;
+
+       /*
+        * We could just use __COMPAT_scx_bpf_pick_any_cpu_node() here,
+        * since it already tries to pick an idle CPU within the node
+        * first, but let's use both functions for better testing coverage.
+        */
+       cpu = __COMPAT_scx_bpf_pick_idle_cpu_node(p->cpus_ptr, node,
+                                       __COMPAT_SCX_PICK_IDLE_IN_NODE);
+       if (cpu < 0)
+               cpu = __COMPAT_scx_bpf_pick_any_cpu_node(p->cpus_ptr, node,
+                                               __COMPAT_SCX_PICK_IDLE_IN_NODE);
+
+       /* A CPU that was just picked must no longer show up as idle. */
+       if (is_cpu_idle(cpu, node))
+               scx_bpf_error("CPU %d should be marked as busy", cpu);
+
+       /* The pick must not have crossed into a different node. */
+       if (__COMPAT_scx_bpf_cpu_node(cpu) != node)
+               scx_bpf_error("CPU %d should be in node %d", cpu, node);
+
+       return cpu;
+}
+
+/*
+ * ops.enqueue(): queue @p on the DSQ whose id is the node of the CPU
+ * reported by scx_bpf_task_cpu(@p), using the default slice.
+ */
+void BPF_STRUCT_OPS(numa_enqueue, struct task_struct *p, u64 enq_flags)
+{
+       s32 task_cpu = scx_bpf_task_cpu(p);
+       int dsq_node = __COMPAT_scx_bpf_cpu_node(task_cpu);
+
+       scx_bpf_dsq_insert(p, dsq_node, SCX_SLICE_DFL, enq_flags);
+}
+
+/*
+ * ops.dispatch(): move work from the DSQ identified by @cpu's node to the
+ * local DSQ. @prev is unused.
+ */
+void BPF_STRUCT_OPS(numa_dispatch, s32 cpu, struct task_struct *prev)
+{
+       scx_bpf_dsq_move_to_local(__COMPAT_scx_bpf_cpu_node(cpu));
+}
+
+/*
+ * ops.init(): create one DSQ per node id.
+ *
+ * Node ids from 0 up to the bound given by __COMPAT_scx_bpf_nr_node_ids()
+ * are walked; each id is passed to scx_bpf_create_dsq() both as the DSQ id
+ * and as the node argument.
+ *
+ * Returns 0 on success, or the first non-zero value returned by
+ * scx_bpf_create_dsq().
+ */
+s32 BPF_STRUCT_OPS_SLEEPABLE(numa_init)
+{
+       int node, err;
+
+       bpf_for(node, 0, __COMPAT_scx_bpf_nr_node_ids()) {
+               err = scx_bpf_create_dsq(node, node);
+               if (err)
+                       return err;
+       }
+
+       return 0;
+}
+
+/*
+ * ops.exit(): record the exit info @ei into uei, which the userspace side
+ * of the test inspects through skel->data->uei.
+ */
+void BPF_STRUCT_OPS(numa_exit, struct scx_exit_info *ei)
+{
+       UEI_RECORD(uei, ei);
+}
+
+/*
+ * struct_ops table wiring the callbacks of this file into a scheduler
+ * named "numa". The flags field is not set here; the userspace side
+ * assigns it through skel->struct_ops.numa_ops before loading.
+ */
+SEC(".struct_ops.link")
+struct sched_ext_ops numa_ops = {
+       .select_cpu             = (void *)numa_select_cpu,
+       .enqueue                = (void *)numa_enqueue,
+       .dispatch               = (void *)numa_dispatch,
+       .init                   = (void *)numa_init,
+       .exit                   = (void *)numa_exit,
+       .name                   = "numa",
+};
diff --git a/tools/testing/selftests/sched_ext/numa.c 
b/tools/testing/selftests/sched_ext/numa.c
new file mode 100644
index 0000000000000..b060c3b65c82b
--- /dev/null
+++ b/tools/testing/selftests/sched_ext/numa.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2025 Andrea Righi <ari...@nvidia.com>
+ */
+#include <bpf/bpf.h>
+#include <scx/common.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include "numa.bpf.skel.h"
+#include "scx_test.h"
+
+/*
+ * Open and load the "numa" BPF skeleton.
+ *
+ * Before loading, the read-only __COMPAT_SCX_PICK_IDLE_IN_NODE variable is
+ * set to SCX_PICK_IDLE_IN_NODE and the scheduler flags are set to
+ * SCX_OPS_BUILTIN_IDLE_PER_NODE. On success the skeleton is handed back
+ * through @ctx and SCX_TEST_PASS is returned.
+ */
+static enum scx_test_status setup(void **ctx)
+{
+       struct numa *skel;
+       int err;
+
+       skel = numa__open();
+       SCX_FAIL_IF(!skel, "Failed to open");
+       SCX_ENUM_INIT(skel);
+       skel->rodata->__COMPAT_SCX_PICK_IDLE_IN_NODE = SCX_PICK_IDLE_IN_NODE;
+       skel->struct_ops.numa_ops->flags = SCX_OPS_BUILTIN_IDLE_PER_NODE;
+
+       /*
+        * The skeleton has not been published through @ctx yet, so it must
+        * be released here if loading fails; nothing else holds a pointer
+        * to it.
+        */
+       err = numa__load(skel);
+       if (err)
+               numa__destroy(skel);
+       SCX_FAIL_IF(err, "Failed to load skel");
+
+       *ctx = skel;
+
+       return SCX_TEST_PASS;
+}
+
+/*
+ * Attach the scheduler, let it run for one second and verify that it did
+ * not exit in the meantime.
+ *
+ * @ctx is the skeleton produced by setup(). The struct_ops link is always
+ * destroyed before returning once it has been created.
+ */
+static enum scx_test_status run(void *ctx)
+{
+       struct numa *skel = ctx;
+       struct bpf_link *link;
+       int exit_kind;
+
+       link = bpf_map__attach_struct_ops(skel->maps.numa_ops);
+       SCX_FAIL_IF(!link, "Failed to attach scheduler");
+
+       /* Just sleeping is fine, plenty of scheduling events happening */
+       sleep(1);
+
+       /*
+        * Sample the exit kind while the scheduler is still attached, then
+        * drop the link before asserting, so that a failed check does not
+        * return with the link still held.
+        */
+       exit_kind = skel->data->uei.kind;
+       bpf_link__destroy(link);
+       SCX_EQ(exit_kind, EXIT_KIND(SCX_EXIT_NONE));
+
+       return SCX_TEST_PASS;
+}
+
+/* Release the skeleton that setup() stored in @ctx. */
+static void cleanup(void *ctx)
+{
+       numa__destroy((struct numa *)ctx);
+}
+
+/*
+ * Test descriptor: ties the setup/run/cleanup callbacks of this file
+ * together under the name "numa" and registers it with the test runner.
+ */
+struct scx_test numa = {
+       .name = "numa",
+       .description = "Verify NUMA-aware functionalities",
+       .setup = setup,
+       .run = run,
+       .cleanup = cleanup,
+};
+REGISTER_SCX_TEST(&numa)
-- 
2.48.1


Reply via email to