To monitor the effectiveness of vCPU soft-affinity on NUMA hosts,
we'd like to create a vCPU metric that accumulates the amount of
vCPU time running outside of the soft affinity mask of the sched-unit:

- Add a new time counter, nonaffine_time to struct vcpu.

- Accumulate the nonaffine_time on vcpu_runstate_change():
  Account the time spent in the RUNSTATE_running state outside
  of unit->cpu_soft_affinity: It is always initialized and defaults
  to cpumask_all (bits for all NR_CPUS set), so we only accumulate
  nonaffine time when the vCPU runs on an unset CPU (non-affine).

In the next patch, this field can be used to retrieve the accumulated
nonaffine running time e.g. using vcpu_runstate_get().

Signed-off-by: Bernhard Kaindl <bernhard.kai...@cloud.com>
---
 xen/common/sched/core.c | 20 ++++++++++++++++++++
 xen/include/xen/sched.h | 11 +++++++++++
 2 files changed, 31 insertions(+)

diff --git a/xen/common/sched/core.c b/xen/common/sched/core.c
index 13fdf57e57..489255b9c6 100644
--- a/xen/common/sched/core.c
+++ b/xen/common/sched/core.c
@@ -260,6 +260,23 @@ static inline void vcpu_urgent_count_update(struct vcpu *v)
     }
 }
 
+/*
+ * For accounting non-affine running time of a vCPU, return true if
+ * the vCPU is running in RUNSTATE_running state while not on a CPU
+ * in unit->cpu_soft_affinity.
+ */
+static inline bool nonaffine(const struct vcpu *v,
+                             const struct sched_unit *unit)
+{
+    /*
+     * unit->cpu_soft_affinity is always initialized and defaults to
+     * cpumask_all (bits for all NR_CPUS set), so we only accumulate
+     * nonaffine time when the vCPU runs on an unset CPU (non-affine).
+     */
+    return v->runstate.state == RUNSTATE_running &&
+           !cpumask_test_cpu(v->processor, unit->cpu_soft_affinity);
+}
+
 static inline void vcpu_runstate_change(
     struct vcpu *v, int new_state, s_time_t new_entry_time)
 {
@@ -285,6 +302,9 @@ static inline void vcpu_runstate_change(
     {
         v->runstate.time[v->runstate.state] += delta;
         v->runstate.state_entry_time = new_entry_time;
+
+        if ( nonaffine(v, unit) ) /* When running nonaffine, add delta */
+            v->nonaffine_time += delta;
     }
 
     v->runstate.state = new_state;
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index fe53d4fab7..aba60afd4f 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -198,7 +198,18 @@ struct vcpu
 
     struct sched_unit *sched_unit;
 
+    /*
+     * The struct vcpu_runstate_info contains the vCPU time spent
+     * in each runstate and the entry time of the current runstate.
+     *
+     * Note: This field is used for the guest runstate shared memory area.
+     * Therefore, it is part of the frozen guest API and cannot be changed.
+     */
     struct vcpu_runstate_info runstate;
+
+    /* vCPU time running outside the scheduling unit's soft_affinity mask */
+    uint64_t         nonaffine_time;
+
 #ifndef CONFIG_COMPAT
 # define runstate_guest(v) ((v)->runstate_guest)
     XEN_GUEST_HANDLE(vcpu_runstate_info_t) runstate_guest; /* guest address */
-- 
2.43.0


Reply via email to