When a CPU is running a task with core scheduling (coresched) enabled,
its sibling is forced idle if the sibling has no trusted task to run.
Reporting this forced-idle time (forceidle) is useful for understanding
the performance of tasks with different cookies throughout the system.

forceidle is added as the last column of /proc/stat:

  $ cat /proc/stat
  cpu  102034 0 11992 8347016 1046 0 11 0 0 0 991
  cpu0 59 0 212 80364 59 0 0 0 0 0 0
  cpu1 72057 0 89 9102 0 0 0 0 0 0 90

So forceidle% can be computed by any user-space tool, for example:

  CPU   user%   system% iowait% forceidle%      idle%
  cpu53 24.75   0.00    0.00%   0.99%           74.26%
  CPU   user%   system% iowait% forceidle%      idle%
  cpu53 25.74   0.00    0.00%   0.99%           73.27%
  CPU   user%   system% iowait% forceidle%      idle%
  cpu53 24.75   0.00    0.00%   0.99%           74.26%
  CPU   user%   system% iowait% forceidle%      idle%
  cpu53 25.24   0.00    0.00%   3.88%           70.87%

Signed-off-by: Aubrey Li <[email protected]>
---
 fs/proc/stat.c              | 48 +++++++++++++++++++++++++++++++++++++
 include/linux/kernel_stat.h |  1 +
 include/linux/tick.h        |  2 ++
 kernel/time/tick-sched.c    | 48 +++++++++++++++++++++++++++++++++++++
 kernel/time/tick-sched.h    |  3 +++
 5 files changed, 102 insertions(+)

diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index 46b3293015fe..b27ccac7b5a4 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -28,7 +28,11 @@ static u64 get_idle_time(struct kernel_cpustat *kcs, int cpu)
        u64 idle;
 
        idle = kcs->cpustat[CPUTIME_IDLE];
+#ifdef CONFIG_SCHED_CORE
+       if (cpu_online(cpu) && !nr_iowait_cpu(cpu) && !cpu_rq(cpu)->core->core_forceidle)
+#else
        if (cpu_online(cpu) && !nr_iowait_cpu(cpu))
+#endif
                idle += arch_idle_time(cpu);
        return idle;
 }
@@ -43,6 +47,17 @@ static u64 get_iowait_time(struct kernel_cpustat *kcs, int cpu)
        return iowait;
 }
 
+#ifdef CONFIG_SCHED_CORE
+static u64 get_forceidle_time(struct kernel_cpustat *kcs, int cpu)
+{
+       u64 forceidle = kcs->cpustat[CPUTIME_FORCEIDLE];
+
+       /* iowait wins over forceidle so idle time is not counted twice */
+       if (cpu_online(cpu) && !nr_iowait_cpu(cpu) && cpu_rq(cpu)->core->core_forceidle)
+               forceidle += arch_idle_time(cpu);
+       return forceidle;
+}
+#endif
 #else
 
 static u64 get_idle_time(struct kernel_cpustat *kcs, int cpu)
@@ -77,6 +92,21 @@ static u64 get_iowait_time(struct kernel_cpustat *kcs, int cpu)
        return iowait;
 }
 
+#ifdef CONFIG_SCHED_CORE
+/* Cumulative force idle time of @cpu, in nanoseconds. */
+static u64 get_forceidle_time(struct kernel_cpustat *kcs, int cpu)
+{
+       u64 forceidle_usecs = -1ULL;
+
+       if (cpu_online(cpu))
+               forceidle_usecs = get_cpu_forceidle_time_us(cpu, NULL);
+       /* !NO_HZ or cpu offline so we can rely on cpustat.forceidle */
+       if (forceidle_usecs == -1ULL)
+               return kcs->cpustat[CPUTIME_FORCEIDLE];
+
+       return forceidle_usecs * NSEC_PER_USEC;
+}
+#endif
 #endif
 
 static void show_irq_gap(struct seq_file *p, unsigned int gap)
@@ -111,12 +141,18 @@ static int show_stat(struct seq_file *p, void *v)
        u64 guest, guest_nice;
        u64 sum = 0;
        u64 sum_softirq = 0;
+#ifdef CONFIG_SCHED_CORE
+       u64 forceidle;
+#endif
        unsigned int per_softirq_sums[NR_SOFTIRQS] = {0};
        struct timespec64 boottime;
 
        user = nice = system = idle = iowait =
                irq = softirq = steal = 0;
        guest = guest_nice = 0;
+#ifdef CONFIG_SCHED_CORE
+       forceidle = 0;
+#endif
        getboottime64(&boottime);
 
        for_each_possible_cpu(i) {
@@ -130,6 +166,9 @@ static int show_stat(struct seq_file *p, void *v)
                system          += cpustat[CPUTIME_SYSTEM];
                idle            += get_idle_time(&kcpustat, i);
                iowait          += get_iowait_time(&kcpustat, i);
+#ifdef CONFIG_SCHED_CORE
+               forceidle       += get_forceidle_time(&kcpustat, i);
+#endif
                irq             += cpustat[CPUTIME_IRQ];
                softirq         += cpustat[CPUTIME_SOFTIRQ];
                steal           += cpustat[CPUTIME_STEAL];
@@ -157,6 +196,9 @@ static int show_stat(struct seq_file *p, void *v)
        seq_put_decimal_ull(p, " ", nsec_to_clock_t(steal));
        seq_put_decimal_ull(p, " ", nsec_to_clock_t(guest));
        seq_put_decimal_ull(p, " ", nsec_to_clock_t(guest_nice));
+#ifdef CONFIG_SCHED_CORE
+       seq_put_decimal_ull(p, " ", nsec_to_clock_t(forceidle));
+#endif
        seq_putc(p, '\n');
 
        for_each_online_cpu(i) {
@@ -171,6 +213,9 @@ static int show_stat(struct seq_file *p, void *v)
                system          = cpustat[CPUTIME_SYSTEM];
                idle            = get_idle_time(&kcpustat, i);
                iowait          = get_iowait_time(&kcpustat, i);
+#ifdef CONFIG_SCHED_CORE
+               forceidle       = get_forceidle_time(&kcpustat, i);
+#endif
                irq             = cpustat[CPUTIME_IRQ];
                softirq         = cpustat[CPUTIME_SOFTIRQ];
                steal           = cpustat[CPUTIME_STEAL];
@@ -187,6 +232,9 @@ static int show_stat(struct seq_file *p, void *v)
                seq_put_decimal_ull(p, " ", nsec_to_clock_t(steal));
                seq_put_decimal_ull(p, " ", nsec_to_clock_t(guest));
                seq_put_decimal_ull(p, " ", nsec_to_clock_t(guest_nice));
+#ifdef CONFIG_SCHED_CORE
+               seq_put_decimal_ull(p, " ", nsec_to_clock_t(forceidle));
+#endif
                seq_putc(p, '\n');
        }
        seq_put_decimal_ull(p, "intr ", (unsigned long long)sum);
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 89f0745c096d..c7ce4bfe757e 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -25,6 +25,7 @@ enum cpu_usage_stat {
        CPUTIME_IRQ,
        CPUTIME_IDLE,
        CPUTIME_IOWAIT,
+       CPUTIME_FORCEIDLE,
        CPUTIME_STEAL,
        CPUTIME_GUEST,
        CPUTIME_GUEST_NICE,
diff --git a/include/linux/tick.h b/include/linux/tick.h
index 7340613c7eff..7fce78f46930 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -139,6 +139,7 @@ extern unsigned long tick_nohz_get_idle_calls(void);
 extern unsigned long tick_nohz_get_idle_calls_cpu(int cpu);
 extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time);
 extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time);
+extern u64 get_cpu_forceidle_time_us(int cpu, u64 *last_update_time);
 
 static inline void tick_nohz_idle_stop_tick_protected(void)
 {
@@ -169,6 +170,7 @@ static inline ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next)
 }
 static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return -1; }
 static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; }
+static inline u64 get_cpu_forceidle_time_us(int cpu, u64 *unused) { return -1; }
 
 static inline void tick_nohz_idle_stop_tick_protected(void) { }
 #endif /* !CONFIG_NO_HZ_COMMON */
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 1b734070f028..de94e5bab5a1 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -29,6 +29,7 @@
 #include <asm/irq_regs.h>
 
 #include "tick-internal.h"
+#include "../sched/sched.h"
 
 #include <trace/events/timer.h>
 
@@ -547,6 +548,10 @@ update_ts_time_stats(int cpu, struct tick_sched *ts, ktime_t now, u64 *last_upda
                delta = ktime_sub(now, ts->idle_entrytime);
                if (nr_iowait_cpu(cpu) > 0)
                        ts->iowait_sleeptime = ktime_add(ts->iowait_sleeptime, delta);
+#ifdef CONFIG_SCHED_CORE
+               else if (cpu_rq(cpu)->core->core_forceidle)
+                       ts->forceidle_sleeptime = ktime_add(ts->forceidle_sleeptime, delta);
+#endif
                else
                        ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
                ts->idle_entrytime = now;
@@ -653,6 +658,49 @@ u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time)
 }
 EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us);
 
+#ifdef CONFIG_SCHED_CORE
+/**
+ * get_cpu_forceidle_time_us - get the total force idle time of a CPU
+ * @cpu: CPU number to query
+ * @last_update_time: variable to store update time in. Do not update
+ * counters if NULL.
+ *
+ * Return the cumulative force idle time (since boot) for a given
+ * CPU, in microseconds.
+ *
+ * This time is measured via accounting rather than sampling,
+ * and is as accurate as ktime_get() is. As in update_ts_time_stats(),
+ * an idle period with I/O pending is accounted as iowait, not as
+ * force idle.
+ *
+ * This function returns -1 if NOHZ is not enabled.
+ */
+u64 get_cpu_forceidle_time_us(int cpu, u64 *last_update_time)
+{
+       struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
+       ktime_t now, forceidle;
+
+       if (!tick_nohz_active)
+               return -1;
+
+       now = ktime_get();
+       if (last_update_time)
+               update_ts_time_stats(cpu, ts, now, last_update_time);
+
+       forceidle = ts->forceidle_sleeptime;
+       /* Counters not updated: add the in-flight force idle period. */
+       if (!last_update_time && ts->idle_active &&
+           !nr_iowait_cpu(cpu) && cpu_rq(cpu)->core->core_forceidle) {
+               ktime_t delta = ktime_sub(now, ts->idle_entrytime);
+
+               forceidle = ktime_add(forceidle, delta);
+       }
+
+       return ktime_to_us(forceidle);
+}
+EXPORT_SYMBOL_GPL(get_cpu_forceidle_time_us);
+#endif
+
 static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
 {
        hrtimer_cancel(&ts->sched_timer);
diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h
index 4fb06527cf64..4c00c5399055 100644
--- a/kernel/time/tick-sched.h
+++ b/kernel/time/tick-sched.h
@@ -71,6 +71,9 @@ struct tick_sched {
        ktime_t                         idle_exittime;
        ktime_t                         idle_sleeptime;
        ktime_t                         iowait_sleeptime;
+#ifdef CONFIG_SCHED_CORE
+       ktime_t                         forceidle_sleeptime;
+#endif
        unsigned long                   last_jiffies;
        u64                             timer_expires;
        u64                             timer_expires_base;
-- 
2.17.1

Reply via email to