The attached patch-in-progress removes the per-cpu statistics from struct kernel_stat and puts them in a cpu_stat structure, one per cpu, cacheline padded. The data is still coolated and presented through /proc/stat, but another file /proc/cpustat is also added. The locking is as nonexistant as it was with kernel_stat, but who cares, they're just fuzzy stats to be eyeballed by system tuners :). A tool for printing the cpu stats specifically can be found near: http://www.osdlab.org/sw_resources/cpustat/index.shtml Its output is almost identical to solaris' mpstat. I'm not sure I like the macro use, but it shields the callers from the union garbage. We can easily also make a THIS_CPU_STAT_ADD() interface, as some have hinted would be nice :) Currently its mostly ( :) ) only collecting the stats that were collected in kernel_stat. I'd like to add more stats -- page faults, syscalls, cross-cpu calls, etc. I understand people not wanting more live cachelines in the fast paths. I can make CPU_CRITICAL_STAT defines that are config-ed out.. comments? If its ok I can whip up a patch that updates all the ports use of ->irqs[] as well. - z [ heading out for lunch :) ]
--- linux-2.4.5-cpustat/fs/proc/proc_misc.c.cpustat Fri Apr 13 20:26:07 2001 +++ linux-2.4.5-cpustat/fs/proc/proc_misc.c Thu Jun 21 12:23:49 2001 @@ -265,32 +265,36 @@ int i, len; extern unsigned long total_forks; unsigned long jif = jiffies; - unsigned int sum = 0, user = 0, nice = 0, system = 0; + unsigned int sum = 0, user = 0, nice = 0, system = 0, ctxt = 0; int major, disk; for (i = 0 ; i < smp_num_cpus; i++) { int cpu = cpu_logical_map(i), j; - user += kstat.per_cpu_user[cpu]; - nice += kstat.per_cpu_nice[cpu]; - system += kstat.per_cpu_system[cpu]; + user += CPU_STAT_VAL(cpu, user); + nice += CPU_STAT_VAL(cpu, nice); + system += CPU_STAT_VAL(cpu, system); + ctxt += CPU_STAT_VAL(cpu, context_swtch); #if !defined(CONFIG_ARCH_S390) for (j = 0 ; j < NR_IRQS ; j++) - sum += kstat.irqs[cpu][j]; + sum += CPU_STAT_VAL(cpu, irqs[j]); #endif } len = sprintf(page, "cpu %u %u %u %lu\n", user, nice, system, jif * smp_num_cpus - (user + nice + system)); - for (i = 0 ; i < smp_num_cpus; i++) + for (i = 0 ; i < smp_num_cpus; i++) { + unsigned int user_i, nice_i, system_i; + + user_i = CPU_STAT_VAL(cpu_logical_map(i), user); + nice_i = CPU_STAT_VAL(cpu_logical_map(i), nice); + system_i = CPU_STAT_VAL(cpu_logical_map(i), system); + len += sprintf(page + len, "cpu%d %u %u %u %lu\n", i, - kstat.per_cpu_user[cpu_logical_map(i)], - kstat.per_cpu_nice[cpu_logical_map(i)], - kstat.per_cpu_system[cpu_logical_map(i)], - jif - ( kstat.per_cpu_user[cpu_logical_map(i)] \ - + kstat.per_cpu_nice[cpu_logical_map(i)] \ - + kstat.per_cpu_system[cpu_logical_map(i)])); + user_i, nice_i, system_i, + jif - ( user_i + nice_i + system_i ) ); + } len += sprintf(page + len, "page %u %u\n" "swap %u %u\n" @@ -330,13 +334,58 @@ "\nctxt %u\n" "btime %lu\n" "processes %lu\n", - kstat.context_swtch, + ctxt, xtime.tv_sec - jif / HZ, total_forks); return proc_calc_metrics(page, start, off, count, eof, len); } +static int cstat_read_proc(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + int i, len; + + len = sprintf(page, "cpu_stat 0.0\n"); + + for (i = 0 ; i < smp_num_cpus; i++) { + unsigned int user, nice, system; + int j, cpu = cpu_logical_map(i); + +#if !defined(CONFIG_ARCH_S390) + len += sprintf(page + len, "cpu%d irqs ", cpu); + for (j = 0 ; j < NR_IRQS ; j++) { + len += sprintf(page + len, " %u", + CPU_STAT_VAL(cpu, irqs[j])); + } + len += sprintf(page + len, "\n"); +#endif +#if defined(CONFIG_SMP) + len += sprintf(page + len, "cpu%d context_migration %u\n", + cpu, CPU_STAT_VAL(cpu, context_migration)); +#endif + len += sprintf(page + len, "cpu%d bottom_halves %u\n", + cpu, CPU_STAT_VAL(cpu, bh)); + len += sprintf(page + len, "cpu%d context_switches %u\n", + cpu, CPU_STAT_VAL(cpu, context_swtch)); + + user = CPU_STAT_VAL(cpu_logical_map(i), user); + nice = CPU_STAT_VAL(cpu_logical_map(i), nice); + system = CPU_STAT_VAL(cpu_logical_map(i), system); + + len += sprintf(page + len, "cpu%d user_time %u\n", + cpu, user); + len += sprintf(page + len, "cpu%d nice_time %u\n", + cpu, nice); + len += sprintf(page + len, "cpu%d system_time %u\n", + cpu, system); + len += sprintf(page + len, "cpu%d unaccounted_time %u\n", + cpu, jiffies - ( user + nice + system ) ); + } + + return proc_calc_metrics(page, start, off, count, eof, len); +} + static int devices_read_proc(char *page, char **start, off_t off, int count, int *eof, void *data) { @@ -532,6 +581,7 @@ {"ksyms", ksyms_read_proc}, #endif {"stat", kstat_read_proc}, + {"cpustat", cstat_read_proc}, {"devices", devices_read_proc}, {"partitions", partitions_read_proc}, #if !defined(CONFIG_ARCH_S390) --- linux-2.4.5-cpustat/kernel/sched.c.cpustat Fri Apr 20 18:26:16 2001 +++ linux-2.4.5-cpustat/kernel/sched.c Thu Jun 21 12:01:34 2001 @@ -107,6 +107,8 @@ struct kernel_stat kstat; +union cpu_stat_u cpu_stats[NR_CPUS] __cacheline_aligned = { {{0, }}}; + #ifdef CONFIG_SMP #define idle_task(cpu) (init_tasks[cpu_number_map(cpu)]) @@ -607,6 +609,7 @@ sched_data->curr = next; #ifdef CONFIG_SMP next->has_cpu = 1; + CPU_STAT_ADD(this_cpu, context_migration, next->processor != this_cpu ); next->processor = this_cpu; #endif spin_unlock_irq(&runqueue_lock); @@ -632,7 +635,7 @@ #endif /* CONFIG_SMP */ - kstat.context_swtch++; + CPU_STAT_ADD(this_cpu, context_swtch, 1); /* * there are 3 processes which are affected by a context switch: * --- linux-2.4.5-cpustat/kernel/timer.c.cpustat Sun Dec 10 09:53:19 2000 +++ linux-2.4.5-cpustat/kernel/timer.c Wed Jun 20 13:45:06 2001 @@ -588,12 +588,12 @@ p->need_resched = 1; } if (p->nice > 0) - kstat.per_cpu_nice[cpu] += user_tick; + CPU_STAT_ADD(cpu, nice, user_tick); else - kstat.per_cpu_user[cpu] += user_tick; - kstat.per_cpu_system[cpu] += system; + CPU_STAT_ADD(cpu, user, user_tick); + CPU_STAT_ADD(cpu, system, system); } else if (local_bh_count(cpu) || local_irq_count(cpu) > 1) - kstat.per_cpu_system[cpu] += system; + CPU_STAT_ADD(cpu, system, system); } /* --- linux-2.4.5-cpustat/kernel/softirq.c.cpustat Fri Dec 29 14:07:24 2000 +++ linux-2.4.5-cpustat/kernel/softirq.c Thu Jun 21 11:29:01 2001 @@ -74,7 +74,7 @@ mask &= ~active; do { - if (active & 1) + if (active & 1) h->action(h); h++; active >>= 1; @@ -253,8 +253,10 @@ if (!hardirq_trylock(cpu)) goto resched_unlock; - if (bh_base[nr]) + if (bh_base[nr]) { bh_base[nr](); + CPU_STAT_ADD(cpu, bh, 1); + } hardirq_endlock(cpu); spin_unlock(&global_bh_lock); --- linux-2.4.5-cpustat/include/linux/kernel_stat.h.cpustat Fri May 25 18:01:27 2001 +++ linux-2.4.5-cpustat/include/linux/kernel_stat.h Thu Jun 21 11:28:52 2001 @@ -16,9 +16,6 @@ #define DK_MAX_DISK 16 struct kernel_stat { - unsigned int per_cpu_user[NR_CPUS], - per_cpu_nice[NR_CPUS], - per_cpu_system[NR_CPUS]; unsigned int dk_drive[DK_MAX_MAJOR][DK_MAX_DISK]; unsigned int dk_drive_rio[DK_MAX_MAJOR][DK_MAX_DISK]; unsigned int dk_drive_wio[DK_MAX_MAJOR][DK_MAX_DISK]; @@ -26,17 +23,33 @@ unsigned int dk_drive_wblk[DK_MAX_MAJOR][DK_MAX_DISK]; unsigned int pgpgin, pgpgout; unsigned int pswpin, pswpout; -#if !defined(CONFIG_ARCH_S390) - unsigned int irqs[NR_CPUS][NR_IRQS]; -#endif unsigned int ipackets, opackets; unsigned int ierrors, oerrors; unsigned int collisions; - unsigned int context_swtch; }; extern struct kernel_stat kstat; +union cpu_stat_u { + struct cpu_stat { + unsigned int user, nice, system; + unsigned int context_swtch; + unsigned int bh; +#if defined(CONFIG_SMP) + unsigned int context_migration; +#endif +#if !defined(CONFIG_ARCH_S390) + unsigned int irqs[NR_IRQS]; +#endif + } cs; + char __pad [SMP_CACHE_BYTES]; +}; + +extern union cpu_stat_u cpu_stats[NR_CPUS]; + +#define CPU_STAT_ADD(CPU, STAT, VAL) cpu_stats[CPU].cs.STAT += VAL +#define CPU_STAT_VAL(CPU, STAT) cpu_stats[CPU].cs.STAT + #if !defined(CONFIG_ARCH_S390) /* * Number of interrupts per specific IRQ source, since bootup @@ -46,7 +59,7 @@ int i, sum=0; for (i = 0 ; i < smp_num_cpus ; i++) - sum += kstat.irqs[cpu_logical_map(i)][irq]; + sum += CPU_STAT_VAL(cpu_logical_map(i), irqs[irq]); return sum; } --- linux-2.4.5-cpustat/arch/i386/kernel/irq.c.cpustat Fri Feb 9 11:29:44 2001 +++ linux-2.4.5-cpustat/arch/i386/kernel/irq.c Wed Jun 20 14:02:34 2001 @@ -146,7 +146,7 @@ #else for (j = 0; j < smp_num_cpus; j++) p += sprintf(p, "%10u ", - kstat.irqs[cpu_logical_map(j)][i]); + CPU_STAT_VAL(cpu_logical_map(j), irqs[i])); #endif p += sprintf(p, " %14s", irq_desc[i].handler->typename); p += sprintf(p, " %s", action->name); @@ -564,7 +564,7 @@ struct irqaction * action; unsigned int status; - kstat.irqs[cpu][irq]++; + CPU_STAT_ADD(cpu, irqs[irq], 1); spin_lock(&desc->lock); desc->handler->ack(irq); /*