Re-evaluating the weight of the online CPUs bitmap in every invocation of num_online_cpus() over and over is a pretty useless exercise, especially when num_online_cpus() is used in hot code paths like the x86 IPI delivery or the membarrier code.
Cache the number of online CPUs in the core and just return the cached
variable.

Signed-off-by: Thomas Gleixner <t...@linutronix.de>
---
V2: Use READ/WRITE_ONCE() and add comment what it actually achieves.
    Remove the bogus lockdep assert in the write path as the caller cannot
    hold the lock. It's a task on the plugged CPU which is not the
    controlling task.
---
 include/linux/cpumask.h |   26 +++++++++++++++++---------
 kernel/cpu.c            |   22 ++++++++++++++++++++++
 2 files changed, 39 insertions(+), 9 deletions(-)

--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -95,8 +95,23 @@ extern struct cpumask __cpu_active_mask;
 #define cpu_present_mask  ((const struct cpumask *)&__cpu_present_mask)
 #define cpu_active_mask   ((const struct cpumask *)&__cpu_active_mask)
 
+extern unsigned int __num_online_cpus;
+
 #if NR_CPUS > 1
-#define num_online_cpus()	cpumask_weight(cpu_online_mask)
+/**
+ * num_online_cpus() - Read the number of online CPUs
+ *
+ * READ_ONCE() protects against theoretical load tearing and prevents
+ * the compiler from reloading the value in a function or loop.
+ *
+ * Even with that, this interface gives only a momentary snapshot and is
+ * not protected against concurrent CPU hotplug operations unless invoked
+ * from a cpuhp_lock held region.
+ */
+static inline unsigned int num_online_cpus(void)
+{
+	return READ_ONCE(__num_online_cpus);
+}
 #define num_possible_cpus()	cpumask_weight(cpu_possible_mask)
 #define num_present_cpus()	cpumask_weight(cpu_present_mask)
 #define num_active_cpus()	cpumask_weight(cpu_active_mask)
@@ -805,14 +820,7 @@ set_cpu_present(unsigned int cpu, bool p
 		cpumask_clear_cpu(cpu, &__cpu_present_mask);
 }
 
-static inline void
-set_cpu_online(unsigned int cpu, bool online)
-{
-	if (online)
-		cpumask_set_cpu(cpu, &__cpu_online_mask);
-	else
-		cpumask_clear_cpu(cpu, &__cpu_online_mask);
-}
+void set_cpu_online(unsigned int cpu, bool online);
 
 static inline void
 set_cpu_active(unsigned int cpu, bool active)
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -2288,6 +2288,9 @@ EXPORT_SYMBOL(__cpu_present_mask);
 struct cpumask __cpu_active_mask __read_mostly;
 EXPORT_SYMBOL(__cpu_active_mask);
 
+unsigned int __num_online_cpus __read_mostly;
+EXPORT_SYMBOL(__num_online_cpus);
+
 void init_cpu_present(const struct cpumask *src)
 {
 	cpumask_copy(&__cpu_present_mask, src);
@@ -2303,6 +2306,25 @@ void init_cpu_online(const struct cpumas
 	cpumask_copy(&__cpu_online_mask, src);
 }
 
+void set_cpu_online(unsigned int cpu, bool online)
+{
+	int adj = 0;
+
+	if (online) {
+		if (!cpumask_test_and_set_cpu(cpu, &__cpu_online_mask))
+			adj = 1;
+	} else {
+		if (cpumask_test_and_clear_cpu(cpu, &__cpu_online_mask))
+			adj = -1;
+	}
+	/*
+	 * WRITE_ONCE() protects only against the theoretical stupidity of
+	 * a compiler to tear the store, but won't protect readers which
+	 * are not serialized against concurrent hotplug operations.
+	 */
+	WRITE_ONCE(__num_online_cpus, __num_online_cpus + adj);
+}
+
 /*
  * Activate the first processor.
  */
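
Not part of the patch, just an illustration of the intended usage: the read
side is now O(1), but a caller which needs a count that cannot change under
it still has to hold the hotplug lock. A minimal sketch, assuming the
standard cpus_read_lock()/cpus_read_unlock() API; the helper name is made
up:

/* Illustrative sketch only; stable_online_cpus() is a made-up helper. */
#include <linux/cpu.h>
#include <linux/cpumask.h>

static unsigned int stable_online_cpus(void)
{
	unsigned int n;

	cpus_read_lock();	/* Serialize against CPU hotplug */
	n = num_online_cpus();	/* O(1) cached read, no bitmap scan */
	cpus_read_unlock();

	return n;
}

Without the lock the READ_ONCE() merely guarantees a single untorn load;
the snapshot can be stale immediately afterwards, which is good enough for
heuristic users like the x86 IPI delivery path mentioned above.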