Re-evaluating the bitmap weight of the online CPUs bitmap in every
invocation of num_online_cpus() over and over is a pretty useless
exercise, especially when num_online_cpus() is used in code paths like the
IPI delivery of x86 or the membarrier code.

Cache the number of online CPUs in the core and just return the cached
variable.

Signed-off-by: Thomas Gleixner <t...@linutronix.de>
---
V2: Use READ/WRITE_ONCE() and add a comment explaining what it actually
    achieves. Remove the bogus lockdep assert in the write path as the caller
    cannot hold the lock. It's a task on the plugged CPU, not the controlling task.
---
 include/linux/cpumask.h |   26 +++++++++++++++++---------
 kernel/cpu.c            |   22 ++++++++++++++++++++++
 2 files changed, 39 insertions(+), 9 deletions(-)

--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -95,8 +95,23 @@ extern struct cpumask __cpu_active_mask;
 #define cpu_present_mask  ((const struct cpumask *)&__cpu_present_mask)
 #define cpu_active_mask   ((const struct cpumask *)&__cpu_active_mask)
 
+extern unsigned int __num_online_cpus;
+
 #if NR_CPUS > 1
-#define num_online_cpus()      cpumask_weight(cpu_online_mask)
+/**
+ * num_online_cpus() - Read the number of online CPUs
+ *
+ * READ_ONCE() protects against theoretical load tearing and prevents
+ * the compiler from reloading the value in a function or loop.
+ *
+ * Return: the cached count, __num_online_cpus. This is only a momentary
+ * snapshot and is not protected against concurrent CPU hotplug
+ * operations unless invoked from a cpuhp_lock held region.
+ */
+static inline unsigned int num_online_cpus(void)
+{
+       return READ_ONCE(__num_online_cpus);
+}
 #define num_possible_cpus()    cpumask_weight(cpu_possible_mask)
 #define num_present_cpus()     cpumask_weight(cpu_present_mask)
 #define num_active_cpus()      cpumask_weight(cpu_active_mask)
@@ -805,14 +820,7 @@ set_cpu_present(unsigned int cpu, bool p
                cpumask_clear_cpu(cpu, &__cpu_present_mask);
 }
 
-static inline void
-set_cpu_online(unsigned int cpu, bool online)
-{
-       if (online)
-               cpumask_set_cpu(cpu, &__cpu_online_mask);
-       else
-               cpumask_clear_cpu(cpu, &__cpu_online_mask);
-}
+void set_cpu_online(unsigned int cpu, bool online);
 
 static inline void
 set_cpu_active(unsigned int cpu, bool active)
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -2288,6 +2288,9 @@ EXPORT_SYMBOL(__cpu_present_mask);
 struct cpumask __cpu_active_mask __read_mostly;
 EXPORT_SYMBOL(__cpu_active_mask);
 
+unsigned int __num_online_cpus __read_mostly;
+EXPORT_SYMBOL(__num_online_cpus);
+
 void init_cpu_present(const struct cpumask *src)
 {
        cpumask_copy(&__cpu_present_mask, src);
@@ -2303,6 +2306,25 @@ void init_cpu_online(const struct cpumas
        cpumask_copy(&__cpu_online_mask, src);
 }
 
+void set_cpu_online(unsigned int cpu, bool online)
+{
+       int adj = 0;
+
+       if (online) {
+               if (!cpumask_test_and_set_cpu(cpu, &__cpu_online_mask))
+                       adj = 1;
+       } else {
+               if (cpumask_test_and_clear_cpu(cpu, &__cpu_online_mask))
+                       adj = -1;
+       }
+       /*
+        * WRITE_ONCE() only keeps the compiler from tearing the store. The
+        * increment/decrement as a whole is a plain read-modify-write, and
+        * readers not serialized against hotplug still see just a snapshot.
+        */
+       WRITE_ONCE(__num_online_cpus, __num_online_cpus + adj);
+}
+
 /*
  * Activate the first processor.
  */

Reply via email to