Linus,

Please pull from the repository and branch

  git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm.git hotplug-linus

to receive a patchset which makes cpu hotplug (and therefore, suspend and
resume) more robust when running virtual machines.  The core issue is that
we need a notification about a cpu going away at a point in time where it's
still alive, but not running any user processes.  Such a notification does
not exist today, so the patchset adds it as a new CPU_DYING notification.

The patchset is against 2.6.22-rc3.

Shortlog:

Avi Kivity (7):
      HOTPLUG: Add CPU_DYING notifier
      HOTPLUG: Adapt cpuset hotplug callback to CPU_DYING
      HOTPLUG: Adapt thermal throttle to CPU_DYING
      SMP: Implement on_cpu()
      KVM: Keep track of which cpus have virtualization enabled
      KVM: Tune hotplug/suspend IPIs
      KVM: Use CPU_DYING for disabling virtualization

 arch/i386/kernel/cpu/mcheck/therm_throt.c |    6 ++-
 drivers/kvm/kvm_main.c                    |   50 +++++++++++++++++++++--------
 include/linux/notifier.h                  |    3 ++
 include/linux/smp.h                       |   16 +++++++++
 kernel/cpu.c                              |   16 ++++++++-
 kernel/cpuset.c                           |    3 ++
 kernel/softirq.c                          |   24 ++++++++++++++
 7 files changed, 100 insertions(+), 18 deletions(-)

And the patch in all its glory:

diff --git a/arch/i386/kernel/cpu/mcheck/therm_throt.c 
b/arch/i386/kernel/cpu/mcheck/therm_throt.c
index 7ba7c3a..1203dc5 100644
--- a/arch/i386/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/i386/kernel/cpu/mcheck/therm_throt.c
@@ -134,19 +134,21 @@ static __cpuinit int thermal_throttle_cpu_callback(struct 
notifier_block *nfb,
        int err;
 
        sys_dev = get_cpu_sysdev(cpu);
-       mutex_lock(&therm_cpu_lock);
        switch (action) {
        case CPU_ONLINE:
        case CPU_ONLINE_FROZEN:
+               mutex_lock(&therm_cpu_lock);
                err = thermal_throttle_add_dev(sys_dev);
+               mutex_unlock(&therm_cpu_lock);
                WARN_ON(err);
                break;
        case CPU_DEAD:
        case CPU_DEAD_FROZEN:
+               mutex_lock(&therm_cpu_lock);
                thermal_throttle_remove_dev(sys_dev);
+               mutex_unlock(&therm_cpu_lock);
                break;
        }
-       mutex_unlock(&therm_cpu_lock);
        return NOTIFY_OK;
 }
 
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index da985b3..1ad5ea1 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -41,6 +41,7 @@
 #include <linux/fs.h>
 #include <linux/mount.h>
 #include <linux/sched.h>
+#include <linux/cpumask.h>
 
 #include "x86_emulate.h"
 #include "segment_descriptor.h"
@@ -51,8 +52,12 @@ MODULE_LICENSE("GPL");
 static DEFINE_SPINLOCK(kvm_lock);
 static LIST_HEAD(vm_list);
 
+static cpumask_t cpus_hardware_enabled;
+
 struct kvm_arch_ops *kvm_arch_ops;
 
+static void hardware_disable(void *ignored);
+
 #define STAT_OFFSET(x) offsetof(struct kvm_vcpu, stat.x)
 
 static struct kvm_stats_debugfs_item {
@@ -2840,7 +2845,7 @@ static int kvm_reboot(struct notifier_block *notifier, 
unsigned long val,
                 * in vmx root mode.
                 */
                printk(KERN_INFO "kvm: exiting hardware virtualization\n");
-               on_each_cpu(kvm_arch_ops->hardware_disable, NULL, 0, 1);
+               on_each_cpu(hardware_disable, NULL, 0, 1);
        }
        return NOTIFY_OK;
 }
@@ -2883,28 +2888,46 @@ static void decache_vcpus_on_cpu(int cpu)
        spin_unlock(&kvm_lock);
 }
 
+static void hardware_enable(void *junk)
+{
+       int cpu = raw_smp_processor_id();
+
+       if (cpu_isset(cpu, cpus_hardware_enabled))
+               return;
+       cpu_set(cpu, cpus_hardware_enabled);
+       kvm_arch_ops->hardware_enable(NULL);
+}
+
+static void hardware_disable(void *junk)
+{
+       int cpu = raw_smp_processor_id();
+
+       if (!cpu_isset(cpu, cpus_hardware_enabled))
+               return;
+       cpu_clear(cpu, cpus_hardware_enabled);
+       decache_vcpus_on_cpu(cpu);
+       kvm_arch_ops->hardware_disable(NULL);
+}
+
 static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
                           void *v)
 {
        int cpu = (long)v;
 
        switch (val) {
-       case CPU_DOWN_PREPARE:
-       case CPU_DOWN_PREPARE_FROZEN:
+       case CPU_DYING:
+       case CPU_DYING_FROZEN:
        case CPU_UP_CANCELED:
        case CPU_UP_CANCELED_FROZEN:
                printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n",
                       cpu);
-               decache_vcpus_on_cpu(cpu);
-               smp_call_function_single(cpu, kvm_arch_ops->hardware_disable,
-                                        NULL, 0, 1);
+               on_cpu(cpu, hardware_disable, NULL, 0, 1);
                break;
        case CPU_ONLINE:
        case CPU_ONLINE_FROZEN:
                printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n",
                       cpu);
-               smp_call_function_single(cpu, kvm_arch_ops->hardware_enable,
-                                        NULL, 0, 1);
+               on_cpu(cpu, hardware_enable, NULL, 0, 1);
                break;
        }
        return NOTIFY_OK;
@@ -2961,14 +2984,13 @@ static void kvm_exit_debug(void)
 
 static int kvm_suspend(struct sys_device *dev, pm_message_t state)
 {
-       decache_vcpus_on_cpu(raw_smp_processor_id());
-       on_each_cpu(kvm_arch_ops->hardware_disable, NULL, 0, 1);
+       hardware_disable(NULL);
        return 0;
 }
 
 static int kvm_resume(struct sys_device *dev)
 {
-       on_each_cpu(kvm_arch_ops->hardware_enable, NULL, 0, 1);
+       hardware_enable(NULL);
        return 0;
 }
 
@@ -3021,7 +3043,7 @@ int kvm_init_arch(struct kvm_arch_ops *ops, struct module 
*module)
        if (r < 0)
                goto out;
 
-       on_each_cpu(kvm_arch_ops->hardware_enable, NULL, 0, 1);
+       on_each_cpu(hardware_enable, NULL, 0, 1);
        r = register_cpu_notifier(&kvm_cpu_notifier);
        if (r)
                goto out_free_1;
@@ -3053,7 +3075,7 @@ out_free_2:
        unregister_reboot_notifier(&kvm_reboot_notifier);
        unregister_cpu_notifier(&kvm_cpu_notifier);
 out_free_1:
-       on_each_cpu(kvm_arch_ops->hardware_disable, NULL, 0, 1);
+       on_each_cpu(hardware_disable, NULL, 0, 1);
        kvm_arch_ops->hardware_unsetup();
 out:
        kvm_arch_ops = NULL;
@@ -3067,7 +3089,7 @@ void kvm_exit_arch(void)
        sysdev_class_unregister(&kvm_sysdev_class);
        unregister_reboot_notifier(&kvm_reboot_notifier);
        unregister_cpu_notifier(&kvm_cpu_notifier);
-       on_each_cpu(kvm_arch_ops->hardware_disable, NULL, 0, 1);
+       on_each_cpu(hardware_disable, NULL, 0, 1);
        kvm_arch_ops->hardware_unsetup();
        kvm_arch_ops = NULL;
 }
diff --git a/include/linux/notifier.h b/include/linux/notifier.h
index 9431101..576f2bb 100644
--- a/include/linux/notifier.h
+++ b/include/linux/notifier.h
@@ -196,6 +196,8 @@ extern int __srcu_notifier_call_chain(struct 
srcu_notifier_head *nh,
 #define CPU_DEAD               0x0007 /* CPU (unsigned)v dead */
 #define CPU_LOCK_ACQUIRE       0x0008 /* Acquire all hotcpu locks */
 #define CPU_LOCK_RELEASE       0x0009 /* Release all hotcpu locks */
+#define CPU_DYING              0x000A /* CPU (unsigned)v not running any task,
+                                       * not handling interrupts, soon dead */
 
 /* Used for CPU hotplug events occuring while tasks are frozen due to a suspend
  * operation in progress
@@ -208,6 +210,7 @@ extern int __srcu_notifier_call_chain(struct 
srcu_notifier_head *nh,
 #define CPU_DOWN_PREPARE_FROZEN        (CPU_DOWN_PREPARE | CPU_TASKS_FROZEN)
 #define CPU_DOWN_FAILED_FROZEN (CPU_DOWN_FAILED | CPU_TASKS_FROZEN)
 #define CPU_DEAD_FROZEN                (CPU_DEAD | CPU_TASKS_FROZEN)
+#define CPU_DYING_FROZEN       (CPU_DYING | CPU_TASKS_FROZEN)
 
 #endif /* __KERNEL__ */
 #endif /* _LINUX_NOTIFIER_H */
diff --git a/include/linux/smp.h b/include/linux/smp.h
index 96ac21f..613edd2 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -7,6 +7,7 @@
  */
 
 #include <linux/errno.h>
+#include <asm/system.h>
 
 extern void cpu_idle(void);
 
@@ -61,6 +62,11 @@ int smp_call_function_single(int cpuid, void (*func) (void 
*info), void *info,
  * Call a function on all processors
  */
 int on_each_cpu(void (*func) (void *info), void *info, int retry, int wait);
+/*
+ * Call a function on one processor
+ */
+int on_cpu(int cpu, void (*func)(void *info), void *info,
+              int retry, int wait);
 
 #define MSG_ALL_BUT_SELF       0x8000  /* Assume <32768 CPU's */
 #define MSG_ALL                        0x8001
@@ -96,6 +102,16 @@ static inline int up_smp_call_function(void)
                local_irq_enable();             \
                0;                              \
        })
+
+static inline int on_cpu(int cpu, void (*func)(void *info), void *info,
+                            int retry, int wait)
+{
+       local_irq_disable();
+       func(info);
+       local_irq_enable();
+       return 0;
+}
+
 static inline void smp_send_reschedule(int cpu) { }
 #define num_booting_cpus()                     1
 #define smp_prepare_boot_cpu()                 do {} while (0)
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 208cf34..181ae70 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -103,11 +103,19 @@ static inline void check_for_tasks(int cpu)
        write_unlock_irq(&tasklist_lock);
 }
 
+struct take_cpu_down_param {
+       unsigned long mod;
+       void *hcpu;
+};
+
 /* Take this CPU down. */
-static int take_cpu_down(void *unused)
+static int take_cpu_down(void *_param)
 {
+       struct take_cpu_down_param *param = _param;
        int err;
 
+       raw_notifier_call_chain(&cpu_chain, CPU_DYING | param->mod,
+                               param->hcpu);
        /* Ensure this CPU doesn't handle any more interrupts. */
        err = __cpu_disable();
        if (err < 0)
@@ -127,6 +135,10 @@ static int _cpu_down(unsigned int cpu, int tasks_frozen)
        cpumask_t old_allowed, tmp;
        void *hcpu = (void *)(long)cpu;
        unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
+       struct take_cpu_down_param tcd_param = {
+               .mod = mod,
+               .hcpu = hcpu,
+       };
 
        if (num_online_cpus() == 1)
                return -EBUSY;
@@ -153,7 +165,7 @@ static int _cpu_down(unsigned int cpu, int tasks_frozen)
        set_cpus_allowed(current, tmp);
 
        mutex_lock(&cpu_bitmask_lock);
-       p = __stop_machine_run(take_cpu_down, NULL, cpu);
+       p = __stop_machine_run(take_cpu_down, &tcd_param, cpu);
        mutex_unlock(&cpu_bitmask_lock);
 
        if (IS_ERR(p) || cpu_online(cpu)) {
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index f57854b..584953a 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2138,6 +2138,9 @@ static void common_cpu_mem_hotplug_unplug(void)
 static int cpuset_handle_cpuhp(struct notifier_block *nb,
                                unsigned long phase, void *cpu)
 {
+       if (phase == CPU_DYING || phase == CPU_DYING_FROZEN)
+               return NOTIFY_DONE;
+
        common_cpu_mem_hotplug_unplug();
        return 0;
 }
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 0b9886a..11666f7 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -658,4 +658,28 @@ int on_each_cpu(void (*func) (void *info), void *info, int 
retry, int wait)
        return ret;
 }
 EXPORT_SYMBOL(on_each_cpu);
+
+/*
+ * Call a function on one processor, which might be the currently executing
+ * processor.
+ */
+int on_cpu(int cpu, void (*func) (void *info), void *info,
+              int retry, int wait)
+{
+       int ret;
+       int this_cpu;
+
+       this_cpu = get_cpu();
+       if (this_cpu == cpu) {
+               local_irq_disable();
+               func(info);
+               local_irq_enable();
+               ret = 0;
+       } else
+               ret = smp_call_function_single(cpu, func, info, retry, wait);
+       put_cpu();
+       return ret;
+}
+EXPORT_SYMBOL(on_cpu);
+
 #endif
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to