SYSRQ-T on a serial console can crash the machine.  This
is because a large amount of output is sent to a slow
device while interrupts are disabled, so the NMI
watchdog sees the CPU as stuck and triggers.

The interrupt disabling happens in pc_keyb.c:keyboard_interrupt().
Changing this code to *not* disable interrupts looks complex.

I see two ways of fixing this.  One is to defer the sysrq
handling until after the spin_unlock_irq(), with:

static void keyboard_interrupt(int irq, void *dev_id, struct pt_regs *regs)
{
+       extern void (*sysrq_handler)(void);
+       void (*my_sysrq_handler)(void);

        spin_lock_irq(&kbd_controller_lock);
        handle_kbd_event();
+       my_sysrq_handler = sysrq_handler;
+       sysrq_handler = 0;
        spin_unlock_irq(&kbd_controller_lock);
+       if (my_sysrq_handler)
+               (*my_sysrq_handler)();
}

But I didn't do that, because I suspect there are other
places in the kernel (development and debug stuff) where
we want to turn the NMI watchdog handler off for a while.

So this patch takes the second approach: it creates a new API function

        enable_nmi_watchdog(int yes);

and uses it within the sysrq code.

BTW: the NMI watchdog is disabled by default as of 2.4.3-pre3;
the `nmi_watchdog=1' boot option is needed to enable it.



--- linux-2.4.2-ac16/include/linux/irq.h        Fri Mar  9 17:11:17 2001
+++ linux-ac/include/linux/irq.h        Sat Mar 10 01:02:12 2001
@@ -56,6 +56,20 @@
 
 #include <asm/hw_irq.h> /* the arch dependent stuff */
 
+/**
+ * enable_nmi_watchdog - enables/disables NMI watchdog checking.
+ * @yes: If zero, disable the watchdog; if non-zero, re-enable it
+ * 
+ * If the architecture supports the NMI watchdog, enable_nmi_watchdog() may be used
+ * to temporarily disable it.  Calls to enable_nmi_watchdog() may be nested - it is
+ * implemented as an up/down counter, so the calls must be balanced.
+ */
+#ifdef ARCH_HAS_NMI_WATCHDOG
+extern void enable_nmi_watchdog(int yes);
+#else
+#define enable_nmi_watchdog(yes) do{} while(0)
+#endif
+
 extern int handle_IRQ_event(unsigned int, struct pt_regs *, struct irqaction *);
 extern int setup_irq(unsigned int , struct irqaction * );
 
--- linux-2.4.2-ac16/include/asm-i386/irq.h     Fri Oct  8 03:17:09 1999
+++ linux-ac/include/asm-i386/irq.h     Fri Mar  9 22:59:15 2001
@@ -32,4 +32,8 @@
 extern void disable_irq_nosync(unsigned int);
 extern void enable_irq(unsigned int);
 
+#ifdef CONFIG_X86_LOCAL_APIC
+#define ARCH_HAS_NMI_WATCHDOG          /* See include/linux/irq.h */
+#endif
+
 #endif /* _ASM_IRQ_H */
--- linux-2.4.2-ac16/drivers/char/sysrq.c       Sun Feb 25 17:37:04 2001
+++ linux-ac/drivers/char/sysrq.c       Fri Mar  9 23:00:39 2001
@@ -23,6 +23,7 @@
 #include <linux/quotaops.h>
 #include <linux/smp_lock.h>
 #include <linux/module.h>
+#include <linux/irq.h>
 
 #include <asm/ptrace.h>
 
@@ -69,6 +70,11 @@
        if (!key)
                return;
 
+       /*
+        * Interrupts are disabled, and serial consoles are slow, so
+        * let's suspend the NMI watchdog.
+        */
+       enable_nmi_watchdog(0);
        console_loglevel = 7;
        printk(KERN_INFO "SysRq: ");
        switch (key) {
@@ -152,6 +158,7 @@
                /* Don't use 'A' as it's handled specially on the Sparc */
        }
 
+       enable_nmi_watchdog(1);
        console_loglevel = orig_log_level;
 }
 
--- linux-2.4.2-ac16/arch/i386/kernel/nmi.c     Fri Mar  9 17:10:51 2001
+++ linux-ac/arch/i386/kernel/nmi.c     Sat Mar 10 01:10:50 2001
@@ -226,6 +226,15 @@
 }
 
 static spinlock_t nmi_print_lock = SPIN_LOCK_UNLOCKED;
+static atomic_t nmi_watchdog_enabled = ATOMIC_INIT(0); /* 0 == enabled */
+
+void enable_nmi_watchdog(int yes)
+{
+       if (yes)
+               atomic_inc(&nmi_watchdog_enabled);
+       else
+               atomic_dec(&nmi_watchdog_enabled);
+}
 
 void nmi_watchdog_tick (struct pt_regs * regs)
 {
@@ -255,7 +264,7 @@
 
        sum = apic_timer_irqs[cpu];
 
-       if (last_irq_sums[cpu] == sum) {
+       if (last_irq_sums[cpu] == sum && atomic_read(&nmi_watchdog_enabled) == 0) {
                /*
                 * Ayiee, looks like this CPU is stuck ...
                 * wait a few IRQs (5 seconds) before doing the oops ...
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to