Ingo,

Doing some more tests on the patch, I found that I don't like the
placement in sched.c of the touch_light_softlockup_watchdog call. I figure
that it may be better to put it into __schedule instead.  This really
shows where a task is taken off the run queue.

-- Steve

New patch: I've only tested this with the deadlock modules, and not with
the kjournald case. I'll do that tonight when I no longer need my
computer for other tests.

Signed-off-by: Steven Rostedt <[EMAIL PROTECTED]>

Index: linux_realtime_ernie/kernel/irq/handle.c
===================================================================
--- linux_realtime_ernie/kernel/irq/handle.c    (revision 266)
+++ linux_realtime_ernie/kernel/irq/handle.c    (working copy)
@@ -177,6 +177,14 @@
         */
        local_irq_save(flags);
 #endif
+       /*
+        * If the task is currently running in user mode, don't 
+        * detect soft lockups.  If CONFIG_DETECT_SOFTLOCKUP is not
+        * configured, this should be optimized out.
+        */
+       if (user_mode(regs))
+               touch_light_softlockup_watchdog();
+
        kstat_this_cpu.irqs[irq]++;
        if (desc->status & IRQ_PER_CPU) {
                irqreturn_t action_ret;
Index: linux_realtime_ernie/kernel/sched.c
===================================================================
--- linux_realtime_ernie/kernel/sched.c (revision 266)
+++ linux_realtime_ernie/kernel/sched.c (working copy)
@@ -3209,6 +3209,7 @@
                else {
                        if (prev->state == TASK_UNINTERRUPTIBLE)
                                rq->nr_uninterruptible++;
+                       touch_light_softlockup_watchdog();
                        deactivate_task(prev, rq);
                }
        }
Index: linux_realtime_ernie/kernel/softlockup.c
===================================================================
--- linux_realtime_ernie/kernel/softlockup.c    (revision 269)
+++ linux_realtime_ernie/kernel/softlockup.c    (working copy)
@@ -3,6 +3,10 @@
  *
  * started by Ingo Molnar, (C) 2005, Red Hat
  *
+ * Steven Rostedt, Kihon Technologies Inc.
+ *   Added light softlockup detection off of what Daniel Walker of
+ *   MontaVista started.
+ *
  * this code detects soft lockups: incidents in where on a CPU
  * the kernel does not reschedule for 10 seconds or more.
  */
@@ -20,9 +24,7 @@
 static DEFINE_PER_CPU(unsigned long, timeout) = INITIAL_JIFFIES;
 static DEFINE_PER_CPU(unsigned long, timestamp) = INITIAL_JIFFIES;
 static DEFINE_PER_CPU(unsigned long, print_timestamp) = INITIAL_JIFFIES;
-static DEFINE_PER_CPU(struct task_struct *, prev_task);
 static DEFINE_PER_CPU(struct task_struct *, watchdog_task);
-static DEFINE_PER_CPU(unsigned long, task_counter);
 
 static int did_panic = 0;
 static int softlock_panic(struct notifier_block *this, unsigned long event,
@@ -42,6 +44,11 @@
        per_cpu(timestamp, raw_smp_processor_id()) = jiffies;
 }
 
+void touch_light_softlockup_watchdog(void)
+{
+       current->softlockup_count = 0;
+}
+
 /*
  * This callback runs from the timer interrupt, and checks
  * whether the watchdog thread has hung or not:
@@ -59,24 +66,20 @@
                if (!per_cpu(watchdog_task, this_cpu))
                        return;
 
-               if (per_cpu(prev_task, this_cpu) != current || 
-                       !rt_task(current)) {
-                       per_cpu(prev_task, this_cpu) = current;
-                       per_cpu(task_counter, this_cpu) = 0;
-               }
-               else if ((++per_cpu(task_counter, this_cpu) > 10) && printk_ratelimit()) {
-
-                       spin_lock(&print_lock);
-                       printk(KERN_ERR "BUG: possible soft lockup detected on CPU#%u! %lu-%lu(%lu)\n",
-                               this_cpu, jiffies, timestamp, timeout);
-                       printk("curr=%s:%d\n",current->comm,current->pid);
-                       
-                       dump_stack();
+               if (current->pid) {
+                       if (++current->softlockup_count > 10) {
+                               spin_lock(&print_lock);
+                               printk(KERN_ERR "BUG: possible soft lockup detected on CPU#%u! %lu-%lu(%lu)\n",
+                                      this_cpu, jiffies, timestamp, timeout);
+                               printk("curr=%s:%d count=%ld\n",current->comm,current->pid,
+                                      current->softlockup_count);
+                               dump_stack();
 #if defined(__i386__) && defined(CONFIG_SMP)
-                       nmi_show_all_regs();
+                               nmi_show_all_regs();
 #endif
-                       spin_unlock(&print_lock);
-                       per_cpu(task_counter, this_cpu) = 0;
+                               spin_unlock(&print_lock);
+                               touch_light_softlockup_watchdog();
+                       }
                }
 
                wake_up_process(per_cpu(watchdog_task, this_cpu));
@@ -101,7 +104,6 @@
                nmi_show_all_regs();
 #endif
                spin_unlock(&print_lock);
-               per_cpu(task_counter, this_cpu) = 0;
        }
 }
 
Index: linux_realtime_ernie/include/linux/sched.h
===================================================================
--- linux_realtime_ernie/include/linux/sched.h  (revision 266)
+++ linux_realtime_ernie/include/linux/sched.h  (working copy)
@@ -307,6 +307,7 @@
 extern void softlockup_tick(void);
 extern void spawn_softlockup_task(void);
 extern void touch_softlockup_watchdog(void);
+extern void touch_light_softlockup_watchdog(void);
 #else
 static inline void softlockup_tick(void)
 {
@@ -317,6 +318,9 @@
 static inline void touch_softlockup_watchdog(void)
 {
 }
+static inline void touch_light_softlockup_watchdog(void)
+{
+}
 #endif
 
 /* Attach to any functions which should be ignored in wchan output. */
@@ -898,6 +902,12 @@
 #ifdef CONFIG_DEBUG_PREEMPT
        int lock_count;
 #endif
+#ifdef CONFIG_DETECT_SOFTLOCKUP
+       unsigned long   softlockup_count; /* Count to keep track how long the
+                                          *  thread is in the kernel without
+                                          *  sleeping.
+                                          */
+#endif
        /* realtime bits */
        struct list_head delayed_put;
        struct plist pi_waiters;


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to