On Mon, 30 Jul 2001 16:52:27 +0200, Sheldon Hearn wrote:

> I'll be a lot happier when I can enable DDB_UNATTENDED and do whatever
> it is that causes my panic of the day and actually get a crashdump
> instead of
> 
>       panic: witness_restore: lock (sleep mutex) Giant not locked

Right!  We have interesting progress!

The following patchset fixes two problems:

1) The witness code may panic again even when panicstr is already
   non-NULL.  This is a problem when the original panic was caused by
   the witness code itself.

   Solution: when panicstr != NULL, witness backs off on the fascism.

2) The ata code calls await/mawait inside a dump.  This results in a
   hard, uninterruptible lockup in ad_reinit().  An NMI might interrupt
   it, but not all of us have NMI boards.

   Solution: as a quick fix, when panicstr != NULL, bail out of
   msleep() without spinning on mutexes (a rough sketch of both guards
   follows this list).

   The real solution here is arguably for the ata code to be aware of
   the fact that sleeping inside a dump (i.e. panicstr != NULL) is bad
   and stop doing so.
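
For the curious, here is a tiny user-space sketch (not the kernel code
itself) of the shape both guards take.  panicstr and cold are mocked,
and sketch_mtx_assert()/sketch_msleep() are made-up stand-ins for the
real routines patched in kern_mutex.c, kern_synch.c and subr_witness.c
below:

/*
 * Minimal, compilable sketch of the two "panic in progress" guards.
 * All identifiers are stand-ins; the real changes are in the diffs
 * that follow.
 */
#include <stdio.h>
#include <stddef.h>

static const char *panicstr = NULL; /* set by panic() in the kernel */
static int cold = 0;                /* nonzero during autoconfiguration */

/* Guard #1: assertion/witness paths become no-ops once panicking. */
static void
sketch_mtx_assert(void *m, int what)
{
        if (panicstr != NULL)
                return;             /* don't panic again inside a panic */
        printf("checking lock state %d on %p\n", what, m);
}

/* Guard #2: don't sleep (or spin on mutexes) while dumping core. */
static int
sketch_msleep(void *chan, int timo)
{
        if (cold || panicstr != NULL)
                return (0);         /* bail out right away */
        printf("would sleep on %p for %d ticks\n", chan, timo);
        return (0);
}

int
main(void)
{
        int lock;

        sketch_mtx_assert(&lock, 1);         /* normal case: check runs */
        sketch_msleep(&lock, 100);           /* normal case: would block */

        panicstr = "some panic message";     /* simulate panic() having run */
        sketch_mtx_assert(&lock, 1);         /* now a no-op */
        return (sketch_msleep(&lock, 100));  /* returns at once */
}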

With these fixes in place, I can get a crashdump with a populated
ktr_buf from a witness-related panic.  Joy!

Many thanks to jhb for providing the patch for #1 above and to peter for
the patch for #2 above.  None of this is actually my own work.  I'm just
the guy pressing the panic button on his box incessantly. :-)

Ciao,
Sheldon.

Index: kern/kern_mutex.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/kern_mutex.c,v
retrieving revision 1.64
diff -u -d -r1.64 kern_mutex.c
--- kern/kern_mutex.c   2001/06/25 18:29:32     1.64
+++ kern/kern_mutex.c   2001/07/30 23:14:32
@@ -562,6 +562,9 @@
 void
 _mtx_assert(struct mtx *m, int what, const char *file, int line)
 {
+
+       if (panicstr != NULL)
+               return;
        switch (what) {
        case MA_OWNED:
        case MA_OWNED | MA_RECURSED:
Index: kern/kern_synch.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/kern_synch.c,v
retrieving revision 1.148
diff -u -d -r1.148 kern_synch.c
--- kern/kern_synch.c   2001/07/06 01:16:42     1.148
+++ kern/kern_synch.c   2001/07/31 01:07:23
@@ -554,6 +554,18 @@
        KASSERT(timo > 0 || mtx_owned(&Giant) || mtx != NULL,
            ("sleeping without a mutex"));
        mtx_lock_spin(&sched_lock);
+       if (cold || panicstr) {
+               /*
+                * After a panic, or during autoconfiguration,
+                * just give interrupts a chance, then just return;
+                * don't run any other procs or panic below,
+                * in case this is the idle process and already asleep.
+                */
+               if (mtx != NULL && priority & PDROP)
+                       mtx_unlock_flags(mtx, MTX_NOSWITCH);
+               mtx_unlock_spin(&sched_lock);
+               return (0);
+       }
        DROP_GIANT_NOSWITCH();
        if (mtx != NULL) {
                mtx_assert(mtx, MA_OWNED | MA_NOTRECURSED);
Index: kern/subr_trap.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/subr_trap.c,v
retrieving revision 1.196
diff -u -d -r1.196 subr_trap.c
--- kern/subr_trap.c    2001/07/04 15:36:30     1.196
+++ kern/subr_trap.c    2001/07/30 23:14:42
@@ -72,9 +72,9 @@
        while ((sig = CURSIG(p)) != 0)
                postsig(sig);
        mtx_unlock(&Giant);
+       PROC_UNLOCK(p);
 
        mtx_lock_spin(&sched_lock);
-       PROC_UNLOCK_NOSWITCH(p);
        p->p_pri.pri_level = p->p_pri.pri_user;
        if (resched_wanted(p)) {
                /*
@@ -96,24 +96,22 @@
                while ((sig = CURSIG(p)) != 0)
                        postsig(sig);
                mtx_unlock(&Giant);
-               mtx_lock_spin(&sched_lock);
-               PROC_UNLOCK_NOSWITCH(p);
-       }
+               PROC_UNLOCK(p);
+       } else
+               mtx_unlock_spin(&sched_lock);
 
        /*
         * Charge system time if profiling.
         */
-       if (p->p_sflag & PS_PROFIL) {
-               mtx_unlock_spin(&sched_lock);
+       if (p->p_sflag & PS_PROFIL)
                addupc_task(p, TRAPF_PC(frame),
                            (u_int)(p->p_sticks - oticks) * psratio);
-       } else
-               mtx_unlock_spin(&sched_lock);
 }
 
 /*
  * Process an asynchronous software trap.
  * This is relatively easy.
+ * This function will return with interrupts disabled.
  */
 void
 ast(framep)
@@ -121,68 +119,64 @@
 {
        struct proc *p = CURPROC;
        u_quad_t sticks;
+       critical_t s;
+       int sflag;
 #if defined(DEV_NPX) && !defined(SMP)
        int ucode;
 #endif
 
        KASSERT(TRAPF_USERMODE(framep), ("ast in kernel mode"));
-
-       /*
-        * We check for a pending AST here rather than in the assembly as
-        * acquiring and releasing mutexes in assembly is not fun.
-        */
-       mtx_lock_spin(&sched_lock);
-       if (!(astpending(p) || resched_wanted(p))) {
-               mtx_unlock_spin(&sched_lock);
-               return;
-       }
-
-       sticks = p->p_sticks;
-       p->p_frame = framep;
-
-       astoff(p);
-       cnt.v_soft++;
-       mtx_intr_enable(&sched_lock);
-       if (p->p_sflag & PS_OWEUPC) {
-               p->p_sflag &= ~PS_OWEUPC;
-               mtx_unlock_spin(&sched_lock);
-               mtx_lock(&Giant);
-               addupc_task(p, p->p_stats->p_prof.pr_addr,
-                           p->p_stats->p_prof.pr_ticks);
+       s = critical_enter();
+       while (astpending(p) || resched_wanted(p)) {
+               critical_exit(s);
+               p->p_frame = framep;
+               /*
+                * This updates the p_sflag's for the checks below in one
+                * "atomic" operation with turning off the astpending flag.
+                * If another AST is triggered while we are handling the
+                * AST's saved in sflag, the astpending flag will be set and
+                * we will loop again.
+                */
                mtx_lock_spin(&sched_lock);
-       }
-       if (p->p_sflag & PS_ALRMPEND) {
-               p->p_sflag &= ~PS_ALRMPEND;
+               sticks = p->p_sticks;
+               sflag = p->p_sflag;
+               p->p_sflag &= ~(PS_OWEUPC | PS_ALRMPEND | PS_PROFPEND);
+               astoff(p);
+               cnt.v_soft++;
                mtx_unlock_spin(&sched_lock);
-               PROC_LOCK(p);
-               psignal(p, SIGVTALRM);
-               PROC_UNLOCK(p);
-               mtx_lock_spin(&sched_lock);
-       }
+               if (sflag & PS_OWEUPC)
+                       addupc_task(p, p->p_stats->p_prof.pr_addr,
+                                   p->p_stats->p_prof.pr_ticks);
+               if (sflag & PS_ALRMPEND) {
+                       PROC_LOCK(p);
+                       psignal(p, SIGVTALRM);
+                       PROC_UNLOCK(p);
+               }
 #if defined(DEV_NPX) && !defined(SMP)
-       if (PCPU_GET(curpcb)->pcb_flags & PCB_NPXTRAP) {
-               PCPU_GET(curpcb)->pcb_flags &= ~PCB_NPXTRAP;
-               mtx_unlock_spin(&sched_lock);
-               ucode = npxtrap();
-               if (ucode != -1) {
-                       if (!mtx_owned(&Giant))
-                               mtx_lock(&Giant);
-                       trapsignal(p, SIGFPE, ucode);
+               /*
+                * XXX: Does this need a lock?  So long as npxtrap's only
+                * happen due to a userland fault, we don't need to worry
+                * about nested faults while in a kernel context.
+                */
+               if (PCPU_GET(curpcb)->pcb_flags & PCB_NPXTRAP) {
+                       PCPU_GET(curpcb)->pcb_flags &= ~PCB_NPXTRAP;
+                       ucode = npxtrap();
+                       if (ucode != -1) {
+                               if (!mtx_owned(&Giant))
+                                       mtx_lock(&Giant);
+                               trapsignal(p, SIGFPE, ucode);
+                       }
                }
-               mtx_lock_spin(&sched_lock);
-       }
 #endif
-       if (p->p_sflag & PS_PROFPEND) {
-               p->p_sflag &= ~PS_PROFPEND;
-               mtx_unlock_spin(&sched_lock);
-               PROC_LOCK(p);
-               psignal(p, SIGPROF);
-               PROC_UNLOCK(p);
-       } else
-               mtx_unlock_spin(&sched_lock);
-
-       userret(p, framep, sticks);
+               if (sflag & PS_PROFPEND) {
+                       PROC_LOCK(p);
+                       psignal(p, SIGPROF);
+                       PROC_UNLOCK(p);
+               }
 
-       if (mtx_owned(&Giant))
-               mtx_unlock(&Giant);
+               userret(p, framep, sticks);
+               if (mtx_owned(&Giant))
+                       mtx_unlock(&Giant);
+               s = critical_enter();
+       }
 }
Index: kern/subr_witness.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/subr_witness.c,v
retrieving revision 1.80
diff -u -d -r1.80 subr_witness.c
--- kern/subr_witness.c 2001/07/20 23:29:25     1.80
+++ kern/subr_witness.c 2001/07/30 23:17:52
@@ -355,7 +355,7 @@
        if (lock_cur_cnt > lock_max_cnt)
                lock_max_cnt = lock_cur_cnt;
        mtx_unlock(&all_mtx);
-       if (!witness_cold && !witness_dead &&
+       if (!witness_cold && !witness_dead && panicstr == NULL &&
            (lock->lo_flags & LO_WITNESS) != 0)
                lock->lo_witness = enroll(lock->lo_name, class);
        else
@@ -469,7 +469,7 @@
 #endif /* DDB */
 
        if (witness_cold || witness_dead || lock->lo_witness == NULL ||
-           panicstr)
+           panicstr != NULL)
                return;
        w = lock->lo_witness;
        class = lock->lo_class;
@@ -723,7 +723,7 @@
        int i, j;
 
        if (witness_cold || witness_dead || lock->lo_witness == NULL ||
-           panicstr)
+           panicstr != NULL)
                return;
        p = curproc;
        class = lock->lo_class;
@@ -821,7 +821,7 @@
        critical_t savecrit;
        int i, n;
 
-       if (witness_dead || panicstr)
+       if (witness_dead || panicstr != NULL)
                return (0);
        KASSERT(!witness_cold, ("%s: witness_cold\n", __func__));
        n = 0;
@@ -872,7 +872,7 @@
 {
        struct witness *w;
 
-       if (!witness_watch || witness_dead)
+       if (!witness_watch || witness_dead || panicstr != NULL)
                return (NULL);
 
        if ((lock_class->lc_flags & LC_SPINLOCK) && witness_skipspin)
@@ -1309,7 +1309,7 @@
        struct lock_instance *instance;
 
        KASSERT(!witness_cold, ("%s: witness_cold\n", __func__));
-       if (lock->lo_witness == NULL || witness_dead)
+       if (lock->lo_witness == NULL || witness_dead || panicstr != NULL)
                return;
 
        KASSERT(lock->lo_class->lc_flags & LC_SLEEPLOCK,
@@ -1329,7 +1329,7 @@
        struct lock_instance *instance;
 
        KASSERT(!witness_cold, ("%s: witness_cold\n", __func__));
-       if (lock->lo_witness == NULL || witness_dead)
+       if (lock->lo_witness == NULL || witness_dead || panicstr != NULL)
                return;
 
        KASSERT(lock->lo_class->lc_flags & LC_SLEEPLOCK,
@@ -1351,7 +1351,7 @@
 #ifdef INVARIANT_SUPPORT
        struct lock_instance *instance;
 
-       if (lock->lo_witness == NULL || witness_dead)
+       if (lock->lo_witness == NULL || witness_dead || panicstr != NULL)
                return;
 
        if ((lock->lo_class->lc_flags & LC_SLEEPLOCK) != 0)
