NOTES:

    I would like to thank Bruce for supplying the sample code that allowed
    me to do this in a day instead of several days.

    * debug.critical_mode sysctl.  This will not be in the final commit, 
      nor will any of the code that tests the variable.  The final commit
      will use code as if the critical_mode were '1'.

      The default is 1, which means to use the new streamlined
      interrupt and cpu_critical_enter/exit() code.   Setting it to 0
      will revert to the old hard-interrupt-disablement operation.  You
      can change the mode at any time.

    * Additional cpu_critical_enter/exit() calls around icu_lock.  Since
      critical_enter() no longer disables interrupts, special care must
      be taken when dealing with the icu_lock spin mutex because it is
      the one thing the interrupt code needs to be able to defer the
      interrupt.

    * MACHINE_CRITICAL_ENTER define.   This exists to maintain compatibility
      with other architectures.  i386 defines this to cause fork_exit to
      use the new API and to allow the i386 MD code to supply the
      critical_enter() and critical_exit() procedures rather than
      kern_switch.c

      I would much prefer it if the other architectures were brought around
      to use this new mechanism.  The old mechanism makes assumptions 
      with regard to hard disablement that are no longer correct for i386.

    * Trampoline 'sti'.  In the final commit, the trampoline will simply
      'sti' after setting up td_critnest.  The other junk to handle the
      hard-disablement case will be gone.

    * PSL save/restore in cpu_switch().  In the original code interrupts
      were always hard-disabled due to holding the sched_lock.  cpu_switch
      never bothered to save/restore the hard interrupt enable/disable
      bit (the PSL).  In the new code, hard disablement has no relationship
      to the holding of spin mutexes and so we have to save/restore the
      PSL.  If we don't, one thread's interrupt disablement will propagate
      to another thread unexpectedly.

    * Additional STI's.  It may be possible to emplace additional STI's
      in the code.  For example, we should be able to enable interrupts
      in the dounpend() code after we complete processing of FAST
      interrupts and start processing normal interrupts.

    * Additional cpu_critical_enter()/exit() calls in CY and TIMER code.
      Bruce had additional hard interrupt disablements in these modules.

      I'm not sure why, so if I need to do that as well I would like to
      know.

    * Additional optimization and work.  This is ongoing work but this
      basic patch set, with some cleanups, is probably what I will
      commit initially.  This code will give us a huge amount of 
      flexibility in regards to handling interrupts.

                                                -Matt


Index: i386/i386/exception.s
===================================================================
RCS file: /home/ncvs/src/sys/i386/i386/exception.s,v
retrieving revision 1.91
diff -u -r1.91 exception.s
--- i386/i386/exception.s       11 Feb 2002 03:41:58 -0000      1.91
+++ i386/i386/exception.s       24 Feb 2002 08:41:40 -0000
@@ -222,6 +222,18 @@
        pushl   %esp                    /* trapframe pointer */
        pushl   %ebx                    /* arg1 */
        pushl   %esi                    /* function */
+       movl    PCPU(CURTHREAD),%ebx    /* setup critnest */
+       movl    $1,TD_CRITNEST(%ebx)
+       cmpl    $0,critical_mode
+       jne     1f
+       pushfl
+       popl    TD_SAVECRIT(%ebx)
+       orl     $PSL_I,TD_SAVECRIT(%ebx)
+       jmp     2f
+1:
+       movl    $-1,TD_SAVECRIT(%ebx)
+       sti                             /* enable interrupts */
+2:
        call    fork_exit
        addl    $12,%esp
        /* cut from syscall */
Index: i386/i386/genassym.c
===================================================================
RCS file: /home/ncvs/src/sys/i386/i386/genassym.c,v
retrieving revision 1.121
diff -u -r1.121 genassym.c
--- i386/i386/genassym.c        17 Feb 2002 17:40:27 -0000      1.121
+++ i386/i386/genassym.c        24 Feb 2002 09:06:56 -0000
@@ -89,6 +89,8 @@
 ASSYM(TD_KSE, offsetof(struct thread, td_kse));
 ASSYM(TD_PROC, offsetof(struct thread, td_proc));
 ASSYM(TD_INTR_NESTING_LEVEL, offsetof(struct thread, td_intr_nesting_level));
+ASSYM(TD_CRITNEST, offsetof(struct thread, td_critnest));
+ASSYM(TD_SAVECRIT, offsetof(struct thread, td_savecrit));
 
 ASSYM(P_MD, offsetof(struct proc, p_md));
 ASSYM(MD_LDT, offsetof(struct mdproc, md_ldt));
@@ -134,6 +136,7 @@
 ASSYM(PCB_DR3, offsetof(struct pcb, pcb_dr3));
 ASSYM(PCB_DR6, offsetof(struct pcb, pcb_dr6));
 ASSYM(PCB_DR7, offsetof(struct pcb, pcb_dr7));
+ASSYM(PCB_PSL, offsetof(struct pcb, pcb_psl));
 ASSYM(PCB_DBREGS, PCB_DBREGS);
 ASSYM(PCB_EXT, offsetof(struct pcb, pcb_ext));
 
@@ -176,6 +179,10 @@
 ASSYM(PC_SIZEOF, sizeof(struct pcpu));
 ASSYM(PC_PRVSPACE, offsetof(struct pcpu, pc_prvspace));
 ASSYM(PC_CURTHREAD, offsetof(struct pcpu, pc_curthread));
+ASSYM(PC_INT_PENDING, offsetof(struct pcpu, pc_int_pending));
+ASSYM(PC_IPENDING, offsetof(struct pcpu, pc_ipending));
+ASSYM(PC_FPENDING, offsetof(struct pcpu, pc_fpending));
+ASSYM(PC_SPENDING, offsetof(struct pcpu, pc_spending));
 ASSYM(PC_FPCURTHREAD, offsetof(struct pcpu, pc_fpcurthread));
 ASSYM(PC_IDLETHREAD, offsetof(struct pcpu, pc_idlethread));
 ASSYM(PC_CURPCB, offsetof(struct pcpu, pc_curpcb));
Index: i386/i386/machdep.c
===================================================================
RCS file: /home/ncvs/src/sys/i386/i386/machdep.c,v
retrieving revision 1.497
diff -u -r1.497 machdep.c
--- i386/i386/machdep.c 17 Feb 2002 17:40:28 -0000      1.497
+++ i386/i386/machdep.c 24 Feb 2002 19:04:20 -0000
@@ -138,6 +138,8 @@
 #endif /* CPU_ENABLE_SSE */
 SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL)
 
+void unpend(void);     /* note: not static */
+
 int    _udatasel, _ucodesel;
 u_int  atdevbase;
 
@@ -148,6 +150,9 @@
 SYSCTL_INT(_debug, OID_AUTO, tlb_flush_count,
        CTLFLAG_RD, &tlb_flush_count, 0, "");
 #endif
+int critical_mode = 1;
+SYSCTL_INT(_debug, OID_AUTO, critical_mode,
+       CTLFLAG_RW, &critical_mode, 0, "");
 
 #ifdef PC98
 static int     ispc98 = 1;
@@ -270,6 +275,121 @@
 }
 
 /*
+ * Critical section handling.
+ *
+ *     Note that our interrupt code handles any interrupt race that occurs
+ *     after we decrement td_critnest.
+ */
+void
+critical_enter(void)
+{
+       struct thread *td = curthread;
+
+       if (critical_mode == 0) {
+               if (td->td_critnest == 0)
+                       td->td_savecrit = cpu_critical_enter();
+               td->td_critnest++;
+       } else {
+               ++td->td_critnest;
+       }
+}
+
+void
+critical_exit(void)
+{
+       struct thread *td = curthread;
+       KASSERT(td->td_critnest > 0, ("bad td_critnest value!"));
+       if (--td->td_critnest == 0) {
+               if (td->td_savecrit != (critical_t)-1) {
+                       cpu_critical_exit(td->td_savecrit);
+                       td->td_savecrit = (critical_t)-1;
+               } else {
+               /*
+                * We may have to schedule pending interrupts.  Create
+                * conditions similar to an interrupt context and call
+                * unpend().
+                */
+               if (PCPU_GET(int_pending) && td->td_intr_nesting_level == 0) {
+                       critical_t eflags;
+
+                       eflags = cpu_critical_enter();
+                       if (PCPU_GET(int_pending)) {
+                               ++td->td_intr_nesting_level;
+                               unpend();
+                               --td->td_intr_nesting_level;
+                       }
+                       cpu_critical_exit(eflags);
+               }
+               }
+       }
+}
+
+/*
+ * Called from critical_exit() or called from the assembly vector code
+ * to process any interrupts which may have occurred while we were in
+ * a critical section.
+ *
+ *     - interrupts must be disabled
+ *     - td_intr_nesting_level may not be 0
+ *     - td_critnest must be 0
+ */
+void
+unpend(void)
+{
+       curthread->td_critnest = 1;
+       for (;;) {
+               u_int32_t mask;
+
+               /*
+                * Fast interrupts have priority
+                */
+               if ((mask = PCPU_GET(fpending)) != 0) {
+                       int irq = bsfl(mask);
+                       PCPU_SET(fpending, mask & ~(1 << irq));
+                       call_fast_unpend(irq);
+                       continue;
+               }
+
+               /*
+                * Threaded interrupts come next
+                */
+               if ((mask = PCPU_GET(ipending)) != 0) {
+                       int irq = bsfl(mask);
+                       PCPU_SET(ipending, mask & ~(1 << irq));
+                       sched_ithd((void *)irq);
+                       continue;
+               }
+
+               /*
+                * Software interrupts and delayed IPIs are last
+                *
+                * XXX give the bits #defined names.  see also
+                * isa/xxx_vector.s
+                */
+               if ((mask = PCPU_GET(spending)) != 0) {
+                       int irq = bsfl(mask);
+                       PCPU_SET(spending, mask & ~(1 << irq));
+                       switch(irq) {
+                       case 0:         /* bit 0 - hardclock */
+                               mtx_lock_spin(&sched_lock);
+                               hardclock_process(curthread, 0);
+                               mtx_unlock_spin(&sched_lock);
+                               break;
+                       case 1:         /* bit 1 - statclock */
+                               mtx_lock_spin(&sched_lock);
+                               statclock_process(curthread->td_kse, 
+(register_t)unpend, 0);
+                               mtx_unlock_spin(&sched_lock);
+                               break;
+                       }
+                       continue;
+               }
+               break;
+       }
+       PCPU_SET(int_pending, 0);
+       curthread->td_critnest = 0;
+}
+
+/*
  * Send an interrupt to process.
  *
  * Stack is set up to allow sigcode stored
@@ -1732,12 +1852,17 @@
 
        /*
         * Initialize mutexes.
+        *
+        * icu_lock: in order to allow an interrupt to occur in a critical
+        *           section, to set pcpu->ipending (etc...) properly, we
+        *           must be able to get the icu lock, so it can't be
+        *           under witness.
         */
        mtx_init(&Giant, "Giant", MTX_DEF | MTX_RECURSE);
        mtx_init(&sched_lock, "sched lock", MTX_SPIN | MTX_RECURSE);
        mtx_init(&proc0.p_mtx, "process lock", MTX_DEF);
        mtx_init(&clock_lock, "clk", MTX_SPIN | MTX_RECURSE);
-       mtx_init(&icu_lock, "icu", MTX_SPIN);
+       mtx_init(&icu_lock, "icu", MTX_SPIN | MTX_NOWITNESS);
        mtx_lock(&Giant);
 
        /* make ldt memory segments */
Index: i386/i386/mp_machdep.c
===================================================================
RCS file: /home/ncvs/src/sys/i386/i386/mp_machdep.c,v
retrieving revision 1.174
diff -u -r1.174 mp_machdep.c
--- i386/i386/mp_machdep.c      22 Feb 2002 13:31:55 -0000      1.174
+++ i386/i386/mp_machdep.c      24 Feb 2002 08:09:50 -0000
@@ -2306,6 +2306,9 @@
 /*
  * For statclock, we send an IPI to all CPU's to have them call this
  * function.
+ *
+ * WARNING! unpend() will call statclock_process() directly and skip this
+ * routine.
  */
 void
 forwarded_statclock(struct trapframe frame)
@@ -2337,6 +2340,9 @@
  * sched_lock if we could simply peek at the CPU to determine the user/kernel
  * state and call hardclock_process() on the CPU receiving the clock interrupt
  * and then just use a simple IPI to handle any ast's if needed.
+ *
+ * WARNING! unpend() will call hardclock_process() directly and skip this
+ * routine.
  */
 void
 forwarded_hardclock(struct trapframe frame)
Index: i386/i386/mpapic.c
===================================================================
RCS file: /home/ncvs/src/sys/i386/i386/mpapic.c,v
retrieving revision 1.52
diff -u -r1.52 mpapic.c
--- i386/i386/mpapic.c  5 Jan 2002 06:44:27 -0000       1.52
+++ i386/i386/mpapic.c  24 Feb 2002 10:49:23 -0000
@@ -190,6 +190,7 @@
        u_int32_t       target;         /* the window register is 32 bits */
        u_int32_t       vector;         /* the window register is 32 bits */
        int             level;
+       critical_t      crit;
 
        target = IOART_DEST;
 
@@ -210,11 +211,13 @@
         * shouldn't and stop the carnage.
         */
        vector = NRSVIDT + pin;                 /* IDT vec */
+       crit = cpu_critical_enter();
        mtx_lock_spin(&icu_lock);
        io_apic_write(apic, select,
                      (io_apic_read(apic, select) & ~IOART_INTMASK 
                       & ~0xff)|IOART_INTMSET|vector);
        mtx_unlock_spin(&icu_lock);
+       cpu_critical_exit(crit);
        
        /* we only deal with vectored INTs here */
        if (apic_int_type(apic, pin) != 0)
@@ -258,10 +261,12 @@
                printf("IOAPIC #%d intpin %d -> irq %d\n",
                       apic, pin, irq);
        vector = NRSVIDT + irq;                 /* IDT vec */
+       crit = cpu_critical_enter();
        mtx_lock_spin(&icu_lock);
        io_apic_write(apic, select, flags | vector);
        io_apic_write(apic, select + 1, target);
        mtx_unlock_spin(&icu_lock);
+       cpu_critical_exit(crit);
 }
 
 int
Index: i386/i386/swtch.s
===================================================================
RCS file: /home/ncvs/src/sys/i386/i386/swtch.s,v
retrieving revision 1.128
diff -u -r1.128 swtch.s
--- i386/i386/swtch.s   7 Feb 2002 22:40:34 -0000       1.128
+++ i386/i386/swtch.s   24 Feb 2002 09:09:05 -0000
@@ -96,6 +96,8 @@
        movl    %esi,PCB_ESI(%edx)
        movl    %edi,PCB_EDI(%edx)
        movl    %gs,PCB_GS(%edx)
+       pushfl                                  /* PSL */
+       popl    PCB_PSL(%edx)
 
        /* Test if debug registers should be saved. */
        testl   $PCB_DBREGS,PCB_FLAGS(%edx)
@@ -233,6 +235,8 @@
        movl    PCB_EDI(%edx),%edi
        movl    PCB_EIP(%edx),%eax
        movl    %eax,(%esp)
+       pushl   PCB_PSL(%edx)
+       popfl
 
 #if defined(SMP) && defined(GRAB_LOPRIO)
        /* Hold LOPRIO for interrupts. */
@@ -339,6 +343,8 @@
        movl    %esi,PCB_ESI(%ecx)
        movl    %edi,PCB_EDI(%ecx)
        movl    %gs,PCB_GS(%ecx)
+       pushfl
+       popl    PCB_PSL(%ecx)
 
 #ifdef DEV_NPX
        /*
Index: i386/i386/vm_machdep.c
===================================================================
RCS file: /home/ncvs/src/sys/i386/i386/vm_machdep.c,v
retrieving revision 1.181
diff -u -r1.181 vm_machdep.c
--- i386/i386/vm_machdep.c      12 Feb 2002 05:50:43 -0000      1.181
+++ i386/i386/vm_machdep.c      24 Feb 2002 09:11:16 -0000
@@ -193,6 +193,7 @@
        pcb2->pcb_esp = (int)td2->td_frame - sizeof(void *);
        pcb2->pcb_ebx = (int)td2;               /* fork_trampoline argument */
        pcb2->pcb_eip = (int)fork_trampoline;
+       pcb2->pcb_psl = td2->td_frame->tf_eflags & ~PSL_I; /* ints disabled */
        /*-
         * pcb2->pcb_dr*:       cloned above.
         * pcb2->pcb_savefpu:   cloned above.
Index: i386/include/cpufunc.h
===================================================================
RCS file: /home/ncvs/src/sys/i386/include/cpufunc.h,v
retrieving revision 1.108
diff -u -r1.108 cpufunc.h
--- i386/include/cpufunc.h      12 Feb 2002 21:06:48 -0000      1.108
+++ i386/include/cpufunc.h      24 Feb 2002 03:36:24 -0000
@@ -52,7 +52,11 @@
 #define writew(va, d)  (*(volatile u_int16_t *) (va) = (d))
 #define writel(va, d)  (*(volatile u_int32_t *) (va) = (d))
 
+#if 0
 #define        CRITICAL_FORK   (read_eflags() | PSL_I)
+#else
+#define MACHINE_CRITICAL_ENTER /* MD code defines critical_enter/exit/fork */
+#endif
 
 #ifdef __GNUC__
 
Index: i386/include/pcb.h
===================================================================
RCS file: /home/ncvs/src/sys/i386/include/pcb.h,v
retrieving revision 1.41
diff -u -r1.41 pcb.h
--- i386/include/pcb.h  17 Jan 2002 17:49:23 -0000      1.41
+++ i386/include/pcb.h  24 Feb 2002 09:06:27 -0000
@@ -69,7 +69,8 @@
        caddr_t pcb_onfault;    /* copyin/out fault recovery */
        int     pcb_gs;
        struct  pcb_ext *pcb_ext;       /* optional pcb extension */
-       u_long  __pcb_spare[3]; /* adjust to avoid core dump size changes */
+       int     pcb_psl;        /* process status long */
+       u_long  __pcb_spare[2]; /* adjust to avoid core dump size changes */
 };
 
 /*
Index: i386/isa/apic_vector.s
===================================================================
RCS file: /home/ncvs/src/sys/i386/isa/apic_vector.s,v
retrieving revision 1.75
diff -u -r1.75 apic_vector.s
--- i386/isa/apic_vector.s      5 Jan 2002 08:47:10 -0000       1.75
+++ i386/isa/apic_vector.s      24 Feb 2002 17:58:34 -0000
@@ -19,11 +19,19 @@
 #define PUSH_FRAME                                                     \
        pushl   $0 ;            /* dummy error code */                  \
        pushl   $0 ;            /* dummy trap type */                   \
-       pushal ;                                                        \
+       pushal ;                /* 8 ints */                            \
        pushl   %ds ;           /* save data and extra segments ... */  \
        pushl   %es ;                                                   \
        pushl   %fs
 
+#define PUSH_DUMMY                                                     \
+       pushfl ;                /* eflags */                            \
+       pushl   %cs ;           /* cs */                                \
+       pushl   $0 ;            /* dummy eip */                         \
+       pushl   $0 ;            /* dummy error code */                  \
+       pushl   $0 ;            /* dummy trap type */                   \
+       subl    $11*4,%esp ;
+
 #define POP_FRAME                                                      \
        popl    %fs ;                                                   \
        popl    %es ;                                                   \
@@ -31,37 +39,8 @@
        popal ;                                                         \
        addl    $4+4,%esp
 
-/*
- * Macros for interrupt entry, call to handler, and exit.
- */
-
-#define        FAST_INTR(irq_num, vec_name)                                    \
-       .text ;                                                         \
-       SUPERALIGN_TEXT ;                                               \
-IDTVEC(vec_name) ;                                                     \
-       PUSH_FRAME ;                                                    \
-       movl    $KDSEL,%eax ;                                           \
-       mov     %ax,%ds ;                                               \
-       mov     %ax,%es ;                                               \
-       movl    $KPSEL,%eax ;                                           \
-       mov     %ax,%fs ;                                               \
-       FAKE_MCOUNT(13*4(%esp)) ;                                       \
-       call    critical_enter ;                                        \
-       movl    PCPU(CURTHREAD),%ebx ;                                  \
-       incl    TD_INTR_NESTING_LEVEL(%ebx) ;                           \
-       pushl   intr_unit + (irq_num) * 4 ;                             \
-       call    *intr_handler + (irq_num) * 4 ; /* do the work ASAP */  \
-       addl    $4, %esp ;                                              \
-       movl    $0, lapic+LA_EOI ;                                      \
-       lock ;                                                          \
-       incl    cnt+V_INTR ;    /* book-keeping can wait */             \
-       movl    intr_countp + (irq_num) * 4, %eax ;                     \
-       lock ;                                                          \
-       incl    (%eax) ;                                                \
-       decl    TD_INTR_NESTING_LEVEL(%ebx) ;                           \
-       call    critical_exit ;                                         \
-       MEXITCOUNT ;                                                    \
-       jmp     doreti
+#define POP_DUMMY                                                      \
+       addl    $16*4,%esp
 
 #define IOAPICADDR(irq_num) CNAME(int_to_apicintpin) + 16 * (irq_num) + 8
 #define REDIRIDX(irq_num) CNAME(int_to_apicintpin) + 16 * (irq_num) + 12
@@ -114,9 +93,9 @@
  */
 #define UNMASK_IRQ(irq_num)                                    \
        ICU_LOCK ;                              /* into critical reg */ \
-       testl   $IRQ_BIT(irq_num), _apic_imen ;                         \
+       testl   $IRQ_BIT(irq_num), apic_imen ;                          \
        je      7f ;                    /* bit clear, not masked */     \
-       andl    $~IRQ_BIT(irq_num), _apic_imen ;/* clear mask bit */    \
+       andl    $~IRQ_BIT(irq_num), apic_imen ;/* clear mask bit */     \
        movl    IOAPICADDR(irq_num), %ecx ;     /* ioapic addr */       \
        movl    REDIRIDX(irq_num), %eax ;       /* get the index */     \
        movl    %eax, (%ecx) ;                  /* write the index */   \
@@ -126,6 +105,92 @@
 7: ;                                           /* already unmasked */  \
        ICU_UNLOCK
 
+/*
+ * Test to see whether we are handling an edge or level triggered INT.
+ *  Level-triggered INTs have to be unmasked.
+ */
+#define UNMASK_LEVEL_IRQ(irq_num)                                      \
+       testl   $IRQ_BIT(irq_num), apic_pin_trigger ;                   \
+       jz      9f ;                    /* edge, don't unmask */        \
+       UNMASK_IRQ(irq_num) ;                                           \
+9:
+
+/*
+ * Macros for interrupt entry, call to handler, and exit.
+ */
+
+#define        FAST_INTR(irq_num, vec_name)                                    \
+       .text ;                                                         \
+       SUPERALIGN_TEXT ;                                               \
+IDTVEC(vec_name) ;                                                     \
+       PUSH_FRAME ;                                                    \
+       movl    $KDSEL,%eax ;                                           \
+       mov     %ax,%ds ;                                               \
+       mov     %ax,%es ;                                               \
+       movl    $KPSEL,%eax ;                                           \
+       mov     %ax,%fs ;                                               \
+       FAKE_MCOUNT(13*4(%esp)) ;                                       \
+       movl    PCPU(CURTHREAD),%ebx ;                                  \
+       cmpl    $0,TD_CRITNEST(%ebx) ;                                  \
+       je      1f ;                                                    \
+;                                                                      \
+       movl    $1,PCPU(INT_PENDING) ;                                  \
+       orl     $IRQ_BIT(irq_num),PCPU(FPENDING) ;                      \
+       MASK_LEVEL_IRQ(irq_num) ;                                       \
+       movl    $0, lapic+LA_EOI ;                                      \
+       jmp     10f ;                                                   \
+1: ;                                                                   \
+       incl    TD_CRITNEST(%ebx) ;                                     \
+       incl    TD_INTR_NESTING_LEVEL(%ebx) ;                           \
+       pushl   intr_unit + (irq_num) * 4 ;                             \
+       call    *intr_handler + (irq_num) * 4 ; /* do the work ASAP */  \
+       addl    $4, %esp ;                                              \
+       movl    $0, lapic+LA_EOI ;                                      \
+       lock ;                                                          \
+       incl    cnt+V_INTR ;    /* book-keeping can wait */             \
+       movl    intr_countp + (irq_num) * 4, %eax ;                     \
+       lock ;                                                          \
+       incl    (%eax) ;                                                \
+       decl    TD_CRITNEST(%ebx) ;                                     \
+       cmpl    $0,PCPU(INT_PENDING) ;                                  \
+       je      2f ;                                                    \
+;                                                                      \
+       call    unpend ;                                                \
+2: ;                                                                   \
+       decl    TD_INTR_NESTING_LEVEL(%ebx) ;                           \
+10: ;                                                                  \
+       MEXITCOUNT ;                                                    \
+       jmp     doreti
+
+/*
+ * Restart a fast interrupt that was held up by a critical section.
+ * This routine is called from unpend().  unpend() ensures we are
+ * in a critical section and deals with the interrupt nesting level
+ * for us.  If we previously masked the irq, we have to unmask it.
+ *
+ * We have a choice.  We can regenerate the irq using the 'int'
+ * instruction or we can create a dummy frame and call the interrupt
+ * handler directly.  I've chosen to use the dummy-frame method.
+ */
+#define        FAST_UNPEND(irq_num, vec_name)                                  \
+       .text ;                                                         \
+       SUPERALIGN_TEXT ;                                               \
+IDTVEC(vec_name) ;                                                     \
+;                                                                      \
+       PUSH_DUMMY ;                                                    \
+       pushl   intr_unit + (irq_num) * 4 ;                             \
+       call    *intr_handler + (irq_num) * 4 ; /* do the work ASAP */  \
+       addl    $4, %esp ;                                              \
+       lock ;                                                          \
+       incl    cnt+V_INTR ;    /* book-keeping can wait */             \
+       movl    intr_countp + (irq_num) * 4, %eax ;                     \
+       lock ;                                                          \
+       incl    (%eax) ;                                                \
+       UNMASK_LEVEL_IRQ(irq_num) ;                                     \
+       POP_DUMMY ;                                                     \
+       ret ;                                                           \
+
+
 /* 
  * Slow, threaded interrupts.
  *
@@ -151,16 +216,27 @@
 ;                                                                      \
        MASK_LEVEL_IRQ(irq_num) ;                                       \
        EOI_IRQ(irq_num) ;                                              \
-0: ;                                                                   \
+;                                                                      \
        movl    PCPU(CURTHREAD),%ebx ;                                  \
+       cmpl    $0,TD_CRITNEST(%ebx) ;                                  \
+       je      1f ;                                                    \
+       movl    $1,PCPU(INT_PENDING) ;                                  \
+       orl     $IRQ_BIT(irq_num),PCPU(IPENDING) ;                      \
+       jmp     10f ;                                                   \
+1: ;                                                                   \
        incl    TD_INTR_NESTING_LEVEL(%ebx) ;                           \
 ;                                                                      \
        FAKE_MCOUNT(13*4(%esp)) ;               /* XXX avoid dbl cnt */ \
+       cmpl    $0,PCPU(INT_PENDING) ;                                  \
+       je      9f ;                                                    \
+       call    unpend ;                                                \
+9: ;                                                                   \
        pushl   $irq_num;                       /* pass the IRQ */      \
        call    sched_ithd ;                                            \
        addl    $4, %esp ;              /* discard the parameter */     \
 ;                                                                      \
        decl    TD_INTR_NESTING_LEVEL(%ebx) ;                           \
+10: ;                                                                  \
        MEXITCOUNT ;                                                    \
        jmp     doreti
 
@@ -226,9 +302,16 @@
        movl    $0, lapic+LA_EOI        /* End Of Interrupt to APIC */
 
        movl    PCPU(CURTHREAD),%ebx
+       cmpl    $0,TD_CRITNEST(%ebx)
+       je      1f
+       movl    $1,PCPU(INT_PENDING)
+       orl     $1,PCPU(SPENDING);
+       jmp     10f
+1:
        incl    TD_INTR_NESTING_LEVEL(%ebx)
        call    forwarded_hardclock
        decl    TD_INTR_NESTING_LEVEL(%ebx)
+10:
        MEXITCOUNT
        jmp     doreti
 
@@ -250,10 +333,18 @@
        movl    $0, lapic+LA_EOI        /* End Of Interrupt to APIC */
 
        FAKE_MCOUNT(13*4(%esp))
+
        movl    PCPU(CURTHREAD),%ebx
+       cmpl    $0,TD_CRITNEST(%ebx)
+       je      1f
+       movl    $1,PCPU(INT_PENDING)
+       orl     $2,PCPU(SPENDING);
+       jmp     10f
+1:
        incl    TD_INTR_NESTING_LEVEL(%ebx)
        call    forwarded_statclock
        decl    TD_INTR_NESTING_LEVEL(%ebx)
+10:
        MEXITCOUNT
        jmp     doreti
 
@@ -417,6 +508,41 @@
        INTR(30,intr30,)
        INTR(31,intr31,)
 MCOUNT_LABEL(eintr)
+
+MCOUNT_LABEL(bfunpend)
+       FAST_UNPEND(0,fastunpend0)
+       FAST_UNPEND(1,fastunpend1)
+       FAST_UNPEND(2,fastunpend2)
+       FAST_UNPEND(3,fastunpend3)
+       FAST_UNPEND(4,fastunpend4)
+       FAST_UNPEND(5,fastunpend5)
+       FAST_UNPEND(6,fastunpend6)
+       FAST_UNPEND(7,fastunpend7)
+       FAST_UNPEND(8,fastunpend8)
+       FAST_UNPEND(9,fastunpend9)
+       FAST_UNPEND(10,fastunpend10)
+       FAST_UNPEND(11,fastunpend11)
+       FAST_UNPEND(12,fastunpend12)
+       FAST_UNPEND(13,fastunpend13)
+       FAST_UNPEND(14,fastunpend14)
+       FAST_UNPEND(15,fastunpend15)
+       FAST_UNPEND(16,fastunpend16)
+       FAST_UNPEND(17,fastunpend17)
+       FAST_UNPEND(18,fastunpend18)
+       FAST_UNPEND(19,fastunpend19)
+       FAST_UNPEND(20,fastunpend20)
+       FAST_UNPEND(21,fastunpend21)
+       FAST_UNPEND(22,fastunpend22)
+       FAST_UNPEND(23,fastunpend23)
+       FAST_UNPEND(24,fastunpend24)
+       FAST_UNPEND(25,fastunpend25)
+       FAST_UNPEND(26,fastunpend26)
+       FAST_UNPEND(27,fastunpend27)
+       FAST_UNPEND(28,fastunpend28)
+       FAST_UNPEND(29,fastunpend29)
+       FAST_UNPEND(30,fastunpend30)
+       FAST_UNPEND(31,fastunpend31)
+MCOUNT_LABEL(efunpend)
 
 /*
  * Executed by a CPU when it receives a RENDEZVOUS IPI from another CPU.
Index: i386/isa/clock.c
===================================================================
RCS file: /home/ncvs/src/sys/i386/isa/clock.c,v
retrieving revision 1.180
diff -u -r1.180 clock.c
--- i386/isa/clock.c    30 Jan 2002 12:41:11 -0000      1.180
+++ i386/isa/clock.c    24 Feb 2002 10:43:58 -0000
@@ -995,6 +995,7 @@
        int apic_8254_trial;
        void *clkdesc;
 #endif /* APIC_IO */
+       critical_t crit;
 
        if (statclock_disable) {
                /*
@@ -1029,9 +1030,11 @@
 
        inthand_add("clk", apic_8254_intr, (driver_intr_t *)clkintr, NULL,
            INTR_TYPE_CLK | INTR_FAST, &clkdesc);
+       crit = cpu_critical_enter();
        mtx_lock_spin(&icu_lock);
        INTREN(1 << apic_8254_intr);
        mtx_unlock_spin(&icu_lock);
+       cpu_critical_exit(crit);
 
 #else /* APIC_IO */
 
@@ -1042,9 +1045,11 @@
         */
        inthand_add("clk", 0, (driver_intr_t *)clkintr, NULL,
            INTR_TYPE_CLK | INTR_FAST, NULL);
+       crit = cpu_critical_enter();
        mtx_lock_spin(&icu_lock);
        INTREN(IRQ0);
        mtx_unlock_spin(&icu_lock);
+       cpu_critical_exit(crit);
 
 #endif /* APIC_IO */
 
@@ -1067,6 +1072,7 @@
        inthand_add("rtc", 8, (driver_intr_t *)rtcintr, NULL,
            INTR_TYPE_CLK | INTR_FAST, NULL);
 
+       crit = cpu_critical_enter();
        mtx_lock_spin(&icu_lock);
 #ifdef APIC_IO
        INTREN(APIC_IRQ8);
@@ -1074,6 +1080,7 @@
        INTREN(IRQ8);
 #endif /* APIC_IO */
        mtx_unlock_spin(&icu_lock);
+       cpu_critical_exit(crit);
 
        writertc(RTC_STATUSB, rtc_statusb);
 
@@ -1090,9 +1097,13 @@
                         * on the IO APIC.
                         * Workaround: Limited variant of mixed mode.
                         */
+                       critical_t crit;
+
+                       crit = cpu_critical_enter();
                        mtx_lock_spin(&icu_lock);
                        INTRDIS(1 << apic_8254_intr);
                        mtx_unlock_spin(&icu_lock);
+                       cpu_critical_exit(crit);
                        inthand_remove(clkdesc);
                        printf("APIC_IO: Broken MP table detected: "
                               "8254 is not connected to "
@@ -1115,9 +1126,11 @@
                        inthand_add("clk", apic_8254_intr,
                                    (driver_intr_t *)clkintr, NULL,
                                    INTR_TYPE_CLK | INTR_FAST, NULL);
+                       crit = cpu_critical_enter();
                        mtx_lock_spin(&icu_lock);
                        INTREN(1 << apic_8254_intr);
                        mtx_unlock_spin(&icu_lock);
+                       cpu_critical_exit(crit);
                }
                
        }
Index: i386/isa/icu_vector.s
===================================================================
RCS file: /home/ncvs/src/sys/i386/isa/icu_vector.s,v
retrieving revision 1.31
diff -u -r1.31 icu_vector.s
--- i386/isa/icu_vector.s       5 Jan 2002 08:47:11 -0000       1.31
+++ i386/isa/icu_vector.s       24 Feb 2002 19:01:27 -0000
@@ -16,17 +16,23 @@
 #define        ICU_EOI                 0x20    /* XXX - define elsewhere */
 
 #define        IRQ_BIT(irq_num)        (1 << ((irq_num) % 8))
+#define        IRQ_LBIT(irq_num)       (1 << (irq_num))
 #define        IRQ_BYTE(irq_num)       ((irq_num) >> 3)
 
 #ifdef AUTO_EOI_1
+
 #define        ENABLE_ICU1             /* use auto-EOI to reduce i/o */
 #define        OUTB_ICU1
+
 #else
-#define        ENABLE_ICU1 \
-       movb    $ICU_EOI,%al ;  /* as soon as possible send EOI ... */ \
+
+#define        ENABLE_ICU1                                                     \
+       movb    $ICU_EOI,%al ;  /* as soon as possible send EOI ... */  \
        OUTB_ICU1               /* ... to clear in service bit */
-#define        OUTB_ICU1 \
+
+#define        OUTB_ICU1                                                       \
        outb    %al,$IO_ICU1
+
 #endif
 
 #ifdef AUTO_EOI_2
@@ -34,48 +40,124 @@
  * The data sheet says no auto-EOI on slave, but it sometimes works.
  */
 #define        ENABLE_ICU1_AND_2       ENABLE_ICU1
+
 #else
-#define        ENABLE_ICU1_AND_2 \
-       movb    $ICU_EOI,%al ;  /* as above */ \
-       outb    %al,$IO_ICU2 ;  /* but do second icu first ... */ \
+
+#define        ENABLE_ICU1_AND_2                                               \
+       movb    $ICU_EOI,%al ;  /* as above */                          \
+       outb    %al,$IO_ICU2 ;  /* but do second icu first ... */       \
        OUTB_ICU1               /* ... then first icu (if !AUTO_EOI_1) */
+
 #endif
 
+#define PUSH_FRAME                                                     \
+       pushl   $0 ;            /* dummy error code */                  \
+       pushl   $0 ;            /* dummy trap type */                   \
+       pushal ;                /* 8 ints */                            \
+       pushl   %ds ;           /* save data and extra segments ... */  \
+       pushl   %es ;                                                   \
+       pushl   %fs
+
+#define PUSH_DUMMY                                                     \
+       pushfl ;                /* eflags */                            \
+       pushl   %cs ;           /* cs */                                \
+       pushl   $0 ;            /* dummy eip */                         \
+       pushl   $0 ;            /* dummy error code */                  \
+       pushl   $0 ;            /* dummy trap type */                   \
+       subl    $11*4,%esp
+
+#define POP_FRAME                                                      \
+       popl    %fs ;                                                   \
+       popl    %es ;                                                   \
+       popl    %ds ;                                                   \
+       popal ;                                                         \
+       addl    $4+4,%esp
+
+#define POP_DUMMY                                                      \
+       addl    $16*4,%esp
+
+#define MASK_IRQ(icu, irq_num)                                         \
+       movb    imen + IRQ_BYTE(irq_num),%al ;                          \
+       orb     $IRQ_BIT(irq_num),%al ;                                 \
+       movb    %al,imen + IRQ_BYTE(irq_num) ;                          \
+       outb    %al,$icu+ICU_IMR_OFFSET
+
+#define UNMASK_IRQ(icu, irq_num)                                       \
+       movb    imen + IRQ_BYTE(irq_num),%al ;                          \
+       andb    $~IRQ_BIT(irq_num),%al ;                                \
+       movb    %al,imen + IRQ_BYTE(irq_num) ;                          \
+       outb    %al,$icu+ICU_IMR_OFFSET
 /*
  * Macros for interrupt interrupt entry, call to handler, and exit.
  */
 
-#define        FAST_INTR(irq_num, vec_name, enable_icus) \
-       .text ; \
-       SUPERALIGN_TEXT ; \
-IDTVEC(vec_name) ; \
-       pushl   $0 ;            /* dummy error code */ \
-       pushl   $0 ;            /* dummy trap type */ \
-       pushal ; \
-       pushl   %ds ; \
-       pushl   %es ; \
-       pushl   %fs ; \
-       mov     $KDSEL,%ax ; \
-       mov     %ax,%ds ; \
-       mov     %ax,%es ; \
-       mov     $KPSEL,%ax ; \
-       mov     %ax,%fs ; \
-       FAKE_MCOUNT((12+ACTUALLY_PUSHED)*4(%esp)) ; \
-       call    critical_enter ; \
-       movl    PCPU(CURTHREAD),%ebx ; \
-       incl    TD_INTR_NESTING_LEVEL(%ebx) ; \
-       pushl   intr_unit + (irq_num) * 4 ; \
-       call    *intr_handler + (irq_num) * 4 ; /* do the work ASAP */ \
-       enable_icus ;           /* (re)enable ASAP (helps edge trigger?) */ \
-       addl    $4,%esp ; \
-       incl    cnt+V_INTR ;    /* book-keeping can wait */ \
-       movl    intr_countp + (irq_num) * 4,%eax ; \
-       incl    (%eax) ; \
-       decl    TD_INTR_NESTING_LEVEL(%ebx) ; \
-       call    critical_exit ; \
-       MEXITCOUNT ; \
+#define        FAST_INTR(irq_num, vec_name, icu, enable_icus)                  \
+       .text ;                                                         \
+       SUPERALIGN_TEXT ;                                               \
+IDTVEC(vec_name) ;                                                     \
+       PUSH_FRAME ;                                                    \
+       mov     $KDSEL,%ax ;                                            \
+       mov     %ax,%ds ;                                               \
+       mov     %ax,%es ;                                               \
+       mov     $KPSEL,%ax ;                                            \
+       mov     %ax,%fs ;                                               \
+       FAKE_MCOUNT((12+ACTUALLY_PUSHED)*4(%esp)) ;                     \
+       movl    PCPU(CURTHREAD),%ebx ;                                  \
+       cmpl    $0,TD_CRITNEST(%ebx) ;                                  \
+       je      1f ;                                                    \
+;                                                                      \
+       movl    $1,PCPU(INT_PENDING) ;                                  \
+       orl     $IRQ_LBIT(irq_num),PCPU(FPENDING) ;                     \
+       MASK_IRQ(icu, irq_num) ;                                        \
+       enable_icus ;                                                   \
+       jmp     10f ;                                                   \
+1: ;                                                                   \
+       incl    TD_CRITNEST(%ebx) ;                                     \
+       incl    TD_INTR_NESTING_LEVEL(%ebx) ;                           \
+       pushl   intr_unit + (irq_num) * 4 ;                             \
+       call    *intr_handler + (irq_num) * 4 ;                         \
+       addl    $4,%esp ;                                               \
+       enable_icus ;                                                   \
+       incl    cnt+V_INTR ;    /* book-keeping can wait */             \
+       movl    intr_countp + (irq_num) * 4,%eax ;                      \
+       incl    (%eax) ;                                                \
+       decl    TD_CRITNEST(%ebx) ;                                     \
+       cmpl    $0,PCPU(INT_PENDING) ;                                  \
+       je      2f ;                                                    \
+;                                                                      \
+       call    unpend ;                                                \
+2: ;                                                                   \
+       decl    TD_INTR_NESTING_LEVEL(%ebx) ;                           \
+10: ;                                                                  \
+       MEXITCOUNT ;                                                    \
        jmp     doreti
 
+/*
+ * Restart a fast interrupt that was held up by a critical section.
+ * This routine is called from unpend().  unpend() ensures we are
+ * in a critical section and deals with the interrupt nesting level
+ * for us.  If we previously masked the irq, we have to unmask it.
+ *
+ * We have a choice.  We can regenerate the irq using the 'int'
+ * instruction or we can create a dummy frame and call the interrupt
+ * handler directly.  I've chosen to use the dummy-frame method.
+ */
+#define        FAST_UNPEND(irq_num, vec_name, icu)                             \
+       .text ;                                                         \
+       SUPERALIGN_TEXT ;                                               \
+IDTVEC(vec_name) ;                                                     \
+;                                                                      \
+       PUSH_DUMMY ;                                                    \
+       pushl   intr_unit + (irq_num) * 4 ;                             \
+       call    *intr_handler + (irq_num) * 4 ; /* do the work ASAP */  \
+       addl    $4, %esp ;                                              \
+       incl    cnt+V_INTR ;    /* book-keeping can wait */             \
+       movl    intr_countp + (irq_num) * 4,%eax ;                      \
+       incl    (%eax) ;                                                \
+       UNMASK_IRQ(icu, irq_num) ;                                      \
+       POP_DUMMY ;                                                     \
+       ret
+
 /* 
  * Slow, threaded interrupts.
  *
@@ -85,74 +167,100 @@
  * interrupt handler and don't run anything.  We could just do an
  * iret.  FIXME.
  */
-#define        INTR(irq_num, vec_name, icu, enable_icus, reg, maybe_extra_ipending) \
-       .text ; \
-       SUPERALIGN_TEXT ; \
-IDTVEC(vec_name) ; \
-       pushl   $0 ;            /* dummy error code */ \
-       pushl   $0 ;            /* dummy trap type */ \
-       pushal ; \
-       pushl   %ds ;           /* save our data and extra segments ... */ \
-       pushl   %es ; \
-       pushl   %fs ; \
-       mov     $KDSEL,%ax ;    /* load kernel ds, es and fs */ \
-       mov     %ax,%ds ; \
-       mov     %ax,%es ; \
-       mov     $KPSEL,%ax ; \
-       mov     %ax,%fs ; \
-       maybe_extra_ipending ; \
-       movb    imen + IRQ_BYTE(irq_num),%al ; \
-       orb     $IRQ_BIT(irq_num),%al ; \
-       movb    %al,imen + IRQ_BYTE(irq_num) ; \
-       outb    %al,$icu+ICU_IMR_OFFSET ; \
-       enable_icus ; \
-       movl    PCPU(CURTHREAD),%ebx ; \
-       incl    TD_INTR_NESTING_LEVEL(%ebx) ; \
+#define        INTR(irq_num, vec_name, icu, enable_icus, maybe_extra_ipending) \
+       .text ;                                                         \
+       SUPERALIGN_TEXT ;                                               \
+IDTVEC(vec_name) ;                                                     \
+       PUSH_FRAME ;                                                    \
+       mov     $KDSEL,%ax ;    /* load kernel ds, es and fs */         \
+       mov     %ax,%ds ;                                               \
+       mov     %ax,%es ;                                               \
+       mov     $KPSEL,%ax ;                                            \
+       mov     %ax,%fs ;                                               \
+;                                                                      \
+       maybe_extra_ipending ;                                          \
+       MASK_IRQ(icu, irq_num) ;                                        \
+       enable_icus ;                                                   \
+;                                                                      \
+       movl    PCPU(CURTHREAD),%ebx ;                                  \
+        cmpl   $0,TD_CRITNEST(%ebx) ;                                  \
+       je      1f ;                                                    \
+       movl    $1,PCPU(INT_PENDING);                                   \
+       orl     $IRQ_LBIT(irq_num),PCPU(IPENDING) ;                     \
+       jmp     10f ;                                                   \
+1: ;                                                                   \
+       incl    TD_INTR_NESTING_LEVEL(%ebx) ;                           \
+;                                                                      \
        FAKE_MCOUNT(13*4(%esp)) ;       /* XXX late to avoid double count */ \
-       pushl   $irq_num;       /* pass the IRQ */ \
-       call    sched_ithd ; \
-       addl    $4, %esp ;      /* discard the parameter */ \
-       decl    TD_INTR_NESTING_LEVEL(%ebx) ; \
-       MEXITCOUNT ; \
-       /* We could usually avoid the following jmp by inlining some of */ \
-       /* doreti, but it's probably better to use less cache. */ \
-       jmp     doreti          /* and catch up inside doreti */
+       cmpl    $0,PCPU(INT_PENDING) ;                                  \
+       je      9f ;                                                    \
+       call    unpend ;                                                \
+9: ;                                                                   \
+       pushl   $irq_num;       /* pass the IRQ */                      \
+       call    sched_ithd ;                                            \
+       addl    $4, %esp ;      /* discard the parameter */             \
+;                                                                      \
+       decl    TD_INTR_NESTING_LEVEL(%ebx) ;                           \
+10: ;                                                                  \
+       MEXITCOUNT ;                                                    \
+       jmp     doreti
 
 MCOUNT_LABEL(bintr)
-       FAST_INTR(0,fastintr0, ENABLE_ICU1)
-       FAST_INTR(1,fastintr1, ENABLE_ICU1)
-       FAST_INTR(2,fastintr2, ENABLE_ICU1)
-       FAST_INTR(3,fastintr3, ENABLE_ICU1)
-       FAST_INTR(4,fastintr4, ENABLE_ICU1)
-       FAST_INTR(5,fastintr5, ENABLE_ICU1)
-       FAST_INTR(6,fastintr6, ENABLE_ICU1)
-       FAST_INTR(7,fastintr7, ENABLE_ICU1)
-       FAST_INTR(8,fastintr8, ENABLE_ICU1_AND_2)
-       FAST_INTR(9,fastintr9, ENABLE_ICU1_AND_2)
-       FAST_INTR(10,fastintr10, ENABLE_ICU1_AND_2)
-       FAST_INTR(11,fastintr11, ENABLE_ICU1_AND_2)
-       FAST_INTR(12,fastintr12, ENABLE_ICU1_AND_2)
-       FAST_INTR(13,fastintr13, ENABLE_ICU1_AND_2)
-       FAST_INTR(14,fastintr14, ENABLE_ICU1_AND_2)
-       FAST_INTR(15,fastintr15, ENABLE_ICU1_AND_2)
+       FAST_INTR(0,fastintr0, IO_ICU1, ENABLE_ICU1)
+       FAST_INTR(1,fastintr1, IO_ICU1, ENABLE_ICU1)
+       FAST_INTR(2,fastintr2, IO_ICU1, ENABLE_ICU1)
+       FAST_INTR(3,fastintr3, IO_ICU1, ENABLE_ICU1)
+       FAST_INTR(4,fastintr4, IO_ICU1, ENABLE_ICU1)
+       FAST_INTR(5,fastintr5, IO_ICU1, ENABLE_ICU1)
+       FAST_INTR(6,fastintr6, IO_ICU1, ENABLE_ICU1)
+       FAST_INTR(7,fastintr7, IO_ICU1, ENABLE_ICU1)
+       FAST_INTR(8,fastintr8, IO_ICU2, ENABLE_ICU1_AND_2)
+       FAST_INTR(9,fastintr9, IO_ICU2, ENABLE_ICU1_AND_2)
+       FAST_INTR(10,fastintr10, IO_ICU2, ENABLE_ICU1_AND_2)
+       FAST_INTR(11,fastintr11, IO_ICU2, ENABLE_ICU1_AND_2)
+       FAST_INTR(12,fastintr12, IO_ICU2, ENABLE_ICU1_AND_2)
+       FAST_INTR(13,fastintr13, IO_ICU2, ENABLE_ICU1_AND_2)
+       FAST_INTR(14,fastintr14, IO_ICU2, ENABLE_ICU1_AND_2)
+       FAST_INTR(15,fastintr15, IO_ICU2, ENABLE_ICU1_AND_2)
 
 #define        CLKINTR_PENDING movl $1,CNAME(clkintr_pending)
 /* Threaded interrupts */
-       INTR(0,intr0, IO_ICU1, ENABLE_ICU1, al, CLKINTR_PENDING)
-       INTR(1,intr1, IO_ICU1, ENABLE_ICU1, al,)
-       INTR(2,intr2, IO_ICU1, ENABLE_ICU1, al,)
-       INTR(3,intr3, IO_ICU1, ENABLE_ICU1, al,)
-       INTR(4,intr4, IO_ICU1, ENABLE_ICU1, al,)
-       INTR(5,intr5, IO_ICU1, ENABLE_ICU1, al,)
-       INTR(6,intr6, IO_ICU1, ENABLE_ICU1, al,)
-       INTR(7,intr7, IO_ICU1, ENABLE_ICU1, al,)
-       INTR(8,intr8, IO_ICU2, ENABLE_ICU1_AND_2, ah,)
-       INTR(9,intr9, IO_ICU2, ENABLE_ICU1_AND_2, ah,)
-       INTR(10,intr10, IO_ICU2, ENABLE_ICU1_AND_2, ah,)
-       INTR(11,intr11, IO_ICU2, ENABLE_ICU1_AND_2, ah,)
-       INTR(12,intr12, IO_ICU2, ENABLE_ICU1_AND_2, ah,)
-       INTR(13,intr13, IO_ICU2, ENABLE_ICU1_AND_2, ah,)
-       INTR(14,intr14, IO_ICU2, ENABLE_ICU1_AND_2, ah,)
-       INTR(15,intr15, IO_ICU2, ENABLE_ICU1_AND_2, ah,)
+       INTR(0,intr0, IO_ICU1, ENABLE_ICU1, CLKINTR_PENDING)
+       INTR(1,intr1, IO_ICU1, ENABLE_ICU1,)
+       INTR(2,intr2, IO_ICU1, ENABLE_ICU1,)
+       INTR(3,intr3, IO_ICU1, ENABLE_ICU1,)
+       INTR(4,intr4, IO_ICU1, ENABLE_ICU1,)
+       INTR(5,intr5, IO_ICU1, ENABLE_ICU1,)
+       INTR(6,intr6, IO_ICU1, ENABLE_ICU1,)
+       INTR(7,intr7, IO_ICU1, ENABLE_ICU1,)
+       INTR(8,intr8, IO_ICU2, ENABLE_ICU1_AND_2,)
+       INTR(9,intr9, IO_ICU2, ENABLE_ICU1_AND_2,)
+       INTR(10,intr10, IO_ICU2, ENABLE_ICU1_AND_2,)
+       INTR(11,intr11, IO_ICU2, ENABLE_ICU1_AND_2,)
+       INTR(12,intr12, IO_ICU2, ENABLE_ICU1_AND_2,)
+       INTR(13,intr13, IO_ICU2, ENABLE_ICU1_AND_2,)
+       INTR(14,intr14, IO_ICU2, ENABLE_ICU1_AND_2,)
+       INTR(15,intr15, IO_ICU2, ENABLE_ICU1_AND_2,)
 
 MCOUNT_LABEL(eintr)
+
+MCOUNT_LABEL(bfunpend)
+       FAST_UNPEND(0,fastunpend0, IO_ICU1)
+       FAST_UNPEND(1,fastunpend1, IO_ICU1)
+       FAST_UNPEND(2,fastunpend2, IO_ICU1)
+       FAST_UNPEND(3,fastunpend3, IO_ICU1)
+       FAST_UNPEND(4,fastunpend4, IO_ICU1)
+       FAST_UNPEND(5,fastunpend5, IO_ICU1)
+       FAST_UNPEND(6,fastunpend6, IO_ICU1)
+       FAST_UNPEND(7,fastunpend7, IO_ICU1)
+       FAST_UNPEND(8,fastunpend8, IO_ICU2)
+       FAST_UNPEND(9,fastunpend9, IO_ICU2)
+       FAST_UNPEND(10,fastunpend10, IO_ICU2)
+       FAST_UNPEND(11,fastunpend11, IO_ICU2)
+       FAST_UNPEND(12,fastunpend12, IO_ICU2)
+       FAST_UNPEND(13,fastunpend13, IO_ICU2)
+       FAST_UNPEND(14,fastunpend14, IO_ICU2)
+       FAST_UNPEND(15,fastunpend15, IO_ICU2)
+MCOUNT_LABEL(efunpend)
+
+
Index: i386/isa/intr_machdep.c
===================================================================
RCS file: /home/ncvs/src/sys/i386/isa/intr_machdep.c,v
retrieving revision 1.65
diff -u -r1.65 intr_machdep.c
--- i386/isa/intr_machdep.c     8 Feb 2002 18:30:35 -0000       1.65
+++ i386/isa/intr_machdep.c     24 Feb 2002 10:50:26 -0000
@@ -117,6 +117,27 @@
 #endif /* APIC_IO */
 };
 
+static unpendhand_t *fastunpend[ICU_LEN] = {
+       &IDTVEC(fastunpend0), &IDTVEC(fastunpend1),
+       &IDTVEC(fastunpend2), &IDTVEC(fastunpend3),
+       &IDTVEC(fastunpend4), &IDTVEC(fastunpend5),
+       &IDTVEC(fastunpend6), &IDTVEC(fastunpend7),
+       &IDTVEC(fastunpend8), &IDTVEC(fastunpend9),
+       &IDTVEC(fastunpend10), &IDTVEC(fastunpend11),
+       &IDTVEC(fastunpend12), &IDTVEC(fastunpend13),
+       &IDTVEC(fastunpend14), &IDTVEC(fastunpend15),
+#if defined(APIC_IO)
+       &IDTVEC(fastunpend16), &IDTVEC(fastunpend17),
+       &IDTVEC(fastunpend18), &IDTVEC(fastunpend19),
+       &IDTVEC(fastunpend20), &IDTVEC(fastunpend21),
+       &IDTVEC(fastunpend22), &IDTVEC(fastunpend23),
+       &IDTVEC(fastunpend24), &IDTVEC(fastunpend25),
+       &IDTVEC(fastunpend26), &IDTVEC(fastunpend27),
+       &IDTVEC(fastunpend28), &IDTVEC(fastunpend29),
+       &IDTVEC(fastunpend30), &IDTVEC(fastunpend31),
+#endif /* APIC_IO */
+};
+
 static inthand_t *slowintr[ICU_LEN] = {
        &IDTVEC(intr0), &IDTVEC(intr1), &IDTVEC(intr2), &IDTVEC(intr3),
        &IDTVEC(intr4), &IDTVEC(intr5), &IDTVEC(intr6), &IDTVEC(intr7),
@@ -291,13 +312,16 @@
 void icu_reinit()
 {
        int i;
+       critical_t crit;
 
+       crit = cpu_critical_enter();
        mtx_lock_spin(&icu_lock);
        init_i8259();
        for(i=0;i<ICU_LEN;i++)
                if(intr_handler[i] != isa_strayintr)
                        INTREN(1<<i);
        mtx_unlock_spin(&icu_lock);
+       cpu_critical_exit(crit);
 }
 
 /*
@@ -309,13 +333,16 @@
 isa_defaultirq()
 {
        int i;
+       critical_t crit;
 
        /* icu vectors */
        for (i = 0; i < ICU_LEN; i++)
                icu_unset(i, (driver_intr_t *)NULL);
+       crit = cpu_critical_enter();
        mtx_lock_spin(&icu_lock);
        init_i8259();
        mtx_unlock_spin(&icu_lock);
+       cpu_critical_exit(crit);
 }
 
 
@@ -476,6 +503,7 @@
        int             vector;
        u_int32_t       value;          /* the window register is 32 bits */
 #endif /* FAST_HI */
+       critical_t      crit;
 
 #if defined(APIC_IO)
        if ((u_int)intr >= ICU_LEN)     /* no 8259 SLAVE to ignore */
@@ -488,6 +516,7 @@
                return (EBUSY);
 #endif
 
+       crit = cpu_critical_enter();
        mtx_lock_spin(&icu_lock);
        intr_handler[intr] = handler;
        intr_unit[intr] = arg;
@@ -530,6 +559,7 @@
 #endif /* FAST_HI */
        INTREN(1 << intr);
        mtx_unlock_spin(&icu_lock);
+       cpu_critical_exit(crit);
        return (0);
 }
 
@@ -543,10 +573,12 @@
        int     intr;
        driver_intr_t *handler;
 {
+       critical_t crit;
 
        if ((u_int)intr >= ICU_LEN || handler != intr_handler[intr])
                return (EINVAL);
 
+       crit = cpu_critical_enter();
        mtx_lock_spin(&icu_lock);
        INTRDIS(1 << intr);
        intr_countp[intr] = &intrcnt[1 + intr];
@@ -564,6 +596,7 @@
            GSEL(GCODE_SEL, SEL_KPL));
 #endif /* FAST_HI */
        mtx_unlock_spin(&icu_lock);
+       cpu_critical_exit(crit);
        return (0);
 }
 
@@ -578,19 +611,25 @@
 static void
 ithread_enable(int vector)
 {
+       critical_t crit;
 
+       crit = cpu_critical_enter();
        mtx_lock_spin(&icu_lock);
        INTREN(1 << vector);
        mtx_unlock_spin(&icu_lock);
+       cpu_critical_exit(crit);
 }
 
 static void
 ithread_disable(int vector)
 {
+       critical_t crit;
 
+       crit = cpu_critical_enter();
        mtx_lock_spin(&icu_lock);
        INTRDIS(1 << vector);
        mtx_unlock_spin(&icu_lock);
+       cpu_critical_exit(crit);
 }
 
 int
@@ -672,3 +711,10 @@
 
        return (ithread_remove_handler(cookie));
 }
+
+void
+call_fast_unpend(int irq)
+{
+       fastunpend[irq]();
+}
+
Index: i386/isa/intr_machdep.h
===================================================================
RCS file: /home/ncvs/src/sys/i386/isa/intr_machdep.h,v
retrieving revision 1.33
diff -u -r1.33 intr_machdep.h
--- i386/isa/intr_machdep.h     20 Dec 2001 23:48:30 -0000      1.33
+++ i386/isa/intr_machdep.h     24 Feb 2002 05:55:23 -0000
@@ -140,6 +140,7 @@
  * Type of the first (asm) part of an interrupt handler.
  */
 typedef void inthand_t __P((u_int cs, u_int ef, u_int esp, u_int ss));
+typedef void unpendhand_t __P((void));
 
 #define        IDTVEC(name)    __CONCAT(X,name)
 
@@ -163,6 +164,18 @@
        IDTVEC(intr4), IDTVEC(intr5), IDTVEC(intr6), IDTVEC(intr7),
        IDTVEC(intr8), IDTVEC(intr9), IDTVEC(intr10), IDTVEC(intr11),
        IDTVEC(intr12), IDTVEC(intr13), IDTVEC(intr14), IDTVEC(intr15);
+unpendhand_t
+       IDTVEC(fastunpend0), IDTVEC(fastunpend1), IDTVEC(fastunpend2),
+       IDTVEC(fastunpend3), IDTVEC(fastunpend4), IDTVEC(fastunpend5),
+       IDTVEC(fastunpend6), IDTVEC(fastunpend7), IDTVEC(fastunpend8),
+       IDTVEC(fastunpend9), IDTVEC(fastunpend10), IDTVEC(fastunpend11),
+       IDTVEC(fastunpend12), IDTVEC(fastunpend13), IDTVEC(fastunpend14),
+       IDTVEC(fastunpend15), IDTVEC(fastunpend16), IDTVEC(fastunpend17),
+       IDTVEC(fastunpend18), IDTVEC(fastunpend19), IDTVEC(fastunpend20),
+       IDTVEC(fastunpend21), IDTVEC(fastunpend22), IDTVEC(fastunpend23),
+       IDTVEC(fastunpend24), IDTVEC(fastunpend25), IDTVEC(fastunpend26),
+       IDTVEC(fastunpend27), IDTVEC(fastunpend28), IDTVEC(fastunpend29),
+       IDTVEC(fastunpend30), IDTVEC(fastunpend31);
 
 #if defined(SMP) || defined(APIC_IO)
 inthand_t
@@ -228,6 +241,7 @@
            enum intr_type flags, void **cookiep);
 int    inthand_remove(void *cookie);
 void   sched_ithd(void *dummy);
+void   call_fast_unpend(int irq);
 
 #endif /* LOCORE */
 
Index: i386/isa/npx.c
===================================================================
RCS file: /home/ncvs/src/sys/i386/isa/npx.c,v
retrieving revision 1.123
diff -u -r1.123 npx.c
--- i386/isa/npx.c      30 Jan 2002 12:41:11 -0000      1.123
+++ i386/isa/npx.c      24 Feb 2002 10:50:55 -0000
@@ -429,9 +429,15 @@
         * XXX hack around brokenness of bus_teardown_intr().  If we left the
         * irq active then we would get it instead of exception 16.
         */
-       mtx_lock_spin(&icu_lock);
-       INTRDIS(1 << irq_num);
-       mtx_unlock_spin(&icu_lock);
+       {
+               critical_t crit;
+
+               crit = cpu_critical_enter();
+               mtx_lock_spin(&icu_lock);
+               INTRDIS(1 << irq_num);
+               mtx_unlock_spin(&icu_lock);
+               cpu_critical_exit(crit);
+       }
 
        bus_release_resource(dev, SYS_RES_IRQ, irq_rid, irq_res);
        bus_release_resource(dev, SYS_RES_IOPORT, ioport_rid, ioport_res);
Index: kern/kern_fork.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/kern_fork.c,v
retrieving revision 1.135
diff -u -r1.135 kern_fork.c
--- kern/kern_fork.c    23 Feb 2002 11:12:53 -0000      1.135
+++ kern/kern_fork.c    24 Feb 2002 05:57:59 -0000
@@ -777,12 +777,19 @@
 
        td->td_kse->ke_oncpu = PCPU_GET(cpuid);
        /*
-        * Setup the sched_lock state so that we can release it.
+        * Setup the sched_lock state so that we can release it.  If
+        * MACHINE_CRITICAL_ENTER is set by the MD architecture, the
+        * trampoline returns with the critical section pre-set.
+        * XXX note: all architectures should do this, because this code
+        * improperly assumes that a critical section == hard interrupt
+        * disablement on entry, which is not necessarily true.
         */
        sched_lock.mtx_lock = (uintptr_t)td;
        sched_lock.mtx_recurse = 0;
+#ifndef MACHINE_CRITICAL_ENTER
        td->td_critnest = 1;
        td->td_savecrit = CRITICAL_FORK;
+#endif
        CTR3(KTR_PROC, "fork_exit: new proc %p (pid %d, %s)", p, p->p_pid,
            p->p_comm);
        if (PCPU_GET(switchtime.sec) == 0)
Index: kern/kern_switch.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/kern_switch.c,v
retrieving revision 1.20
diff -u -r1.20 kern_switch.c
--- kern/kern_switch.c  11 Feb 2002 20:37:51 -0000      1.20
+++ kern/kern_switch.c  24 Feb 2002 03:03:38 -0000
@@ -69,6 +69,8 @@
        runq_add(&runq, td->td_kse);
 }
 
+#ifndef MACHINE_CRITICAL_ENTER
+
 /* Critical sections that prevent preemption. */
 void
 critical_enter(void)
@@ -93,6 +95,8 @@
        } else
                td->td_critnest--;
 }
+
+#endif
 
 /*
  * Clear the status bit of the queue corresponding to priority level pri,
Index: sys/pcpu.h
===================================================================
RCS file: /home/ncvs/src/sys/sys/pcpu.h,v
retrieving revision 1.4
diff -u -r1.4 pcpu.h
--- sys/pcpu.h  22 Feb 2002 13:32:01 -0000      1.4
+++ sys/pcpu.h  24 Feb 2002 08:01:06 -0000
@@ -57,6 +57,10 @@
        u_int           pc_cpuid;               /* This cpu number */
        u_int           pc_cpumask;             /* This cpu mask */
        u_int           pc_other_cpus;          /* Mask of all other cpus */
+       u_int32_t       pc_int_pending;         /* master int pending flag */
+       u_int32_t       pc_ipending;            /* pending slow interrupts */
+       u_int32_t       pc_fpending;            /* pending fast interrupts */
+       u_int32_t       pc_spending;            /* pending soft interrupts */
        SLIST_ENTRY(pcpu) pc_allcpu;
        struct lock_list_entry *pc_spinlocks;
 #ifdef KTR_PERCPU

To Unsubscribe: send mail to [EMAIL PROTECTED]
with "unsubscribe freebsd-current" in the body of the message

Reply via email to