Hello,

For hackers: the following patch makes the kernel preemptable. It is against 2.4.0-prerelease, for i386 only. It should work on both UP and SMP, although I did not validate it on SMP. Comments are welcome.

NOTES: since the lock implementation is modified, you obviously need to recompile all your modules. I introduced a dependency between spinlock.h and sched.h, and this has some bad side effects: some files will generate warnings during compilation complaining that disable_preempt()/enable_preempt() are not defined. The warnings should be harmless, but I was too lazy to fix all of them. If the compilation fails because of that, there is a good chance of fixing it by including sched.h in the offending C file.

Ludo.
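PS: for reviewers who want the core idea in one screen before reading the
diff, here is a stand-alone, user-space sketch of the mechanism. This is an
illustration only, not part of the patch: the scaffolding (struct task, the
schedule() stub, main()) is invented for the example, and the real
enable_preempt() in the sched.h hunk below uses an atomic_t counter and also
checks TASK_RUNNING, !in_interrupt(), and the local interrupt flag before
scheduling.

#include <assert.h>
#include <stdio.h>

struct task {
	int preemptable;	/* > 0 means preemption disabled; counts nesting */
	int need_resched;	/* set asynchronously, e.g. by the timer tick */
};

static struct task current_task;
#define current (&current_task)

static void schedule(void)
{
	printf("schedule(): context switch would happen here\n");
	current->need_resched = 0;
}

static void disable_preempt(void)
{
	current->preemptable++;
}

static void enable_preempt(void)
{
	assert(current->preemptable > 0);
	/* Only the outermost enable is a preemption point. */
	if (current->preemptable == 1 && current->need_resched)
		schedule();
	current->preemptable--;
}

/* Conceptually, what the patched spin_lock()/spin_unlock() expand to. */
static void example_lock(void)   { disable_preempt(); /* then acquire */ }
static void example_unlock(void) { /* release, then */ enable_preempt(); }

int main(void)
{
	example_lock();
	current->need_resched = 1;	/* pretend a timer tick marked us */
	example_unlock();		/* outermost unlock: schedule() runs */
	return 0;
}

The point is that the counter nests: a spin_lock() taken inside lock_kernel()
inside an interrupt handler simply stacks up, and only the outermost
enable_preempt() is allowed to trigger a context switch.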
diff -u --recursive linux-2.4-prerelease.org/arch/i386/kernel/apic.c linux-2.4-prerelease/arch/i386/kernel/apic.c
--- linux-2.4-prerelease.org/arch/i386/kernel/apic.c	Wed Jan  3 17:19:44 2001
+++ linux-2.4-prerelease/arch/i386/kernel/apic.c	Wed Jan  3 12:58:57 2001
@@ -726,6 +726,7 @@
 	 * interrupt lock, which is the WrongThing (tm) to do.
 	 */
 	irq_enter(cpu, 0);
+	disable_preempt();
 	smp_local_timer_interrupt(regs);
 	irq_exit(cpu, 0);
 }
@@ -746,6 +747,8 @@
 	if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
 		ack_APIC_irq();
 
+	disable_preempt();
+
 	/* see sw-dev-man vol 3, chapter 7.4.13.5 */
 	printk(KERN_INFO "spurious APIC interrupt on CPU#%d, should never happen.\n",
 			smp_processor_id());
@@ -776,6 +779,9 @@
 	   6: Received illegal vector
 	   7: Illegal register address
 	*/
+
+	disable_preempt();
+
 	printk (KERN_ERR "APIC error on CPU%d: %02lx(%02lx)\n",
 		smp_processor_id(), v , v1);
 }
diff -u --recursive linux-2.4-prerelease.org/arch/i386/kernel/entry.S linux-2.4-prerelease/arch/i386/kernel/entry.S
--- linux-2.4-prerelease.org/arch/i386/kernel/entry.S	Wed Jan  3 17:19:44 2001
+++ linux-2.4-prerelease/arch/i386/kernel/entry.S	Wed Jan  3 12:58:57 2001
@@ -79,6 +79,8 @@
 need_resched	= 20
 tsk_ptrace	= 24
 processor	= 52
+preemptable	= 56
+
 
 ENOSYS = 38
 
@@ -203,6 +205,9 @@
 	call *SYMBOL_NAME(sys_call_table)(,%eax,4)
 	movl %eax,EAX(%esp)		# save the return value
 ENTRY(ret_from_sys_call)
+	movl $1, %edx
+	lock
+	xaddl %edx, preemptable(%ebx)
 #ifdef CONFIG_SMP
 	movl processor(%ebx),%eax
 	shll $CONFIG_X86_L1_CACHE_SHIFT,%eax
@@ -213,13 +218,22 @@
 	testl SYMBOL_NAME(irq_stat)+4,%ecx		# softirq_mask
 #endif
 	jne   handle_softirq
-
+	cmpl $0, %edx			# task is preemptable ?
+	jne check_signal
 ret_with_reschedule:
 	cmpl $0,need_resched(%ebx)
 	jne reschedule
+check_signal:
+#if 0
+	movl EFLAGS(%esp), %eax		# mix EFLAGS and CS
+	movb CS(%esp), %al
+	testl $(VM_MASK | 3), %eax	# return to user mode ?
+	je restore_all			# no: bypass signal check
+#endif
 	cmpl $0,sigpending(%ebx)
 	jne signal_return
 restore_all:
+	decl preemptable(%ebx)
 	RESTORE_ALL
 
 	ALIGN
@@ -270,14 +284,22 @@
 #endif
 	jne   handle_softirq
 
+/*
+ * ret_from_intr is the common path used to return
+ * from interruptions (either hard or soft) and exceptions.
+ * At that point the preemption is disabled
+ * (see do_IRQ and handle_softirq).
+ * Re-enable the preemption, verify that the current thread
+ * is preemptable, and check for a pending scheduling request.
+ */
 ENTRY(ret_from_intr)
 	GET_CURRENT(%ebx)
-	movl EFLAGS(%esp),%eax		# mix EFLAGS and CS
-	movb CS(%esp),%al
-	testl $(VM_MASK | 3),%eax	# return to VM86 mode or non-supervisor?
-	jne ret_with_reschedule
-	jmp restore_all
-
+	cmpl $1, preemptable(%ebx)
+	jne restore_all
+	cmpl $0, state(%ebx)		# current task is running ?
+	jne restore_all
+	jmp ret_with_reschedule
+
 	ALIGN
 handle_softirq:
 	call SYMBOL_NAME(do_softirq)
@@ -286,6 +308,7 @@
 	ALIGN
 reschedule:
 	call SYMBOL_NAME(schedule)	# test
+	decl preemptable(%ebx)
 	jmp ret_from_sys_call
 
 ENTRY(divide_error)
@@ -316,6 +339,13 @@
 	movl %edx,%ds
 	movl %edx,%es
 	GET_CURRENT(%ebx)
+/*
+ * All exceptions are called with the preemption disabled.
+ * In addition, some of them (page_fault) are not reentrant
+ * and need to be atomic until the preemption can be disabled.
+ */
+	incl preemptable(%ebx)
+	sti
 	call *%edi
 	addl $8,%esp
 	jmp ret_from_exception
@@ -334,6 +364,7 @@
 	pushl $-1		# mark this as an int
 	SAVE_ALL
 	GET_CURRENT(%ebx)
+	incl preemptable(%ebx)
 	pushl $ret_from_exception
 	movl %cr0,%eax
 	testl $0x4,%eax		# EM (math emulation bit)
diff -u --recursive linux-2.4-prerelease.org/arch/i386/kernel/irq.c linux-2.4-prerelease/arch/i386/kernel/irq.c
--- linux-2.4-prerelease.org/arch/i386/kernel/irq.c	Wed Jan  3 17:19:44 2001
+++ linux-2.4-prerelease/arch/i386/kernel/irq.c	Wed Jan  3 12:58:57 2001
@@ -564,6 +564,12 @@
 	unsigned int status;
 
 	kstat.irqs[cpu][irq]++;
+	/*
+	 * Disable preemption for the current task.
+	 * ret_from_intr will re-enable the preemption and
+	 * check for a scheduling request.
+	 */
+	disable_preempt();
 	spin_lock(&desc->lock);
 	desc->handler->ack(irq);
 	/*
diff -u --recursive linux-2.4-prerelease.org/arch/i386/kernel/smp.c linux-2.4-prerelease/arch/i386/kernel/smp.c
--- linux-2.4-prerelease.org/arch/i386/kernel/smp.c	Wed Jan  3 17:19:44 2001
+++ linux-2.4-prerelease/arch/i386/kernel/smp.c	Wed Jan  3 12:58:57 2001
@@ -277,6 +277,8 @@
 {
 	unsigned long cpu = smp_processor_id();
 
+	disable_preempt();
+
 	if (!test_bit(cpu, &flush_cpumask))
 		return;
 	/*
@@ -518,6 +520,7 @@
 asmlinkage void smp_reschedule_interrupt(void)
 {
 	ack_APIC_irq();
+	disable_preempt();
 }
 
 asmlinkage void smp_call_function_interrupt(void)
@@ -532,6 +535,7 @@
 	 * about to execute the function
 	 */
 	atomic_inc(&call_data->started);
+	disable_preempt();
 	/*
 	 * At this point the info structure may be out of scope unless wait==1
 	 */
diff -u --recursive linux-2.4-prerelease.org/arch/i386/kernel/traps.c linux-2.4-prerelease/arch/i386/kernel/traps.c
--- linux-2.4-prerelease.org/arch/i386/kernel/traps.c	Wed Jan  3 17:19:44 2001
+++ linux-2.4-prerelease/arch/i386/kernel/traps.c	Wed Jan  3 12:58:57 2001
@@ -958,7 +958,7 @@
 	set_trap_gate(11,&segment_not_present);
 	set_trap_gate(12,&stack_segment);
 	set_trap_gate(13,&general_protection);
-	set_trap_gate(14,&page_fault);
+	set_intr_gate(14,&page_fault);
 	set_trap_gate(15,&spurious_interrupt_bug);
 	set_trap_gate(16,&coprocessor_error);
 	set_trap_gate(17,&alignment_check);
diff -u --recursive linux-2.4-prerelease.org/arch/i386/lib/dec_and_lock.c linux-2.4-prerelease/arch/i386/lib/dec_and_lock.c
--- linux-2.4-prerelease.org/arch/i386/lib/dec_and_lock.c	Wed Jan  3 17:19:44 2001
+++ linux-2.4-prerelease/arch/i386/lib/dec_and_lock.c	Wed Jan  3 12:58:57 2001
@@ -9,6 +9,7 @@
 
 #include <linux/spinlock.h>
 #include <asm/atomic.h>
+#include <linux/sched.h>
 
 int atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock)
 {
diff -u --recursive linux-2.4-prerelease.org/arch/i386/mm/fault.c linux-2.4-prerelease/arch/i386/mm/fault.c
--- linux-2.4-prerelease.org/arch/i386/mm/fault.c	Wed Jan  3 17:19:44 2001
+++ linux-2.4-prerelease/arch/i386/mm/fault.c	Wed Jan  3 12:58:57 2001
@@ -112,6 +112,8 @@
 	unsigned long page;
 	unsigned long fixup;
 	int write;
+	int ret;
+
 	siginfo_t info;
 
 	/* get the address */
@@ -193,7 +195,17 @@
 	 * make sure we exit gracefully rather than endlessly redo
 	 * the fault.
 	 */
-	switch (handle_mm_fault(mm, vma, address, write)) {
+
+	/*
+	 * Re-enable the preemption before calling the generic handler.
+	 * This is rather for fun and to validate things a bit, since
+	 * the mm semaphore is held at that point and that can cause
+	 * a lot of contention.
+	 */
+	enable_preempt();
+	ret = handle_mm_fault(mm, vma, address, write);
+	disable_preempt();
+	switch (ret) {
 	case 1:
 		tsk->min_flt++;
 		break;
diff -u --recursive linux-2.4-prerelease.org/drivers/pcmcia/ds.c linux-2.4-prerelease/drivers/pcmcia/ds.c
--- linux-2.4-prerelease.org/drivers/pcmcia/ds.c	Wed Jan  3 17:19:44 2001
+++ linux-2.4-prerelease/drivers/pcmcia/ds.c	Wed Jan  3 12:58:57 2001
@@ -880,7 +880,16 @@
     int i, ret;
 
     DEBUG(0, "%s\n", version);
-
+#if 1
+    /*
+     * I got some problems with PCMCIA initialization and a
+     * preemptive kernel:
+     * init_pcmcia_ds() being called before the completion
+     * of pending scheduled tasks. I don't know if this is the
+     * right fix though.
+     */
+    flush_scheduled_tasks();
+#endif
     /*
      * Ugly. But we want to wait for the socket threads to have started up.
      * We really should let the drivers themselves drive some of this..
diff -u --recursive linux-2.4-prerelease.org/include/asm-i386/smplock.h linux-2.4-prerelease/include/asm-i386/smplock.h
--- linux-2.4-prerelease.org/include/asm-i386/smplock.h	Wed Jan  3 17:19:44 2001
+++ linux-2.4-prerelease/include/asm-i386/smplock.h	Wed Jan  3 17:29:36 2001
@@ -17,6 +17,7 @@
  */
 #define release_kernel_lock(task, cpu) \
 do { \
+	disable_preempt(); \
 	if (task->lock_depth >= 0) \
 		spin_unlock(&kernel_flag); \
 	release_irqlock(cpu); \
@@ -30,6 +31,7 @@
 do { \
 	if (task->lock_depth >= 0) \
 		spin_lock(&kernel_flag); \
+	enable_preempt(); \
 } while (0)
 
 
@@ -43,6 +45,7 @@
 extern __inline__ void lock_kernel(void)
 {
 #if 1
+	disable_preempt();
 	if (!++current->lock_depth)
 		spin_lock(&kernel_flag);
 #else
@@ -63,6 +66,7 @@
 #if 1
 	if (--current->lock_depth < 0)
 		spin_unlock(&kernel_flag);
+	enable_preempt();
 #else
 	__asm__ __volatile__(
 		"decl %1\n\t"
diff -u --recursive linux-2.4-prerelease.org/include/asm-i386/softirq.h linux-2.4-prerelease/include/asm-i386/softirq.h
--- linux-2.4-prerelease.org/include/asm-i386/softirq.h	Wed Jan  3 17:19:44 2001
+++ linux-2.4-prerelease/include/asm-i386/softirq.h	Wed Jan  3 14:21:58 2001
@@ -7,8 +7,10 @@
 #define cpu_bh_disable(cpu)	do { local_bh_count(cpu)++; barrier(); } while (0)
 #define cpu_bh_enable(cpu)	do { barrier(); local_bh_count(cpu)--; } while (0)
 
-#define local_bh_disable()	cpu_bh_disable(smp_processor_id())
-#define local_bh_enable()	cpu_bh_enable(smp_processor_id())
+#define local_bh_disable() \
+do { disable_preempt(); cpu_bh_disable(smp_processor_id()); } while (0)
+#define local_bh_enable() \
+do { cpu_bh_enable(smp_processor_id()); enable_preempt(); } while (0)
 
 #define in_softirq() (local_bh_count(smp_processor_id()) != 0)
 
diff -u --recursive linux-2.4-prerelease.org/include/asm-i386/spinlock.h linux-2.4-prerelease/include/asm-i386/spinlock.h
--- linux-2.4-prerelease.org/include/asm-i386/spinlock.h	Wed Jan  3 17:19:44 2001
+++ linux-2.4-prerelease/include/asm-i386/spinlock.h	Wed Jan  3 14:21:58 2001
@@ -65,7 +65,7 @@
 #define spin_unlock_string \
 	"movb $1,%0"
 
-static inline int spin_trylock(spinlock_t *lock)
+static inline int _spin_trylock(spinlock_t *lock)
 {
 	char oldval;
 	__asm__ __volatile__(
@@ -75,7 +75,7 @@
 	return oldval > 0;
 }
 
-static inline void spin_lock(spinlock_t *lock)
+static inline void _spin_lock(spinlock_t *lock)
 {
 #if SPINLOCK_DEBUG
 	__label__ here;
@@ -90,7 +90,7 @@
 		:"=m" (lock->lock) : : "memory");
 }
 
-static inline void spin_unlock(spinlock_t *lock)
+static inline void _spin_unlock(spinlock_t *lock)
 {
 #if SPINLOCK_DEBUG
 	if (lock->magic != SPINLOCK_MAGIC)
@@ -143,7 +143,7 @@
  */
 /* the spinlock helpers are in arch/i386/kernel/semaphore.c */
 
-static inline void read_lock(rwlock_t *rw)
+static inline void _read_lock(rwlock_t *rw)
 {
 #if SPINLOCK_DEBUG
 	if (rw->magic != RWLOCK_MAGIC)
@@ -152,7 +152,7 @@
 	__build_read_lock(rw, "__read_lock_failed");
 }
 
-static inline void write_lock(rwlock_t *rw)
+static inline void _write_lock(rwlock_t *rw)
 {
 #if SPINLOCK_DEBUG
 	if (rw->magic != RWLOCK_MAGIC)
@@ -161,8 +161,8 @@
 	__build_write_lock(rw, "__write_lock_failed");
 }
 
-#define read_unlock(rw)		asm volatile("lock ; incl %0" :"=m" ((rw)->lock) : : "memory")
-#define write_unlock(rw)	asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ",%0":"=m" ((rw)->lock) : : "memory")
+#define _read_unlock(rw)	asm volatile("lock ; incl %0" :"=m" ((rw)->lock) : : "memory")
+#define _write_unlock(rw)	asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ",%0":"=m" ((rw)->lock) : : "memory")
 
 static inline int write_trylock(rwlock_t *lock)
 {
diff -u --recursive linux-2.4-prerelease.org/include/asm-i386/system.h linux-2.4-prerelease/include/asm-i386/system.h
--- linux-2.4-prerelease.org/include/asm-i386/system.h	Wed Jan  3 17:19:44 2001
+++ linux-2.4-prerelease/include/asm-i386/system.h	Wed Jan  3 14:21:58 2001
@@ -306,6 +306,13 @@
 #define local_irq_disable()	__cli()
 #define local_irq_enable()	__sti()
 
+static inline int local_irq_are_enabled(void)
+{
+	unsigned long flags;
+	__save_flags(flags);
+	return (flags & 0x00000200);
+}
+
 #ifdef CONFIG_SMP
 
 extern void __global_cli(void);
diff -u --recursive linux-2.4-prerelease.org/include/linux/sched.h linux-2.4-prerelease/include/linux/sched.h
--- linux-2.4-prerelease.org/include/linux/sched.h	Wed Jan  3 17:19:44 2001
+++ linux-2.4-prerelease/include/linux/sched.h	Wed Jan  3 15:31:41 2001
@@ -296,6 +296,7 @@
 	unsigned long policy;
 	struct mm_struct *mm;
 	int has_cpu, processor;
+	atomic_t preemptable;
 	unsigned long cpus_allowed;
 	/*
 	 * (only the 'next' pointer fits into the cacheline, but
@@ -443,6 +444,7 @@
     policy:		SCHED_OTHER,					\
     mm:			NULL,						\
     active_mm:		&init_mm,					\
+    preemptable:	ATOMIC_INIT(0),					\
     cpus_allowed:	-1,						\
     run_list:		LIST_HEAD_INIT(tsk.run_list),			\
     next_task:		&tsk,						\
@@ -524,6 +526,7 @@
 extern void free_uid(struct user_struct *);
 
 #include <asm/current.h>
+#include <asm/hardirq.h>
 
 extern unsigned long volatile jiffies;
 extern unsigned long itimer_ticks;
@@ -634,6 +637,41 @@
 {
 	return (current->sas_ss_size == 0 ? SS_DISABLE
 		: on_sig_stack(sp) ? SS_ONSTACK : 0);
+}
+
+static inline void disable_preempt(void)
+{
+	atomic_inc(&current->preemptable);
+}
+
+static inline void enable_preempt(void)
+{
+	if (atomic_read(&current->preemptable) <= 0) {
+		BUG();
+	}
+	if (atomic_read(&current->preemptable) == 1) {
+		/*
+		 * At that point a reschedule is safe iff:
+		 * - a scheduling request is pending,
+		 * - the task is in the running state,
+		 * - this is not an interrupt context,
+		 * - local interrupts are enabled.
+		 */
+		if (current->need_resched == 1 &&
+		    current->state == TASK_RUNNING &&
+		    !in_interrupt() &&
+		    local_irq_are_enabled())
+		{
+			schedule();
+		}
+	}
+	atomic_dec(&current->preemptable);
+}
+
+static inline int preemptable(void)
+{
+	return (!in_interrupt() &&
+		!atomic_read(&current->preemptable));
 }
 
 extern int request_irq(unsigned int,
diff -u --recursive linux-2.4-prerelease.org/include/linux/smp_lock.h linux-2.4-prerelease/include/linux/smp_lock.h
--- linux-2.4-prerelease.org/include/linux/smp_lock.h	Wed Jan  3 17:19:44 2001
+++ linux-2.4-prerelease/include/linux/smp_lock.h	Wed Jan  3 17:30:04 2001
@@ -5,11 +5,37 @@
 
 #ifndef CONFIG_SMP
 
-#define lock_kernel()				do { } while(0)
-#define unlock_kernel()				do { } while(0)
-#define release_kernel_lock(task, cpu)		do { } while(0)
-#define reacquire_kernel_lock(task)		do { } while(0)
-#define kernel_locked()				1
+/*
+ * Release the global kernel lock.
+ * Regarding preemption, this actually does the reverse - it disables it.
+ */
+#define release_kernel_lock(task, cpu) \
+do { \
+	disable_preempt(); \
+} while (0)
+
+/*
+ * Re-acquire the kernel lock.
+ * Re-enable the preemption - see the comments above.
+ * Note: enable_preempt() cannot be called at
+ * that point (otherwise schedule() becomes reentrant).
+ */
+#define reacquire_kernel_lock(task) \
+do { \
+	atomic_dec(&current->preemptable); \
+} while (0)
+
+#define lock_kernel() \
+do { \
+	disable_preempt(); \
+} while (0)
+
+#define unlock_kernel() \
+do { \
+	enable_preempt(); \
+} while (0)
+
+#define kernel_locked() (!preemptable())
 
 #else
 
diff -u --recursive linux-2.4-prerelease.org/include/linux/spinlock.h linux-2.4-prerelease/include/linux/spinlock.h
--- linux-2.4-prerelease.org/include/linux/spinlock.h	Wed Jan  3 17:19:44 2001
+++ linux-2.4-prerelease/include/linux/spinlock.h	Wed Jan  3 14:21:58 2001
@@ -3,33 +3,72 @@
 
 #include <linux/config.h>
 
+static inline void disable_preempt(void);
+static inline void enable_preempt(void);
+
 /*
  * These are the generic versions of the spinlocks and read-write
  * locks..
  */
-#define spin_lock_irqsave(lock, flags)		do { local_irq_save(flags); spin_lock(lock); } while (0)
-#define spin_lock_irq(lock)			do { local_irq_disable(); spin_lock(lock); } while (0)
-#define spin_lock_bh(lock)			do { local_bh_disable(); spin_lock(lock); } while (0)
-
-#define read_lock_irqsave(lock, flags)		do { local_irq_save(flags); read_lock(lock); } while (0)
-#define read_lock_irq(lock)			do { local_irq_disable(); read_lock(lock); } while (0)
-#define read_lock_bh(lock)			do { local_bh_disable(); read_lock(lock); } while (0)
-
-#define write_lock_irqsave(lock, flags)		do { local_irq_save(flags); write_lock(lock); } while (0)
-#define write_lock_irq(lock)			do { local_irq_disable(); write_lock(lock); } while (0)
-#define write_lock_bh(lock)			do { local_bh_disable(); write_lock(lock); } while (0)
-
-#define spin_unlock_irqrestore(lock, flags)	do { spin_unlock(lock); local_irq_restore(flags); } while (0)
-#define spin_unlock_irq(lock)			do { spin_unlock(lock); local_irq_enable(); } while (0)
-#define spin_unlock_bh(lock)			do { spin_unlock(lock); local_bh_enable(); } while (0)
-
-#define read_unlock_irqrestore(lock, flags)	do { read_unlock(lock); local_irq_restore(flags); } while (0)
-#define read_unlock_irq(lock)			do { read_unlock(lock); local_irq_enable(); } while (0)
-#define read_unlock_bh(lock)			do { read_unlock(lock); local_bh_enable(); } while (0)
-
-#define write_unlock_irqrestore(lock, flags)	do { write_unlock(lock); local_irq_restore(flags); } while (0)
-#define write_unlock_irq(lock)			do { write_unlock(lock); local_irq_enable(); } while (0)
-#define write_unlock_bh(lock)			do { write_unlock(lock); local_bh_enable(); } while (0)
+#define spin_lock_irqsave(lock, flags) \
+	do { disable_preempt(); local_irq_save(flags); _spin_lock(lock); } while (0)
+#define spin_lock_irq(lock) \
+	do { disable_preempt(); local_irq_disable(); _spin_lock(lock); } while (0)
+#define spin_lock_bh(lock) \
+	do { disable_preempt(); local_bh_disable(); _spin_lock(lock); } while (0)
+
+#define read_lock_irqsave(lock, flags) \
+	do { disable_preempt(); local_irq_save(flags); _read_lock(lock); } while (0)
+#define read_lock_irq(lock) \
+	do { disable_preempt(); local_irq_disable(); _read_lock(lock); } while (0)
+#define read_lock_bh(lock) \
+	do { disable_preempt(); local_bh_disable(); _read_lock(lock); } while (0)
+
+#define write_lock_irqsave(lock, flags) \
+	do { disable_preempt(); local_irq_save(flags); _write_lock(lock); } while (0)
+#define write_lock_irq(lock) \
+	do { disable_preempt(); local_irq_disable(); _write_lock(lock); } while (0)
+#define write_lock_bh(lock) \
+	do { disable_preempt(); local_bh_disable(); _write_lock(lock); } while (0)
+
+#define spin_unlock_irqrestore(lock, flags) \
+	do { _spin_unlock(lock); local_irq_restore(flags); enable_preempt(); } while (0)
+#define spin_unlock_irq(lock) \
+	do { _spin_unlock(lock); local_irq_enable(); enable_preempt(); } while (0)
+#define spin_unlock_bh(lock) \
+	do { _spin_unlock(lock); local_bh_enable(); enable_preempt(); } while (0)
+
+#define read_unlock_irqrestore(lock, flags) \
+	do { _read_unlock(lock); local_irq_restore(flags); enable_preempt(); } while (0)
+#define read_unlock_irq(lock) \
+	do { _read_unlock(lock); local_irq_enable(); enable_preempt(); } while (0)
+#define read_unlock_bh(lock) \
+	do { _read_unlock(lock); local_bh_enable(); enable_preempt(); } while (0)
+
+#define write_unlock_irqrestore(lock, flags) \
+	do { _write_unlock(lock); local_irq_restore(flags); enable_preempt(); } while (0)
+#define write_unlock_irq(lock) \
+	do { _write_unlock(lock); local_irq_enable(); enable_preempt(); } while (0)
+#define write_unlock_bh(lock) \
+	do { _write_unlock(lock); local_bh_enable(); enable_preempt(); } while (0)
+
+#define spin_lock(lock) \
+	do { disable_preempt(); _spin_lock(lock); } while (0)
+#define spin_unlock(lock) \
+	do { _spin_unlock(lock); enable_preempt(); } while (0)
+#define spin_trylock(lock) \
+	({ disable_preempt(); _spin_trylock(lock) ? 1 : (enable_preempt(), 0); })
+
+#define read_lock(lock) \
+	do { disable_preempt(); _read_lock(lock); } while (0)
+#define read_unlock(lock) \
+	do { _read_unlock(lock); enable_preempt(); } while (0)
+
+#define write_lock(lock) \
+	do { disable_preempt(); _write_lock(lock); } while (0)
+#define write_unlock(lock) \
+	do { _write_unlock(lock); enable_preempt(); } while (0)
+
 
 #ifdef CONFIG_SMP
 #include <asm/spinlock.h>
@@ -40,8 +79,6 @@
 
 #if (DEBUG_SPINLOCKS < 1)
 
-#define atomic_dec_and_lock(atomic,lock) atomic_dec_and_test(atomic)
-
 /*
  * Your basic spinlocks, allowing only a single CPU anywhere
  *
@@ -56,11 +93,11 @@
 #endif
 
 #define spin_lock_init(lock)	do { } while(0)
-#define spin_lock(lock)		(void)(lock) /* Not "unused variable". */
+#define _spin_lock(lock)	(void)(lock) /* Not "unused variable". */
 #define spin_is_locked(lock)	(0)
-#define spin_trylock(lock)	({1; })
+#define _spin_trylock(lock)	({1; })
 #define spin_unlock_wait(lock)	do { } while(0)
-#define spin_unlock(lock)	do { } while(0)
+#define _spin_unlock(lock)	do { } while(0)
 
 #elif (DEBUG_SPINLOCKS < 2)
 
@@ -71,11 +108,11 @@
 
 #define spin_lock_init(x)	do { (x)->lock = 0; } while (0)
 #define spin_is_locked(lock)	(test_bit(0,(lock)))
-#define spin_trylock(lock)	(!test_and_set_bit(0,(lock)))
+#define _spin_trylock(lock)	(!test_and_set_bit(0,(lock)))
 
-#define spin_lock(x)		do { (x)->lock = 1; } while (0)
+#define _spin_lock(x)		do { (x)->lock = 1; } while (0)
 #define spin_unlock_wait(x)	do { } while (0)
-#define spin_unlock(x)		do { (x)->lock = 0; } while (0)
+#define _spin_unlock(x)		do { (x)->lock = 0; } while (0)
 
 #else /* (DEBUG_SPINLOCKS >= 2) */
 
@@ -90,11 +127,11 @@
 
 #define spin_lock_init(x)	do { (x)->lock = 0; } while (0)
 #define spin_is_locked(lock)	(test_bit(0,(lock)))
-#define spin_trylock(lock)	(!test_and_set_bit(0,(lock)))
+#define _spin_trylock(lock)	(!test_and_set_bit(0,(lock)))
 
-#define spin_lock(x)		do {unsigned long __spinflags; save_flags(__spinflags); cli(); if ((x)->lock&&(x)->babble) {printk("%s:%d: spin_lock(%s:%p) already locked\n", __BASE_FILE__,__LINE__, (x)->module, (x));(x)->babble--;} (x)->lock = 1; restore_flags(__spinflags);} while (0)
+#define _spin_lock(x)		do {unsigned long __spinflags; save_flags(__spinflags); cli(); if ((x)->lock&&(x)->babble) {printk("%s:%d: spin_lock(%s:%p) already locked\n", __BASE_FILE__,__LINE__, (x)->module, (x));(x)->babble--;} (x)->lock = 1; restore_flags(__spinflags);} while (0)
 #define spin_unlock_wait(x)	do {unsigned long __spinflags; save_flags(__spinflags); cli(); if ((x)->lock&&(x)->babble) {printk("%s:%d: spin_unlock_wait(%s:%p) deadlock\n", __BASE_FILE__,__LINE__, (x)->module, (x));(x)->babble--;} restore_flags(__spinflags);} while (0)
-#define spin_unlock(x)		do {unsigned long __spinflags; save_flags(__spinflags); cli(); if (!(x)->lock&&(x)->babble) {printk("%s:%d: spin_unlock(%s:%p) not locked\n", __BASE_FILE__,__LINE__, (x)->module, (x));(x)->babble--;} (x)->lock = 0; restore_flags(__spinflags);} while (0)
not locked\n", __BASE_FILE__,__LINE__, (x)->module, +(x));(x)->babble--;} (x)->lock = 0; restore_flags(__spinflags);} while (0) #endif /* DEBUG_SPINLOCKS */ @@ -119,10 +156,10 @@ #endif #define rwlock_init(lock) do { } while(0) -#define read_lock(lock) (void)(lock) /* Not "unused variable". */ -#define read_unlock(lock) do { } while(0) -#define write_lock(lock) (void)(lock) /* Not "unused variable". */ -#define write_unlock(lock) do { } while(0) +#define _read_lock(lock) (void)(lock) /* Not "unused variable". */ +#define _read_unlock(lock) do { } while(0) +#define _write_lock(lock) (void)(lock) /* Not "unused variable". */ +#define _write_unlock(lock) do { } while(0) #endif /* !SMP */ diff -u --recursive linux-2.4-prerelease.org/kernel/fork.c linux-2.4-prerelease/kernel/fork.c --- linux-2.4-prerelease.org/kernel/fork.c Wed Jan 3 17:19:44 2001 +++ linux-2.4-prerelease/kernel/fork.c Wed Jan 3 17:27:38 2001 @@ -622,6 +622,7 @@ } #endif p->lock_depth = -1; /* -1 = no lock */ + atomic_set(&p->preemptable, 0); p->start_time = jiffies; retval = -ENOMEM; diff -u --recursive linux-2.4-prerelease.org/kernel/sched.c linux-2.4-prerelease/kernel/sched.c --- linux-2.4-prerelease.org/kernel/sched.c Wed Jan 3 17:19:44 2001 +++ linux-2.4-prerelease/kernel/sched.c Wed Jan 3 13:53:17 2001 @@ -550,6 +550,16 @@ del_from_runqueue(prev); case TASK_RUNNING: } + /* + * Check if the context switch is still necessary. + * This catches up things like if (need_resched) schedule() + * that is not atomic and open a window with a preemptive + * kernel where a task can be scheduled twice. + */ + if (prev->need_resched == 0 && prev->state == TASK_RUNNING) { + spin_unlock_irq(&runqueue_lock); + goto same_process; + } prev->need_resched = 0; /* @@ -1150,7 +1160,7 @@ printk(" %5d\n", p->p_osptr->pid); else printk("\n"); - + printk(" preemptable : %d\n", atomic_read(&p->preemptable)); { struct sigqueue *q; char s[sizeof(sigset_t)*2+1], b[sizeof(sigset_t)*2+1]; diff -u --recursive linux-2.4-prerelease.org/lib/dec_and_lock.c linux-2.4-prerelease/lib/dec_and_lock.c --- linux-2.4-prerelease.org/lib/dec_and_lock.c Wed Jan 3 17:19:44 2001 +++ linux-2.4-prerelease/lib/dec_and_lock.c Wed Jan 3 12:58:58 2001 @@ -1,5 +1,6 @@ #include <linux/spinlock.h> #include <asm/atomic.h> +#include <linux/sched.h> /* * This is an architecture-neutral, but slow,