Hello,

I've dug quite a bit into why gnumach on kvm on kvm is sluggish. What happens is that spl7 (the scheduler level for interrupts) uses a PIC mask to avoid reentrancy. That is fine on real hardware, but in a virtualized environment it is very expensive, since it needs a VMEXIT and then emulation in the host. It happens that gnumach sometimes does this a couple hundred thousand times a second! The attached patch2 allows measuring this; I get 200000 of them during device creation at debian-installer boot.
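To make the cost concrete, here is a minimal C sketch of what a single PIC mask update amounts to (illustrative only: outb() and set_pic_mask() are stand-ins for the assembly in spl.S's SETMASK macro, and the OCW port numbers are the standard 8259A ones):

    #include <stdint.h>

    /* Each outb to the 8259A mask registers is a port I/O instruction
     * that the hypervisor has to intercept (VMEXIT) and emulate. */
    static inline void outb(uint16_t port, uint8_t val)
    {
            asm volatile("outb %b0, %w1" : : "a"(val), "Nd"(port));
    }

    #define PIC_MASTER_OCW 0x21   /* master 8259A interrupt mask register */
    #define PIC_SLAVE_OCW  0xa1   /* slave 8259A interrupt mask register */

    static void set_pic_mask(uint16_t mask)
    {
            outb(PIC_MASTER_OCW, mask & 0xff);   /* one VMEXIT */
            outb(PIC_SLAVE_OCW, mask >> 8);      /* another VMEXIT */
    }

Each of those port writes has to be trapped and emulated by the host, twice per mask change.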
The attached patch fixes this by making spl.S use just cli/sti for spl7, which can be emulated just fine without any VMEXIT. This lowers the number of PIC mask changes to only a few hundred or a few thousand, and makes the gnumach-on-kvm-on-kvm case way faster. Does anybody with a bit of background on spl (I guess Richard?) see any red flag here?

Samuel
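To spell the idea out in C (a sketch only, not the actual assembly: curr_ipl, SPL7 and pic_mask are the real gnumach identifiers, cli()/sti() and set_pic_mask() stand in for the instructions and the SETMASK macro, and the "before" flow is just my reading of the old SETIPL/spl path):

    extern int curr_ipl;          /* current interrupt priority level */
    extern int pic_mask[];        /* per-level PIC masks, indexed by ipl */
    #define SPL7 7                /* numeric value assumed for this sketch */

    static inline void cli(void) { asm volatile("cli"); }
    static inline void sti(void) { asm volatile("sti"); }
    void set_pic_mask(int mask);  /* two port writes, i.e. two VMEXITs */

    /* Before: spl7 went through the generic path, which reprograms the
     * PIC with the level-7 mask and leaves IF set, relying on the mask
     * alone to block interrupts. */
    int spl7_before(void)
    {
            int old = curr_ipl;
            cli();
            set_pic_mask(pic_mask[SPL7]);  /* the expensive part */
            curr_ipl = SPL7;
            sti();
            return old;
    }

    /* After: just record the new level and clear IF; the PIC is not
     * touched, so no VMEXIT is needed. */
    int spl7_after(void)
    {
            int old = curr_ipl;
            curr_ipl = SPL7;
            cli();
            return old;
    }

The rest of the patch is the bookkeeping this requires: splx and spl must not blindly sti when the current or target level is 7, and with MACH_KDB/MACH_TTD an int3 catches the case where IF somehow got re-enabled while at ipl7.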
Make spl7 just clear IF instead of setting the PIC mask

* i386/i386/spl.S (spl7): Just set curr_ipl and cli.
(splx) [MACH_KDB || MACH_TTD]: When curr_ipl is 7, make sure that IF is
cleared.
(splx): When staying at ipl7, do not enable interrupts.
(spl) [MACH_KDB || MACH_TTD]: When curr_ipl is 7, make sure that IF is
cleared.
(spl): When new ipl is 7, branch to spl7.
* i386/i386/locore.S (TIME_TRAP_UENTRY, TIME_TRAP_SENTRY): Save flags, and
restore them instead of blindly using sti.

--- a/i386/i386/spl.S
+++ b/i386/i386/spl.S
@@ -163,15 +163,34 @@ Entry(splsched)
 Entry(splhigh)
 Entry(splhi)
 ENTRY(spl7)
-        SETIPL(SPL7)
+        movl    $SPL7,%eax
+        xchgl   EXT(curr_ipl),%eax
+        cli
+        ret
 
 ENTRY(splx)
         movl    S_ARG0,%edx             /* get ipl */
+
+#if MACH_KDB || MACH_TTD
+        /* First make sure that if we're exitting from ipl7, IF is still cleared */
+        cmpl    $SPL7,EXT(curr_ipl)     /* from ipl7? */
+        jne     0f
+        pushfl
+        popl    %eax
+        testl   $0x200,%eax             /* IF? */
+        jz      0f
+        int3                            /* Oops, interrupts got enabled?! */
+
+0:
+#endif  /* MACH_KDB || MACH_TTD */
         testl   %edx,%edx               /* spl0? */
         jz      EXT(spl0)               /* yes, handle specially */
         cmpl    EXT(curr_ipl),%edx      /* same ipl as current? */
         jne     spl                     /* no */
+        cmpl    $SPL7,%edx              /* spl7? */
+        je      1f                      /* to ipl7, don't enable interrupts */
         sti                             /* ensure interrupts are enabled */
+1:
         movl    %edx,%eax               /* return previous ipl */
         ret
 
@@ -230,6 +249,20 @@ splx_cli:
         .align  TEXT_ALIGN
         .globl  spl
 spl:
+#if MACH_KDB || MACH_TTD
+        /* First make sure that if we're exitting from ipl7, IF is still cleared */
+        cmpl    $SPL7,EXT(curr_ipl)     /* from ipl7? */
+        jne     0f
+        pushfl
+        popl    %eax
+        testl   $0x200,%eax             /* IF? */
+        jz      0f
+        int3                            /* Oops, interrupts got enabled?! */
+
+0:
+#endif  /* MACH_KDB || MACH_TTD */
+        cmpl    $SPL7,%edx              /* spl7? */
+        je      EXT(spl7)               /* yes, handle specially */
         movl    EXT(pic_mask)(,%edx,4),%eax
                                         /* get PIC mask */
         cli                             /* disable interrupts */
--- a/i386/i386/locore.S
+++ b/i386/i386/locore.S
@@ -119,6 +119,7 @@ LEXT(retry_table_end)  ;\
  * Uses %eax, %ebx, %ecx.
  */
 #define TIME_TRAP_UENTRY \
+        pushf                   /* Save flags */        ;\
         cli                     /* block interrupts */  ;\
         movl    VA_ETC,%ebx     /* get timer value */   ;\
         movl    CX(EXT(current_tstamp),%edx),%ecx       /* get old time stamp */;\
@@ -131,7 +132,7 @@ LEXT(retry_table_end)  ;\
 0:      addl    $(TH_SYSTEM_TIMER-TH_USER_TIMER),%ecx   ;\
                                 /* switch to sys timer */;\
         movl    %ecx,CX(EXT(current_timer),%edx)        /* make it current */   ;\
-        sti                     /* allow interrupts */
+        popf                    /* allow interrupts */
 
 /*
  * Update time on system call entry.
@@ -141,6 +142,7 @@ LEXT(retry_table_end)  ;\
  * Same as TIME_TRAP_UENTRY, but preserves %eax.
  */
 #define TIME_TRAP_SENTRY \
+        pushf                   /* Save flags */        ;\
         cli                     /* block interrupts */  ;\
         movl    VA_ETC,%ebx     /* get timer value */   ;\
         movl    CX(EXT(current_tstamp),%edx),%ecx       /* get old time stamp */;\
@@ -155,7 +157,7 @@ LEXT(retry_table_end)  ;\
 0:      addl    $(TH_SYSTEM_TIMER-TH_USER_TIMER),%ecx   ;\
                                 /* switch to sys timer */;\
         movl    %ecx,CX(EXT(current_timer),%edx)        /* make it current */   ;\
-        sti                     /* allow interrupts */
+        popf                    /* allow interrupts */
 
 /*
  * update time on user trap exit.
--- a/i386/i386/spl.S
+++ b/i386/i386/spl.S
@@ -41,6 +41,19 @@
         movl    %edx,%eax;      \
         ret
 
+        .data
+        .align  DATA_ALIGN
+.globl lastspl1
+lastspl1:
+        .long 0
+.globl lastspl2
+lastspl2:
+        .long 0
+.globl splnb
+splnb:
+        .long 0
+        .text
+
 /*
  * Program PICs with mask in %eax.
  */
@@ -48,6 +61,16 @@
 #define SETMASK() \
         cmpl    EXT(curr_pic_mask),%eax;        \
         je      9f;     \
+        incl    splnb;  \
+        cmpl    $123,splnb;     \
+        jnz     8f;     \
+        pushl   %ebx;   \
+        movl    4(%esp),%ebx;   \
+        movl    %ebx,lastspl1;  \
+        movl    8(%esp),%ebx;   \
+        movl    %ebx,lastspl2;  \
+        popl    %ebx;   \
+8:      \
         outb    %al,$(PIC_MASTER_OCW);  \
         movl    %eax,EXT(curr_pic_mask);        \
         movb    %ah,%al;        \
--- a/kern/mach_clock.c
+++ b/kern/mach_clock.c
@@ -127,6 +127,15 @@ void clock_interrupt(
 {
         int     my_cpu = cpu_number();
         thread_t        thread = current_thread();
+        static int count;
+
+        if (count++ == 100) {
+                extern int splnb;
+                extern void *lastspl1, *lastspl2;
+                printf("\n\n0x%p 0x%p %d\n\n", lastspl1, lastspl2, splnb);
+                splnb = 0;
+                count = 0;
+        }
 
         counter(c_clock_ticks++);
         counter(c_threads_total += c_threads_current);