Hello,

I've dug quite a bit why gnumach on kvm on kvm is slugguish.  What
happens is that spl7 (the scheduler level for interrupts) is using a
PIC mask to avoid reentrancy.  That is fine on real hardware, but in
a virtualized environment, this is very expensive since it'll need
a VMEXIT, then simulation in the host.  It happens than gnumach is
sometimes doing it a couple hundred thousand times a second!  Attached
patch2 allows to measure this, I do get 200000 during device creation at
debian installer boot.

The attached patch fixes this by making spl.S use just cli/sti for spl7;
they can be emulated fine without any VMEXIT.  This lowers the number of
PIC mask changes to a few hundreds or thousands only, and getting the
gnumach on kvm on kvm case way faster.

Does anybody with a bit of background with spl (I guess Richard?) see
any red flag here?

Samuel
Make spl7 just clear IF instead of setting the PIC mask

* i386/i386/spl.S (spl7): Just set curr_ipl and cli.
(splx) [MACH_KDB || MACH_TTD]: When curr_ipl is 7, make sure that IF is cleared.
(splx): When staying at ipl7, do not enable interrupts.
(spl) [MACH_KDB || MACH_TTD]: When curr_ipl is 7, make sure that IF is cleared.
(spl): When new ipl is 7, branch to spl7.
* i386/i386/locore.S (TIME_TRAP_UENTRY, TIME_TRAP_SENTRY): Save flags, and
restore them instead of blindly using sti.

--- a/i386/i386/spl.S
+++ b/i386/i386/spl.S
@@ -163,15 +163,34 @@ Entry(splsched)
 Entry(splhigh)
 Entry(splhi)
 ENTRY(spl7)
-       SETIPL(SPL7)
+       movl    $SPL7,%eax
+       xchgl   EXT(curr_ipl),%eax
+       cli
+       ret
 
 ENTRY(splx)
        movl    S_ARG0,%edx             /* get ipl */
+
+#if MACH_KDB || MACH_TTD
+       /* First make sure that if we're exitting from ipl7, IF is still 
cleared */
+       cmpl    $SPL7,EXT(curr_ipl)     /* from ipl7? */
+       jne     0f
+       pushfl
+       popl    %eax
+       testl   $0x200,%eax             /* IF? */
+       jz      0f
+       int3                            /* Oops, interrupts got enabled?! */
+
+0:
+#endif /* MACH_KDB || MACH_TTD */
        testl   %edx,%edx               /* spl0? */
        jz      EXT(spl0)               /* yes, handle specially */
        cmpl    EXT(curr_ipl),%edx      /* same ipl as current? */
        jne     spl                     /* no */
+       cmpl    $SPL7,%edx              /* spl7? */
+       je      1f                      /* to ipl7, don't enable interrupts */
        sti                             /* ensure interrupts are enabled */
+1:
        movl    %edx,%eax               /* return previous ipl */
        ret
 
@@ -230,6 +249,20 @@ splx_cli:
        .align  TEXT_ALIGN
        .globl  spl
 spl:
+#if MACH_KDB || MACH_TTD
+       /* First make sure that if we're exitting from ipl7, IF is still 
cleared */
+       cmpl    $SPL7,EXT(curr_ipl)     /* from ipl7? */
+       jne     0f
+       pushfl
+       popl    %eax
+       testl   $0x200,%eax             /* IF? */
+       jz      0f
+       int3                            /* Oops, interrupts got enabled?! */
+
+0:
+#endif /* MACH_KDB || MACH_TTD */
+       cmpl    $SPL7,%edx              /* spl7? */
+       je      EXT(spl7)               /* yes, handle specially */
        movl    EXT(pic_mask)(,%edx,4),%eax
                                        /* get PIC mask */
        cli                             /* disable interrupts */
--- a/i386/i386/locore.S
+++ b/i386/i386/locore.S
@@ -119,6 +119,7 @@ LEXT(retry_table_end)                       ;\
  * Uses %eax, %ebx, %ecx.
  */
 #define        TIME_TRAP_UENTRY \
+       pushf                                   /* Save flags */        ;\
        cli                                     /* block interrupts */  ;\
        movl    VA_ETC,%ebx                     /* get timer value */   ;\
        movl    CX(EXT(current_tstamp),%edx),%ecx       /* get old time stamp 
*/;\
@@ -131,7 +132,7 @@ LEXT(retry_table_end)                       ;\
 0:     addl    $(TH_SYSTEM_TIMER-TH_USER_TIMER),%ecx                   ;\
                                                /* switch to sys timer */;\
        movl    %ecx,CX(EXT(current_timer),%edx)        /* make it current */   
;\
-       sti                                     /* allow interrupts */
+       popf                                    /* allow interrupts */
 
 /*
  * Update time on system call entry.
@@ -141,6 +142,7 @@ LEXT(retry_table_end)                       ;\
  * Same as TIME_TRAP_UENTRY, but preserves %eax.
  */
 #define        TIME_TRAP_SENTRY \
+       pushf                                   /* Save flags */        ;\
        cli                                     /* block interrupts */  ;\
        movl    VA_ETC,%ebx                     /* get timer value */   ;\
        movl    CX(EXT(current_tstamp),%edx),%ecx       /* get old time stamp 
*/;\
@@ -155,7 +157,7 @@ LEXT(retry_table_end)                       ;\
 0:     addl    $(TH_SYSTEM_TIMER-TH_USER_TIMER),%ecx                   ;\
                                                /* switch to sys timer */;\
        movl    %ecx,CX(EXT(current_timer),%edx)        /* make it current */   
;\
-       sti                                     /* allow interrupts */
+       popf                                    /* allow interrupts */
 
 /*
  * update time on user trap exit.
--- a/i386/i386/spl.S
+++ b/i386/i386/spl.S
@@ -41,6 +41,19 @@
        movl    %edx,%eax;              \
        ret
 
+       .data
+       .align  DATA_ALIGN
+.globl  lastspl1
+lastspl1:
+       .long   0
+.globl  lastspl2
+lastspl2:
+       .long   0
+.globl splnb
+splnb:
+       .long   0
+       .text
+
 /*
  * Program PICs with mask in %eax.
  */
@@ -48,6 +61,16 @@
 #define SETMASK()                              \
        cmpl    EXT(curr_pic_mask),%eax;        \
        je      9f;                             \
+       incl    splnb;                          \
+       cmpl    $123,splnb;                     \
+       jnz     8f;                             \
+       pushl   %ebx;                           \
+       movl    4(%esp),%ebx;                   \
+       movl    %ebx,lastspl1;                  \
+       movl    8(%esp),%ebx;                   \
+       movl    %ebx,lastspl2;                  \
+       popl    %ebx;                           \
+8:                                             \
        outb    %al,$(PIC_MASTER_OCW);          \
        movl    %eax,EXT(curr_pic_mask);        \
        movb    %ah,%al;                        \
--- a/kern/mach_clock.c
+++ b/kern/mach_clock.c
@@ -127,6 +127,15 @@ void clock_interrupt(
 {
        int             my_cpu = cpu_number();
        thread_t        thread = current_thread();
+       static int count;
+
+       if (count++ == 100) {
+               extern int splnb;
+               extern void *lastspl1, *lastspl2;
+               printf("\n\n0x%p 0x%p %d\n\n", lastspl1, lastspl2, splnb);
+               splnb = 0;
+               count = 0;
+       }
 
        counter(c_clock_ticks++);
        counter(c_threads_total += c_threads_current);

Reply via email to