First, expand the INTR_RESTORE_GPRS macro and then rearrange the register 
restores around the INTR_RESTORE_SELECTORS macro to minimize how long 
interrupts are blocked.  This also lets us eliminate all the adjusting of 
the stack pointer except for the necessary one on the iretq path.

Instead of having completely separate "entered via int$80" and syscall 
paths, have the int$80 path set the MDP_IRET flag in md_proc and then jump 
to the common "call syscall() and handle ASTs" code.  Then, after that, 
check the MDP_IRET flag and use the correct return path.  This lets us set 
MDP_IRET in the kernel to force return via iretq despite entering via 
syscall.  With *that* we can change sigcode to invoke sigreturn via 
syscall instead of int$80, have sigreturn set MDP_IRET, and then return 
via iretq, which is necessary for correct restoring of rcx and r11 when 
interrupted.

This has been working for quite a while for me, even testing the "real 
interrupt restoring r11 and rcx correctly" part.

Tests please from people running amd64...


Philip


Index: locore.S
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/locore.S,v
retrieving revision 1.45
diff -u -p -r1.45 locore.S
--- locore.S    5 Apr 2011 21:14:00 -0000       1.45
+++ locore.S    5 Apr 2011 21:30:02 -0000
@@ -650,7 +650,7 @@ NENTRY(sigcode)
        movq    %rsp,%rdi
        pushq   %rdi                    /* fake return address */
        movq    $SYS_sigreturn,%rax
-       int     $0x80
+       syscall
        movq    $SYS_exit,%rax
        syscall
        .globl  _C_LABEL(esigcode)
@@ -935,8 +935,9 @@ IDTVEC(syscall)
        movq    $T_ASTFLT, TF_TRAPNO(%rsp)
 
        movq    CPUVAR(CURPROC),%r14
-       movq    %rsp,P_MD_REGS(%r14)    # save pointer to frame
        andl    $~MDP_IRET,P_MD_FLAGS(%r14)
+call_syscall:
+       movq    %rsp,P_MD_REGS(%r14)    # save pointer to frame
        movq    %rsp,%rdi
        call    _C_LABEL(syscall)
 1:     /* Check for ASTs on exit to user mode. */
@@ -959,16 +960,27 @@ syscall_return:
        cmpl    $IPL_NONE,CPUVAR(ILEVEL)
        jne     3f
 #endif
-       /*
-        * XXX interrupts off longer than they should be here.
-        */
+
+       movq    TF_RDI(%rsp),%rdi
+       movq    TF_RSI(%rsp),%rsi
+       movq    TF_R8(%rsp),%r8
+       movq    TF_R9(%rsp),%r9
+       movq    TF_R10(%rsp),%r10
+       movq    TF_R12(%rsp),%r12
+       movq    TF_R13(%rsp),%r13
+       movq    TF_R14(%rsp),%r14
+       movq    TF_R15(%rsp),%r15
+       movq    TF_RBP(%rsp),%rbp
+       movq    TF_RBX(%rsp),%rbx
+
        INTR_RESTORE_SELECTORS
-       INTR_RESTORE_GPRS
-       addq    $48,%rsp
-       popq    %rcx    /* return rip */
-       addq    $8,%rsp
-       popq    %r11    /* flags as set by sysret insn */
-       movq    %ss:(%rsp),%rsp
+
+       movq    TF_RDX(%rsp),%rdx
+       movq    TF_RAX(%rsp),%rax
+
+       movq    TF_RIP(%rsp),%rcx
+       movq    TF_RFLAGS(%rsp),%r11
+       movq    TF_RSP(%rsp),%rsp
        sysretq
 
 #ifdef DIAGNOSTIC
@@ -1007,47 +1019,52 @@ NENTRY(child_trampoline)
        call    *%r12
        jmp     syscall_return
 
-       .globl  _C_LABEL(osyscall_return)
-
 
 /*
- * Trap gate entry for int $80 syscall, also used by sigreturn.
+ * Trap gate entry for old int $80 syscall (used to be used by sigreturn)
  */
 IDTVEC(osyscall)
        pushq   $2              # size of instruction for restart
        pushq   $T_ASTFLT       # trap # for doing ASTs
        INTRENTRY
        sti
-       movq    CPUVAR(CURPROC),%rdx
-       movq    %rsp,P_MD_REGS(%rdx)    # save pointer to frame
-       movq    %rsp,%rdi
-       call    _C_LABEL(syscall)
-_C_LABEL(osyscall_return):
-2:     /* Check for ASTs on exit to user mode. */
-       cli
-       CHECK_ASTPENDING(%r11)
-       je      1f
-       /* Always returning to user mode here. */
-       CLEAR_ASTPENDING(%r11)
-       sti
-       /* Pushed T_ASTFLT into tf_trapno on entry. */
-       movq    %rsp,%rdi
-       call    _C_LABEL(trap)
-       jmp     2b
+       movq    CPUVAR(CURPROC),%r14
+       orl     $MDP_IRET,P_MD_FLAGS(%r14)
+       jmp     call_syscall
 
+/*
+ * Return via iretq, for real interrupts and signal returns
+ */
 iret_return:
-1:
 #ifdef DIAGNOSTIC
        cmpl    $IPL_NONE,CPUVAR(ILEVEL)
        jne     3f
-#endif /* DIAGNOSTIC */
+#endif
        .globl  intr_fast_exit
 intr_fast_exit:
+       movq    TF_RDI(%rsp),%rdi
+       movq    TF_RSI(%rsp),%rsi
+       movq    TF_R8(%rsp),%r8
+       movq    TF_R9(%rsp),%r9
+       movq    TF_R10(%rsp),%r10
+       movq    TF_R12(%rsp),%r12
+       movq    TF_R13(%rsp),%r13
+       movq    TF_R14(%rsp),%r14
+       movq    TF_R15(%rsp),%r15
+       movq    TF_RBP(%rsp),%rbp
+       movq    TF_RBX(%rsp),%rbx
+
        testq   $SEL_UPL,TF_CS(%rsp)
        je      5f
+
        INTR_RESTORE_SELECTORS
-5:     INTR_RESTORE_GPRS
-       addq    $48,%rsp
+
+5:     movq    TF_RDX(%rsp),%rdx
+       movq    TF_RCX(%rsp),%rcx
+       movq    TF_R11(%rsp),%r11
+       movq    TF_RAX(%rsp),%rax
+       addq    $TF_RIP,%rsp
+
        .globl  _C_LABEL(doreti_iret)
 _C_LABEL(doreti_iret):
        iretq
Index: machdep.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/machdep.c,v
retrieving revision 1.135
diff -u -p -r1.135 machdep.c
--- machdep.c   5 Apr 2011 21:14:00 -0000       1.135
+++ machdep.c   5 Apr 2011 21:30:03 -0000
@@ -674,6 +674,7 @@ sys_sigreturn(struct proc *p, void *v, r
        else
                p->p_sigacts->ps_sigstk.ss_flags &= ~SS_ONSTACK;
        p->p_sigmask = ksc.sc_mask & ~sigcantmask;
+       p->p_md.md_flags |= MDP_IRET;
 
        return (EJUSTRETURN);
 }

Reply via email to