 i386/Makefrag.am        |   2 +
 i386/i386/gdt.c         |  17 +++++
 i386/i386/gdt.h         |   7 +-
 i386/i386/locore.S      | 177 ++++++++++++++++++++++++++++++++++++++++++++++++
 i386/i386/pcb.c         |  24 +++----
 i386/i386/syscall.c     | 103 ++++++++++++++++++++++++++++
 i386/i386/syscall.h     |   7 ++
 i386/i386/tss.h         |   1 +
 i386/i386at/conf.c      |   8 +++
 i386/i386at/model_dep.c |   2 +
 10 files changed, 335 insertions(+), 13 deletions(-)
 create mode 100644 i386/i386/syscall.c
 create mode 100644 i386/i386/syscall.h

diff --git a/i386/Makefrag.am b/i386/Makefrag.am
index 4dd6a9f..f59ac29 100644
--- a/i386/Makefrag.am
+++ b/i386/Makefrag.am
@@ -147,6 +147,8 @@ libkernel_a_SOURCES += \
        i386/i386/setjmp.h \
        i386/i386/spl.S \
        i386/i386/spl.h \
+       i386/i386/syscall.c \
+       i386/i386/syscall.h \
        i386/i386/task.h \
        i386/i386/thread.h \
        i386/i386/time_stamp.h \
diff --git a/i386/i386/gdt.c b/i386/i386/gdt.c
index c895eb3..0f9d0e3 100644
--- a/i386/i386/gdt.c
+++ b/i386/i386/gdt.c
@@ -57,6 +57,23 @@ gdt_init(void)
                            LINEAR_MAX_KERNEL_ADDRESS - 
                            ACC_PL_K|ACC_DATA_W, SZ_32);
+       fill_gdt_descriptor(KERNEL_ENTER_CS,
+                           LINEAR_MAX_KERNEL_ADDRESS - 
+                           ACC_PL_K|ACC_CODE_R, SZ_32);
+       fill_gdt_descriptor(KERNEL_ENTER_DS,
+                           LINEAR_MAX_KERNEL_ADDRESS - 
+                           ACC_PL_K|ACC_DATA_W, SZ_32);
+       fill_gdt_descriptor(USER_EXIT_CS,
+                           VM_MIN_ADDRESS,
+                           VM_MAX_ADDRESS-VM_MIN_ADDRESS-4096,
+                           /* XXX LINEAR_... */
+                           ACC_PL_U|ACC_CODE_R, SZ_32);
+       fill_gdt_descriptor(USER_EXIT_DS,
+                           VM_MIN_ADDRESS,
+                           VM_MAX_ADDRESS-VM_MIN_ADDRESS-4096,
+                           ACC_PL_U|ACC_DATA_W, SZ_32);
 #ifndef        MACH_PV_DESCRIPTORS
diff --git a/i386/i386/gdt.h b/i386/i386/gdt.h
index d865640..37ca6f5 100644
--- a/i386/i386/gdt.h
+++ b/i386/i386/gdt.h
@@ -55,7 +55,12 @@
 #define        USER_GDT        0x48            /* user-defined GDT entries */
 #define        USER_GDT_SLOTS  2
-#define        GDTSZ           (USER_GDT/8 + USER_GDT_SLOTS)
+#define        KERNEL_ENTER_CS (0x58 | SEL_PL_K)               /* kernel code */
+#define        KERNEL_ENTER_DS (0x60 | SEL_PL_K)               /* kernel data */
+#define        USER_EXIT_CS    (0x68 | SEL_PL_U)               /* user code */
+#define        USER_EXIT_DS    (0x70 | SEL_PL_U)               /* user data */
+#define        GDTSZ           (USER_EXIT_DS/8 + 1)
 extern struct real_descriptor gdt[GDTSZ];
diff --git a/i386/i386/locore.S b/i386/i386/locore.S
index cfda86f..aa13c6b 100644
--- a/i386/i386/locore.S
+++ b/i386/i386/locore.S
@@ -592,6 +592,7 @@ ENTRY(thread_syscall_return)
        or      $(KERNEL_STACK_SIZE-1),%ecx
        movl    -3-IKS_SIZE(%ecx),%esp          /* switch back to PCB stack */
        movl    %eax,R_EAX(%esp)                /* save return value */
+       /* XXX make this work with sysenter */
        jmp     _return_from_trap
@@ -978,6 +979,18 @@ ttd_from_iret_i:                   /* on interrupt stack */
 #endif /* MACH_TTD */
+/* User stub for calling the kernel using the trap gate.  */
+       .globl user_trapgate_stub_start
+       popl %ecx               /* Pop return address into %ecx.  */
+       popl %eax               /* Pop syscall number into %eax.  */
+       pushl %ecx              /* Push back return address.  */
+       lcall $7, $0
+       subl $4, %esp           /* magic */
+       ret
+       .globl user_trapgate_stub_end
  * System call enters through a call gate.  Flags are not saved -
  * we must shuffle stack to look like trap save area.
@@ -1171,6 +1184,170 @@ syscall_addr:
        jmp     _take_trap              /* treat as a trap */
+/* User stub for calling the kernel using the sysenter instruction.  */
+       .globl user_sysenter_stub_start
+       push %ebp
+       mov %esp, %ebp
+       pushf
+       push %ebx
+       push %esi
+       push %edi               /* xxx more callee-saved registers?  */
+       mov 8(%ebp), %eax       /* Move syscall number into %eax.  */
+       mov 12(%ebp), %ebx      /* Move first argument into %ebx.  */
+       mov 16(%ebp), %ecx      /* Move second argument into %ecx.  */
+       mov 20(%ebp), %edx      /* Move third argument into %edx.  */
+       mov 24(%ebp), %esi      /* Move fourth argument into %esi.  */
+       call get_ip             /* compute location of sysexit */
+get_ip:        pop %edi                /* load current ip */
+       add $8, %edi            /* userspace return address */
+       movl %esp, %ebp         /* userspace stack pointer */
+       sysenter
+       pop %edi
+       pop %esi
+       pop %ebx
+       popf
+       pop %ebp
+       ret
+       .globl user_sysenter_stub_end
+/*
+ * SYSENTER entry point.
+ *
+ * Calling convention:
+ *   %eax - syscall number
+ *   %ebx - syscall argument 1
+ *   %ecx - syscall argument 2
+ *   %edx - syscall argument 3
+ *   %esi - syscall argument 4
+ *   %edi - userspace return address
+ *   %ebp - userspace stack pointer
+ *
+ * Kernel Stack layout:
+ *  kernel stack base -> EAX
+ *                       ECX
+ *                       EDX
+ *                       EBX
+ *                       ESP
+ *                       EBP
+ *                       ESI
+ *                       EDI
+ * XXX: make this look like a trap save area to make thread_syscall_return work
+ */
+/*
+ * Offsets from %ebp into the register block that `pusha' stores on the
+ * sysenter stack.  %ebp is loaded from %esp immediately after `pusha',
+ * so offset 0 addresses the last register pushed (%edi) and the highest
+ * offset addresses the first one pushed (%eax).
+ */
+#define SE_EAX                 (4 * 7)
+#define SE_ECX                 (4 * 6)
+#define SE_EDX                 (4 * 5)
+#define SE_EBX                 (4 * 4)
+#define SE_ESP                 (4 * 3)
+#define SE_ESX                 SE_ESP  /* old misspelling, kept as an alias */
+#define SE_EBP                 (4 * 2)
+#define SE_ESI                 (4 * 1)
+#define SE_EDI                 (4 * 0)
+/*
+ * Per the sysenter calling convention, userspace passes its return
+ * address in %edi and its stack pointer in %ebp; the exit path reloads
+ * both from these slots before `sysexit'.
+ */
+#define SE_RETURN_ADDRESS      SE_EDI
+#define SE_STACK_POINTER       SE_EBP
+#define SE_USER_SKIP           20      /* skip past the scratchpad */
+       pusha                           /* save all registers */
+       mov     %esp, %ebp              /* to access the sysenter stack */
+       cld                             /* clear direction flag */
+       pushl   %ds                     /* save the segment registers */
+       pushl   %es
+       pushl   %fs
+       pushl   %gs
+       mov     %ss,%cx                 /* switch to kernel data segment */
+       mov     %cx,%ds
+       mov     %cx,%es
+       mov     %cx,%fs
+       mov     %cx,%gs
+       CPU_NUMBER(%edx)
+       movl    CX(EXT(kernel_stack),%edx),%ebx
+                                       /* get current kernel stack */
+       xchgl   %ebx, %esp              /* switch stacks */
+       pushl   %ebx                    /* save sysenter sp */
+       movl    %esp,%ebx               /* save kernel sp for argument
+                                          unwinding */
+       negl    %eax                    /* get system call number */
+       /* xxx sysenter_mach_call_range */
+       jl      mach_call_range         /* out of range if it was positive */
+       cmpl    EXT(mach_trap_count),%eax       /* check system call table bounds */
+       /* xxx sysenter_mach_call_range */
+       jg      mach_call_range         /* error if out of range */
+       shll    $4,%eax                 /* manual indexing */
+       movl    EXT(mach_trap_table)(%eax),%ecx
+                                       /* get number of arguments */
+       cmp     $4, %ecx
+       ja      se_args_5plus
+       je      se_args_4
+       cmp     $2, %ecx
+       ja      se_args_3
+       je      se_args_2
+       cmp     $1, %ecx
+       je      se_args_1
+       jmp     se_args_0
+       sub     $4, %ecx                /* skip the four first arguments */
+       movl    SE_STACK_POINTER(%ebp), %esi
+                                       /* get user stack pointer */
+       lea     (4                      /* skip user return address */\
+                +4                     /* point past last argument */\
+                +16                    /* skip register arguments */\
+                +SE_USER_SKIP)(%esi,%ecx,4),%esi
+                                       /* and skip past the userspace
+                                          local storage */
+       movl    $USER_DS,%edx           /* use user data segment for accesses */
+       mov     %dx,%fs
+0:     subl    $4,%esi
+       RECOVER(sysenter_mach_call_addr_push)
+       pushl   %fs:(%esi)              /* push argument on stack */
+       loop    0b                      /* loop for all arguments */
+       push    SE_ESI(%ebp)            /* push fourth argument */
+       push    SE_EDX(%ebp)            /* push third argument */
+       push    SE_ECX(%ebp)            /* push second argument */
+       push    SE_EBX(%ebp)            /* push first argument */
+       sti                             /* xxx: sti/cli where ? */
+       call    *EXT(mach_trap_table)+4(%eax)
+                                       /* call procedure */
+       cli                             /* xxx: sti/cli where ? */
+       movl    %ebx, %esp              /* clean parameters from stack */
+       /* xxx: process ast */
+       popl    %esp                    /* restore sysenter sp */
+       popl    %gs                     /* restore segment registers */
+       popl    %fs
+       popl    %es
+       popl    %ds
+       movl    SE_RETURN_ADDRESS(%ebp), %edx
+       movl    SE_STACK_POINTER(%ebp), %ecx
+       sti                             /* xxx: sti/cli where ? */
+       sysexit
+       movl    %ebx,%esp               /* clean parameters from stack */
+       /* xxx signal page-fault */
+       jmp sysenter_mach_call_addr_push
        .long   0
diff --git a/i386/i386/pcb.c b/i386/i386/pcb.c
index e8040c8..2da3804 100644
--- a/i386/i386/pcb.c
+++ b/i386/i386/pcb.c
@@ -391,12 +391,12 @@ void pcb_init(thread_t thread)
         *      Guarantee that the bootstrapped thread will be in user
         *      mode.
-       pcb->iss.cs = USER_CS;
-       pcb->iss.ss = USER_DS;
-       pcb->iss.ds = USER_DS;
-       pcb->iss.es = USER_DS;
-       pcb->iss.fs = USER_DS;
-       pcb->iss.gs = USER_DS;
+       pcb->iss.cs = USER_EXIT_CS;
+       pcb->iss.ss = USER_EXIT_DS;
+       pcb->iss.ds = USER_EXIT_DS;
+       pcb->iss.es = USER_EXIT_DS;
+       pcb->iss.fs = USER_EXIT_DS;
+       pcb->iss.gs = USER_EXIT_DS;
        pcb->iss.efl = EFL_USER_SET;
        thread->pcb = pcb;
@@ -524,12 +524,12 @@ kern_return_t thread_setstatus(
                     * 386 mode.  Set segment registers for flat
                     * 32-bit address space.
-                   saved_state->cs = USER_CS;
-                   saved_state->ss = USER_DS;
-                   saved_state->ds = USER_DS;
-                   saved_state->es = USER_DS;
-                   saved_state->fs = USER_DS;
-                   saved_state->gs = USER_DS;
+                   saved_state->cs = USER_EXIT_CS;
+                   saved_state->ss = USER_EXIT_DS;
+                   saved_state->ds = USER_EXIT_DS;
+                   saved_state->es = USER_EXIT_DS;
+                   saved_state->fs = USER_EXIT_DS;
+                   saved_state->gs = USER_EXIT_DS;
                else {
diff --git a/i386/i386/syscall.c b/i386/i386/syscall.c
new file mode 100644
index 0000000..e9b17d0
--- /dev/null
+++ b/i386/i386/syscall.c
@@ -0,0 +1,103 @@
+#include <mach/vm_param.h>
+#include <mach/vm_prot.h>
+#include <vm/pmap.h>
+#include <vm/vm_kern.h>
+#include <string.h>
+#include <kern/debug.h>
+#include <machine/tss.h>
+#include <i386/i386/ktss.h>
+#include <i386/i386/gdt.h>
+#include <i386/i386/locore.h>
+#include "syscall.h"
+#include <kern/printf.h> // xxx
+static vm_offset_t msyscall = 0;
+void user_trapgate_stub_start();
+void user_trapgate_stub_end();
+void user_sysenter_stub_start();
+void user_sysenter_stub_end();
+       kern_return_t kr;
+       vm_offset_t user_stub_start;
+       vm_offset_t user_stub_end;
+       kr = kmem_alloc_wired(kernel_map, &msyscall, PAGE_SIZE);
+       if (kr != KERN_SUCCESS)
+               panic("syscall_init");
+       memset((void *) msyscall, 0, PAGE_SIZE);
+               printf ("syscall: using SYSENTER/SYSEXIT\n");
+               user_stub_start = (vm_offset_t) user_sysenter_stub_start;
+               user_stub_end = (vm_offset_t) user_sysenter_stub_end;
+       } else {
+               printf ("syscall: using trap gate\n");
+               user_stub_start = (vm_offset_t) user_trapgate_stub_start;
+               user_stub_end = (vm_offset_t) user_trapgate_stub_end;
+       }
+       memcpy((void *) msyscall, (void *) user_stub_start,
+              (size_t) (user_stub_end - user_stub_start));
+       syscall_init_cpu();
+static void
+wrmsr(unsigned int msr, unsigned long long val)
+        __asm__ __volatile__("wrmsr"
+                           : /* no Outputs */
+                           : "c" (msr), "A" (val));
+#define MSR_IA32_SYSENTER_CS           0x00000174
+#define MSR_IA32_SYSENTER_ESP          0x00000175
+#define MSR_IA32_SYSENTER_EIP          0x00000176
+extern void sysenter_entry(void);
+               return;
+       //struct task_tss *tss = curr_ktss (cpu_number ());
+       struct task_tss *tss = &ktss;
+       wrmsr(MSR_IA32_SYSENTER_ESP,
+             (unsigned long) tss->sysenter_stack + sizeof tss->sysenter_stack);
+       wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) sysenter_entry);
+syscall_open(dev_t dev, int flag, io_req_t ior)
+       return 0;
+syscall_close(dev_t dev, int flag)
+       return;
+syscall_mmap(dev_t dev, vm_offset_t off, vm_prot_t prot)
+       if (prot & VM_PROT_WRITE)
+               return (-1);
+       return (i386_btop(pmap_extract(pmap_kernel(),
+                                      (vm_offset_t) msyscall)));
diff --git a/i386/i386/syscall.h b/i386/i386/syscall.h
new file mode 100644
index 0000000..de9670c
--- /dev/null
+++ b/i386/i386/syscall.h
@@ -0,0 +1,7 @@
+// XXX
+void syscall_init(void);
+void syscall_init_cpu(void);
+int syscall_open(dev_t dev, int flag, io_req_t ior);
+void syscall_close(dev_t dev, int flag);
+int syscall_mmap(dev_t dev, vm_offset_t off, vm_prot_t prot);
diff --git a/i386/i386/tss.h b/i386/i386/tss.h
index ff25f21..8c939c7 100644
--- a/i386/i386/tss.h
+++ b/i386/i386/tss.h
@@ -76,6 +76,7 @@ struct task_tss
   struct i386_tss tss;
   unsigned char iopb[IOPB_BYTES];
   unsigned char barrier;
+  unsigned long sysenter_stack[64]; /* xxx */
diff --git a/i386/i386at/conf.c b/i386/i386at/conf.c
index ab4f680..d7f9e6f 100644
--- a/i386/i386at/conf.c
+++ b/i386/i386at/conf.c
@@ -68,6 +68,9 @@
 #define hypcnname              "hyp"
 #endif /* MACH_HYP */
+#include <i386/syscall.h>
+#define syscall_name           "syscall"
  * List of devices - console must be at slot 0
@@ -143,6 +146,11 @@ struct dev_ops     dev_name_list[] =
          nodev },
 #endif /* MACH_HYP */
+       { syscall_name, syscall_open,   syscall_close,  nulldev_read,
+         nulldev_write,        nulldev_getstat,        nulldev_setstat,
+         syscall_mmap,
+         nodev,        nulldev,        nulldev_portdeath,      0,
+         nodev },
 int    dev_name_count = sizeof(dev_name_list)/sizeof(dev_name_list[0]);
diff --git a/i386/i386at/model_dep.c b/i386/i386at/model_dep.c
index bc34c9b..210e54d 100644
--- a/i386/i386at/model_dep.c
+++ b/i386/i386at/model_dep.c
@@ -63,6 +63,7 @@
 #include <i386/proc_reg.h>
 #include <i386/locore.h>
 #include <i386/model_dep.h>
+#include <i386/syscall.h>
 #include <i386at/autoconf.h>
 #include <i386at/idt.h>
 #include <i386at/int_init.h>
@@ -197,6 +198,7 @@ void machine_init(void)
+       syscall_init();
 /* Conserve power on processor CPU.  */

Reply via email to