* For USER32, don't store fs/gs base at all
* For !USER32, store fs/gs base outside of the PCB stack
* For !USER32, don't store or touch es, ds, fs, gs (but keep ss and cs)
* For !USER32, disable all of the v86 code
---
So I went ahead and just made x86_64 !USER32 not store or access those segment registers, along with moving fs/gs base out of iss and disabling v86 (not that I know what v86 is, but it sounds like something we don't need to support considering we only allow running x86_64 code). I have only tested my configuration (x86_64 !USER32 !MACH_KDB) -- quite likely this doesn't build or work in others; but for me it seems to work very well and I haven't got a single crash, in kernel space or user space. Please do review! debootstrap is still not quite happy. I've uploaded the log here: [0] [0]: https://paste.gg/p/anonymous/c976008dc38342cd963b0778586ead19 i386/i386/debug_i386.c | 2 - i386/i386/i386asm.sym | 4 +- i386/i386/pcb.c | 30 ++++++++---- i386/i386/thread.h | 27 +++++++++-- x86_64/locore.S | 105 +++++++++++++++++------------------------ 5 files changed, 90 insertions(+), 78 deletions(-) diff --git a/i386/i386/debug_i386.c b/i386/i386/debug_i386.c index b5465796..41d032e3 100644 --- a/i386/i386/debug_i386.c +++ b/i386/i386/debug_i386.c @@ -40,8 +40,6 @@ void dump_ss(const struct i386_saved_state *st) st->r8, st->r9, st->r10, st->r11); printf("R12 %016lx R13 %016lx R14 %016lx R15 %016lx\n", st->r12, st->r13, st->r14, st->r15); - printf("FSBASE %016lx GSBASE %016lx\n", - st->fsbase, st->gsbase); printf("RIP %016lx EFLAGS %08lx\n", st->eip, st->efl); #else printf("EAX %08lx EBX %08lx ECX %08lx EDX %08lx\n", diff --git a/i386/i386/i386asm.sym b/i386/i386/i386asm.sym index fd0be557..8af0c5d6 100644 --- a/i386/i386/i386asm.sym +++ b/i386/i386/i386asm.sym @@ -84,8 +84,10 @@ size i386_kernel_state iks size i386_exception_link iel +#if !defined(__x86_64__) || defined(USER32) offset i386_saved_state r gs offset i386_saved_state r fs +#endif offset i386_saved_state r cs offset i386_saved_state r uesp offset i386_saved_state r eax @@ -108,8 +110,6 @@ offset i386_saved_state r r12 offset i386_saved_state r r13 offset i386_saved_state r r14 offset i386_saved_state r r15 -offset 
i386_saved_state r fsbase -offset i386_saved_state r gsbase #endif offset i386_interrupt_state i eip diff --git a/i386/i386/pcb.c b/i386/i386/pcb.c index fb535709..d8987ddf 100644 --- a/i386/i386/pcb.c +++ b/i386/i386/pcb.c @@ -145,9 +145,14 @@ void switch_ktss(pcb_t pcb) * won`t save the v86 segments, so we leave room. */ +#if !defined(__x86_64__) || defined(USER32) pcb_stack_top = (pcb->iss.efl & EFL_VM) ? (long) (&pcb->iss + 1) : (long) (&pcb->iss.v86_segs); +#else + pcb_stack_top = (vm_offset_t) (&pcb->iss + 1); +#endif + #ifdef __x86_64__ assert((pcb_stack_top & 0xF) == 0); #endif @@ -224,8 +229,8 @@ void switch_ktss(pcb_t pcb) #endif /* MACH_PV_DESCRIPTORS */ #if defined(__x86_64__) && !defined(USER32) - wrmsr(MSR_REG_FSBASE, pcb->iss.fsbase); - wrmsr(MSR_REG_GSBASE, pcb->iss.gsbase); + wrmsr(MSR_REG_FSBASE, pcb->isbs.fsbase); + wrmsr(MSR_REG_GSBASE, pcb->isbs.gsbase); #endif db_load_context(pcb); @@ -412,10 +417,12 @@ void pcb_init(task_t parent_task, thread_t thread) */ pcb->iss.cs = USER_CS; pcb->iss.ss = USER_DS; +#if !defined(__x86_64__) || defined(USER32) pcb->iss.ds = USER_DS; pcb->iss.es = USER_DS; pcb->iss.fs = USER_DS; pcb->iss.gs = USER_DS; +#endif pcb->iss.efl = EFL_USER_SET; thread->pcb = pcb; @@ -477,6 +484,7 @@ kern_return_t thread_setstatus( state = (struct i386_thread_state *) tstate; +#if !defined(__x86_64__) || defined(USER32) if (flavor == i386_REGS_SEGS_STATE) { /* * Code and stack selectors must not be null, @@ -494,6 +502,7 @@ kern_return_t thread_setstatus( || state->ss == 0 || (state->ss & SEL_PL) != SEL_PL_U) return KERN_INVALID_ARGUMENT; } +#endif saved_state = USER_REGS(thread); @@ -532,7 +541,6 @@ kern_return_t thread_setstatus( saved_state->eip = state->eip; saved_state->efl = (state->efl & ~EFL_USER_CLEAR) | EFL_USER_SET; -#endif /* __x86_64__ && !USER32 */ /* * Segment registers. Set differently in V8086 mode. 
@@ -590,6 +598,7 @@ kern_return_t thread_setstatus( saved_state->fs = state->fs; saved_state->gs = state->gs; } +#endif /* __x86_64__ && !USER32 */ break; } @@ -631,6 +640,7 @@ kern_return_t thread_setstatus( break; } +#if !defined(__x86_64__) || defined(USER32) case i386_V86_ASSIST_STATE: { struct i386_v86_assist_state *state; @@ -657,7 +667,7 @@ kern_return_t thread_setstatus( USER_REGS(thread)->efl & (EFL_TF | EFL_IF); break; } - +#endif case i386_DEBUG_STATE: { struct i386_debug_state *state; @@ -680,8 +690,8 @@ kern_return_t thread_setstatus( return KERN_INVALID_ARGUMENT; state = (struct i386_fsgs_base_state *) tstate; - thread->pcb->iss.fsbase = state->fs_base; - thread->pcb->iss.gsbase = state->gs_base; + thread->pcb->isbs.fsbase = state->fs_base; + thread->pcb->isbs.gsbase = state->gs_base; if (thread == current_thread()) { wrmsr(MSR_REG_FSBASE, state->fs_base); wrmsr(MSR_REG_GSBASE, state->gs_base); @@ -766,7 +776,6 @@ kern_return_t thread_getstatus( state->uesp = saved_state->uesp; state->efl = saved_state->efl; state->esp = 0; /* unused */ -#endif /* __x86_64__ && !USER32 */ state->cs = saved_state->cs; state->ss = saved_state->ss; @@ -798,6 +807,7 @@ kern_return_t thread_getstatus( state->fs = saved_state->fs & 0xffff; state->gs = saved_state->gs & 0xffff; } +#endif /* __x86_64__ && !USER32 */ *count = i386_THREAD_STATE_COUNT; break; } @@ -836,6 +846,7 @@ kern_return_t thread_getstatus( break; } +#if !defined(__x86_64__) || defined(USER32) case i386_V86_ASSIST_STATE: { struct i386_v86_assist_state *state; @@ -850,6 +861,7 @@ kern_return_t thread_getstatus( *count = i386_V86_ASSIST_STATE_COUNT; break; } +#endif case i386_DEBUG_STATE: { @@ -872,8 +884,8 @@ kern_return_t thread_getstatus( return KERN_INVALID_ARGUMENT; state = (struct i386_fsgs_base_state *) tstate; - state->fs_base = thread->pcb->iss.fsbase; - state->gs_base = thread->pcb->iss.gsbase; + state->fs_base = thread->pcb->isbs.fsbase; + state->gs_base = thread->pcb->isbs.gsbase; *count = 
i386_FSGS_BASE_STATE_COUNT; break; } diff --git a/i386/i386/thread.h b/i386/i386/thread.h index b5fc5ffb..eab762dc 100644 --- a/i386/i386/thread.h +++ b/i386/i386/thread.h @@ -51,14 +51,13 @@ */ struct i386_saved_state { -#ifdef __x86_64__ - unsigned long fsbase; - unsigned long gsbase; -#endif +#if !defined(__x86_64__) || defined(USER32) unsigned long gs; unsigned long fs; unsigned long es; unsigned long ds; +#endif + #ifdef __x86_64__ unsigned long r15; unsigned long r14; @@ -85,12 +84,15 @@ struct i386_saved_state { unsigned long efl; unsigned long uesp; unsigned long ss; + +#if !defined(__x86_64__) || defined(USER32) struct v86_segs { unsigned long v86_es; /* virtual 8086 segment registers */ unsigned long v86_ds; unsigned long v86_fs; unsigned long v86_gs; } v86_segs; +#endif }; /* @@ -144,6 +146,7 @@ struct i386_fpsave_state { }; }; +#if !defined(__x86_64__) || defined(USER32) /* * v86_assist_state: * @@ -157,6 +160,7 @@ struct v86_assist_state { unsigned short flags; /* 8086 flag bits */ }; #define V86_IF_PENDING 0x8000 /* unused bit */ +#endif /* * i386_interrupt_state: @@ -167,10 +171,13 @@ struct v86_assist_state { */ struct i386_interrupt_state { +#if !defined(__x86_64__) || defined(USER32) long gs; long fs; long es; long ds; +#endif + #ifdef __x86_64__ long r11; long r10; @@ -187,6 +194,13 @@ struct i386_interrupt_state { long efl; }; +#if defined(__x86_64__) && !defined(USER32) +struct i386_saved_fsgs_base_state { + unsigned long fsbase; + unsigned long gsbase; +}; +#endif + /* * i386_machine_state: * @@ -197,7 +211,9 @@ struct i386_interrupt_state { struct i386_machine_state { struct user_ldt * ldt; struct i386_fpsave_state *ifps; +#if !defined(__x86_64__) || defined(USER32) struct v86_assist_state v86s; +#endif struct real_descriptor user_gdt[USER_GDT_SLOTS]; struct i386_debug_state ids; }; @@ -209,6 +225,9 @@ typedef struct pcb { #endif struct i386_saved_state iss; struct i386_machine_state ims; +#if defined(__x86_64__) && !defined(USER32) + struct 
i386_saved_fsgs_base_state isbs; +#endif decl_simple_lock_data(, lock) unsigned short init_control; /* Initial FPU control to set */ #ifdef LINUX_DEV diff --git a/x86_64/locore.S b/x86_64/locore.S index 4d61d618..fba2ad03 100644 --- a/x86_64/locore.S +++ b/x86_64/locore.S @@ -42,45 +42,6 @@ #define pusha pushq %rax ; pushq %rcx ; pushq %rdx ; pushq %rbx ; subq $8,%rsp ; pushq %rbp ; pushq %rsi ; pushq %rdi ; pushq %r8 ; pushq %r9 ; pushq %r10 ; pushq %r11 ; pushq %r12 ; pushq %r13 ; pushq %r14 ; pushq %r15 #define popa popq %r15 ; popq %r14 ; popq %r13 ; popq %r12 ; popq %r11 ; popq %r10 ; popq %r9 ; popq %r8 ; popq %rdi ; popq %rsi ; popq %rbp ; addq $8,%rsp ; popq %rbx ; popq %rdx ; popq %rcx ; popq %rax -#ifdef USER32 -#define PUSH_FSGS \ - pushq %fs ;\ - pushq %gs ;\ - subq $16,%rsp -#else -#define PUSH_FSGS \ - subq $32,%rsp -#endif - -#ifdef USER32 -#define POP_FSGS \ - popq %gs ;\ - popq %fs ;\ - addq $16,%rsp -#else -#define POP_FSGS \ - addq $32,%rsp -#endif - -#ifdef USER32 -#define PUSH_FSGS_ISR \ - pushq %fs ;\ - pushq %gs -#else -#define PUSH_FSGS_ISR \ - subq $16,%rsp -#endif - -#ifdef USER32 -#define POP_FSGS_ISR \ - popq %gs ;\ - popq %fs -#else -#define POP_FSGS_ISR \ - addq $16,%rsp -#endif - - /* * Fault recovery. @@ -368,14 +329,17 @@ ENTRY(t_segnp) /* indicate fault type */ trap_check_kernel_exit: +#ifdef USER32 testq $(EFL_VM),32(%rsp) /* is trap from V86 mode? */ jnz EXT(alltraps) /* isn`t kernel trap if so */ +#endif /* Note: handling KERNEL_RING value by hand */ testq $2,24(%rsp) /* is trap from kernel mode? */ jnz EXT(alltraps) /* if so: */ /* check for the kernel exit sequence */ cmpq $_kret_iret,16(%rsp) /* on IRET? */ je fault_iret +#ifdef USER32 #if 0 cmpq $_kret_popl_ds,16(%rsp) /* popping DS? */ je fault_popl_ds @@ -386,6 +350,7 @@ trap_check_kernel_exit: je fault_popl_fs cmpq $_kret_popl_gs,16(%rsp) /* popping GS? */ je fault_popl_gs +#endif take_fault: /* if none of the above: */ jmp EXT(alltraps) /* treat as normal trap. 
*/ @@ -414,6 +379,7 @@ fault_iret: popq %rax /* restore eax */ jmp EXT(alltraps) /* take fault */ +#ifdef USER32 /* * Fault restoring a segment register. The user's registers are still * saved on the stack. The offending segment register has not been @@ -446,13 +412,11 @@ push_fs: pushq %fs /* restore fs, */ push_gs: pushq %gs /* restore gs. */ -push_gsbase: - pushq $0 - pushq $0 push_segregs: movq %rax,R_TRAPNO(%rsp) /* set trap number */ movq %rdx,R_ERR(%rsp) /* set error code */ jmp trap_set_segs /* take trap */ +#endif /* * Debug trap. Check for single-stepping across system call into @@ -462,8 +426,10 @@ push_segregs: */ ENTRY(t_debug) INT_FIX +#ifdef USER32 testq $(EFL_VM),16(%rsp) /* is trap from V86 mode? */ jnz 0f /* isn`t kernel trap if so */ +#endif /* Note: handling KERNEL_RING value by hand */ testq $2,8(%rsp) /* is trap from kernel mode? */ jnz 0f /* if so: */ @@ -510,11 +476,13 @@ ENTRY(t_page_fault) ENTRY(alltraps) pusha /* save the general registers */ trap_push_segs: +#ifdef USER32 movq %ds,%rax /* and the segment registers */ pushq %rax movq %es,%rax /* and the segment registers */ pushq %rax - PUSH_FSGS + pushq %fs + pushq %gs /* Note that we have to load the segment registers even if this is a trap from the kernel, @@ -523,14 +491,15 @@ trap_push_segs: mov %ss,%ax /* switch to kernel data segment */ mov %ax,%ds /* (same as kernel stack segment) */ mov %ax,%es -#ifdef USER32 mov %ax,%fs mov %ax,%gs #endif trap_set_segs: cld /* clear direction flag */ +#ifdef USER32 testl $(EFL_VM),R_EFLAGS(%rsp) /* in V86 mode? */ jnz trap_from_user /* user mode trap if so */ +#endif /* Note: handling KERNEL_RING value by hand */ testb $2,R_CS(%rsp) /* user mode trap? 
*/ jz trap_from_kernel /* kernel trap if not */ @@ -580,23 +549,18 @@ _return_to_user: */ _return_from_kernel: - addq $16,%rsp /* skip FS/GS base */ -#ifndef USER32 -_kret_popl_gs: -_kret_popl_fs: - addq $16,%rsp /* skip FS/GS selector */ -#else +#ifdef USER32 _kret_popl_gs: popq %gs /* restore segment registers */ _kret_popl_fs: popq %fs -#endif _kret_popl_es: popq %rax movq %rax,%es _kret_popl_ds: popq %rax movq %rax,%ds +#endif popa /* restore general registers */ addq $16,%rsp /* discard trap number and error code */ _kret_iret: @@ -742,16 +706,17 @@ ENTRY(all_intrs) cmpq %ss:EXT(int_stack_base),%rdx je int_from_intstack /* if not: */ +#ifdef USER32 movq %ds,%rdx /* save segment registers */ pushq %rdx movq %es,%rdx pushq %rdx - PUSH_FSGS_ISR + pushq %fs + pushq %gs mov %ss,%dx /* switch to kernel segments */ mov %dx,%ds mov %dx,%es -#ifdef USER32 mov %dx,%fs mov %dx,%gs #endif @@ -784,8 +749,10 @@ LEXT(return_to_iret) /* ( label for kdb_kintr and hardclock) */ popq %rsp /* switch back to old stack */ +#ifdef USER32 testl $(EFL_VM),I_EFL(%rsp) /* if in V86 */ jnz 0f /* or */ +#endif /* Note: handling KERNEL_RING value by hand */ testb $2,I_CS(%rsp) /* user mode, */ jz 1f /* check for ASTs */ @@ -793,11 +760,14 @@ LEXT(return_to_iret) /* ( label for kdb_kintr and hardclock) */ cmpq $0,CX(EXT(need_ast),%edx) jnz ast_from_interrupt /* take it if so */ 1: - POP_FSGS_ISR +#ifdef USER32 + popq %gs + popq %fs pop %rdx mov %rdx,%es pop %rdx mov %rdx,%ds +#endif pop %r11 pop %r10 pop %r9 @@ -847,11 +817,14 @@ stack_overflowed: * ss */ ast_from_interrupt: - POP_FSGS_ISR +#ifdef USER32 + popq %gs + popq %fs pop %rdx mov %rdx,%es pop %rdx mov %rdx,%ds +#endif popq %r11 popq %r10 popq %r9 @@ -864,16 +837,18 @@ ast_from_interrupt: pushq $0 /* zero code */ pushq $0 /* zero trap number */ pusha /* save general registers */ + +#ifdef USER32 mov %ds,%rdx /* save segment registers */ push %rdx mov %es,%rdx push %rdx - PUSH_FSGS_ISR + pushq %fs + pushq %gs mov %ss,%dx /* switch to 
kernel segments */ mov %dx,%ds mov %dx,%es -#ifdef USER32 mov %dx,%fs mov %dx,%gs #endif @@ -994,20 +969,26 @@ kdb_from_iret_i: /* on interrupt stack */ pushq $0 /* zero error code */ pushq $0 /* zero trap number */ pusha /* save general registers */ +#ifdef USER32 mov %ds,%rdx /* save segment registers */ push %rdx mov %es,%rdx push %rdx - PUSH_FSGS + pushq %fs + pushq %gs +#endif movq %rsp,%rdx /* pass regs, */ movq $0,%rsi /* code, */ movq $-1,%rdi /* type to kdb */ call EXT(kdb_trap) - POP_FSGS +#ifdef USER32 + popq %gs + popq %fs pop %rdx mov %rdx,%es pop %rdx mov %rdx,%ds +#endif popa /* restore general registers */ addq $16,%rsp @@ -1082,23 +1063,27 @@ ttd_from_iret_i: /* on interrupt stack */ pushq $0 /* zero error code */ pushq $0 /* zero trap number */ pusha /* save general registers */ +#ifdef USER32 mov %ds,%rdx /* save segment registers */ push %rdx mov %es,%rdx push %rdx push %fs push %gs +#endif ud2 // TEST it movq %rsp,%rdx /* pass regs, */ movq $0,%rsi /* code, */ movq $-1,%rdi /* type to kdb */ call _kttd_trap +#ifdef USER32 pop %gs /* restore segment registers */ pop %fs pop %rdx mov %rdx,%es pop %rdx mov %rdx,%ds +#endif popa /* restore general registers */ addq $16,%rsp @@ -1137,8 +1122,6 @@ syscall_entry_2: pushq %rdx pushq %fs pushq %gs - pushq $0 // gsbase - pushq $0 // fsbase mov %ss,%dx /* switch to kernel data segment */ mov %dx,%ds -- 2.40.1