This patch implements sys_vcpu, which allows a process to enter a new mode in which a signal or system call will cause a return to the original context.
diff --git a/arch/um/include/kern_util.h b/arch/um/include/kern_util.h index 3c34122..0a91cb1 100644 --- a/arch/um/include/kern_util.h +++ b/arch/um/include/kern_util.h @@ -20,7 +20,7 @@ extern int kmalloc_ok; extern unsigned long alloc_stack(int order, int atomic); extern void free_stack(unsigned long stack, int order); -extern int do_signal(void); +extern void do_signal(void); extern void copy_sc(struct uml_pt_regs *regs, void *from); extern void interrupt_end(void); extern void relay_signal(int sig, struct uml_pt_regs *regs); diff --git a/arch/um/include/sysdep-i386/ptrace.h b/arch/um/include/sysdep-i386/ptrace.h index 11c0896..510c80f 100644 --- a/arch/um/include/sysdep-i386/ptrace.h +++ b/arch/um/include/sysdep-i386/ptrace.h @@ -156,7 +156,7 @@ struct syscall_args { } while (0) #define UPT_SET_SYSCALL_RETURN(r, res) \ - REGS_SET_SYSCALL_RETURN((r)->regs, (res)) + REGS_SET_SYSCALL_RETURN((r)->gp, (res)) #define UPT_RESTART_SYSCALL(r) REGS_RESTART_SYSCALL((r)->gp) diff --git a/arch/um/include/sysdep-x86_64/ptrace.h b/arch/um/include/sysdep-x86_64/ptrace.h index 9ea44d1..d3d1dda 100644 --- a/arch/um/include/sysdep-x86_64/ptrace.h +++ b/arch/um/include/sysdep-x86_64/ptrace.h @@ -225,11 +225,11 @@ struct syscall_args { }) #define UPT_SET_SYSCALL_RETURN(r, res) \ - REGS_SET_SYSCALL_RETURN((r)->regs, (res)) + REGS_SET_SYSCALL_RETURN((r)->gp, (res)) #define UPT_RESTART_SYSCALL(r) REGS_RESTART_SYSCALL((r)->gp) -#define UPT_SEGV_IS_FIXABLE(r) REGS_SEGV_IS_FIXABLE(&r->skas) +#define UPT_SEGV_IS_FIXABLE(r) REGS_SEGV_IS_FIXABLE(&(r)->skas) #define UPT_FAULTINFO(r) (&(r)->faultinfo) diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index e8cb9ff..0963fcd 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -115,7 +115,7 @@ void interrupt_end(void) { if (need_resched()) schedule(); - if (test_tsk_thread_flag(current, TIF_SIGPENDING)) + if (test_thread_flag(TIF_SIGPENDING)) do_signal(); } diff --git a/arch/um/kernel/signal.c 
b/arch/um/kernel/signal.c index b0fce72..b1fcfde 100644 --- a/arch/um/kernel/signal.c +++ b/arch/um/kernel/signal.c @@ -85,8 +85,11 @@ static int handle_signal(struct pt_regs *regs, unsigned long signr, return err; } -static int kern_do_signal(struct pt_regs *regs) +extern int unvcpu(struct pt_regs *regs, siginfo_t *siginfo); + +void do_signal(void) { + struct pt_regs *regs = &current->thread.regs; struct k_sigaction ka_copy; siginfo_t info; sigset_t *oldset; @@ -98,6 +101,11 @@ static int kern_do_signal(struct pt_regs *regs) oldset = &current->blocked; while ((sig = get_signal_to_deliver(&info, &ka_copy, regs, NULL)) > 0) { + if (test_thread_flag(TIF_VCPU)) { + PT_REGS_SET_SYSCALL_RETURN(regs, unvcpu(regs, &info)); + return; + } + handled_sig = 1; /* Whee! Actually deliver the signal. */ if (!handle_signal(regs, sig, &ka_copy, &info, oldset)) { @@ -150,12 +158,6 @@ static int kern_do_signal(struct pt_regs *regs) clear_thread_flag(TIF_RESTORE_SIGMASK); sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL); } - return handled_sig; -} - -int do_signal(void) -{ - return kern_do_signal(&current->thread.regs); } /* diff --git a/arch/um/kernel/skas/syscall.c b/arch/um/kernel/skas/syscall.c index 4e3b820..c677b8e 100644 --- a/arch/um/kernel/skas/syscall.c +++ b/arch/um/kernel/skas/syscall.c @@ -12,12 +12,19 @@ extern int syscall_table_size; #define NR_syscalls (syscall_table_size / sizeof(void *)) +extern int unvcpu(struct pt_regs *regs, siginfo_t *siginfo); + void handle_syscall(struct uml_pt_regs *r) { struct pt_regs *regs = container_of(r, struct pt_regs, regs); long result; int syscall; + if (test_thread_flag(TIF_VCPU)) { + REGS_SET_SYSCALL_RETURN(r->gp, unvcpu(regs, NULL)); + return; + } + syscall_trace(r, 0); /* diff --git a/arch/um/kernel/syscall.c b/arch/um/kernel/syscall.c index a9c2f6f..63c782d 100644 --- a/arch/um/kernel/syscall.c +++ b/arch/um/kernel/syscall.c @@ -1,17 +1,17 @@ /* - * Copyright (C) 2000 - 2007 Jeff Dike ([EMAIL PROTECTED],linux.intel}.com) + * Copyright (C) 2000 - 
2008 Jeff Dike ([EMAIL PROTECTED],linux.intel}.com) * Licensed under the GPL */ -#include "linux/file.h" -#include "linux/fs.h" -#include "linux/mm.h" -#include "linux/sched.h" -#include "linux/utsname.h" -#include "asm/current.h" -#include "asm/mman.h" -#include "asm/uaccess.h" -#include "asm/unistd.h" +#include <linux/file.h> +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/utsname.h> +#include <asm/current.h> +#include <asm/mman.h> +#include <asm/uaccess.h> +#include <asm/unistd.h> long sys_fork(void) { @@ -158,3 +158,11 @@ long sys_switch_mm(int fd, long __user *save, long __user *new, { return do_switch_mm(fd, save, new, ip, sp, &current->thread.regs); } + +extern long do_vcpu(int mm_fd, struct vcpu_user __user *new, + struct pt_regs *regs); + +long sys_vcpu(int mm_fd, struct vcpu_user __user *new) +{ + return do_vcpu(mm_fd, new, &current->thread.regs); +} diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c index cbb7986..21e24ba 100644 --- a/arch/um/os-Linux/skas/process.c +++ b/arch/um/os-Linux/skas/process.c @@ -446,8 +446,14 @@ void userspace(struct uml_pt_regs *regs) "with signal %d\n", sig); fatal_sigsegv(); } - pid = userspace_pid[0]; + + /* + * userspace_pid can change in in_interrupt since + * PTRACE_SWITCH_MM can cause a process to change + * address spaces + */ interrupt_end(); + pid = userspace_pid[0]; /* Avoid -ERESTARTSYS handling in host */ if (PT_SYSCALL_NR_OFFSET != PT_SYSCALL_RET_OFFSET) diff --git a/arch/um/sys-x86_64/syscall_table.c b/arch/um/sys-x86_64/syscall_table.c index 8b5c216..9bb72fc 100644 --- a/arch/um/sys-x86_64/syscall_table.c +++ b/arch/um/sys-x86_64/syscall_table.c @@ -40,6 +40,7 @@ #define stub_sigaltstack sys_sigaltstack #define stub_rt_sigreturn sys_rt_sigreturn #define stub_switch_mm sys_switch_mm +#define stub_vcpu sys_vcpu #define __SYSCALL(nr, sym) extern asmlinkage void sym(void) ; #undef _ASM_X86_64_UNISTD_H_ diff --git a/arch/x86/kernel/entry_32.S 
b/arch/x86/kernel/entry_32.S index 4b87c32..1e2adae 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -371,7 +371,7 @@ ENTRY(system_call) GET_THREAD_INFO(%ebp) # system call tracing in operation / emulation /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ - testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp) + testl $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT|_TIF_VCPU),TI_flags(%ebp) jnz syscall_trace_entry cmpl $(nr_syscalls), %eax jae syscall_badsys diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index bb573ef..f3f403a 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -244,7 +244,7 @@ ENTRY(system_call_after_swapgs) movq %rcx,RIP-ARGOFFSET(%rsp) CFI_REL_OFFSET rip,RIP-ARGOFFSET GET_THREAD_INFO(%rcx) - testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx) + testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP|_TIF_VCPU),threadinfo_flags(%rcx) jnz tracesys cmpq $__NR_syscall_max,%rax ja badsys @@ -323,6 +323,12 @@ tracesys: FIXUP_TOP_OF_STACK %rdi movq %rsp,%rdi call syscall_trace_enter + testl %eax, %eax + jz 2f + LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */ + RESTORE_REST + jmp int_ret_from_sys_call +2: LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */ RESTORE_REST cmpq $__NR_syscall_max,%rax @@ -482,6 +488,23 @@ ENTRY(stub_rt_sigreturn) END(stub_rt_sigreturn) /* + * vcpu is special too + */ +ENTRY(stub_vcpu) + CFI_STARTPROC + addq $8, %rsp + CFI_ADJUST_CFA_OFFSET -8 + SAVE_REST + movq %rsp,%rdx + FIXUP_TOP_OF_STACK %r11 + call sys_vcpu + movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer + RESTORE_REST + jmp int_ret_from_sys_call + CFI_ENDPROC +END(stub_vcpu) + +/* * initial frame state for interrupts and exceptions */ .macro _frame ref diff --git a/arch/x86/kernel/ptrace.c 
b/arch/x86/kernel/ptrace.c index de84950..44334e2 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -1453,6 +1453,8 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code) force_sig_info(SIGTRAP, &info, tsk); } +extern int unvcpu(struct pt_regs *regs, siginfo_t *siginfo); + /* notification of system call entry/exit * - triggered by current->work.syscall_trace */ @@ -1489,6 +1491,14 @@ int do_syscall_trace(struct pt_regs *regs, int entryexit) goto out; } + if (test_thread_flag(TIF_VCPU)) { + if (entryexit) + return 0; + + regs->ax = unvcpu(regs, NULL); + return 1; + } + if (!(current->ptrace & PT_PTRACED)) goto out; @@ -1616,11 +1626,18 @@ static void syscall_trace(struct pt_regs *regs) } } -asmlinkage void syscall_trace_enter(struct pt_regs *regs) +extern int unvcpu(struct pt_regs *regs, siginfo_t *siginfo); + +asmlinkage int syscall_trace_enter(struct pt_regs *regs) { /* do the secure computing check first */ secure_computing(regs->orig_ax); + if (test_thread_flag(TIF_VCPU)) { + regs->ax = unvcpu(regs, NULL); + return 1; + } + if (test_thread_flag(TIF_SYSCALL_TRACE) && (current->ptrace & PT_PTRACED)) syscall_trace(regs); @@ -1638,6 +1655,8 @@ asmlinkage void syscall_trace_enter(struct pt_regs *regs) regs->dx, regs->r10); } } + + return 0; } asmlinkage void syscall_trace_leave(struct pt_regs *regs) diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index 0157a6f..73b5d21 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -573,6 +573,8 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, return ret; } +extern int unvcpu(struct pt_regs *regs, siginfo_t *siginfo); + /* * Note that 'init' is a special process: it doesn't get signals it doesn't * want to handle. 
Thus you cannot kill init even with a SIGKILL even by @@ -603,6 +605,11 @@ static void do_signal(struct pt_regs *regs) signr = get_signal_to_deliver(&info, &ka, regs, NULL); if (signr > 0) { + if (test_thread_flag(TIF_VCPU)) { + regs->ax = unvcpu(regs, &info); + return; + } + /* Re-enable any watchpoints before delivering the * signal to user space. The processor register will * have been cleared if the watchpoint triggered diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index 1c83e51..8978b40 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -407,6 +407,8 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, return ret; } +extern int unvcpu(struct pt_regs *regs, siginfo_t *siginfo); + /* * Note that 'init' is a special process: it doesn't get signals it doesn't * want to handle. Thus you cannot kill init even with a SIGKILL even by @@ -435,6 +437,11 @@ static void do_signal(struct pt_regs *regs) signr = get_signal_to_deliver(&info, &ka, regs, NULL); if (signr > 0) { + if (test_thread_flag(TIF_VCPU)) { + regs->ax = unvcpu(regs, &info); + return; + } + /* Re-enable any watchpoints before delivering the * signal to user space. 
The processor register will * have been cleared if the watchpoint triggered diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c index 23f6aff..d5d54f6 100644 --- a/arch/x86/kernel/sys_i386_32.c +++ b/arch/x86/kernel/sys_i386_32.c @@ -273,3 +273,17 @@ asmlinkage long sys_switch_mm(struct pt_regs regs) (struct __user user_regs *) regs.dx, regs.si, regs.di, &regs); } + +extern long do_vcpu(int mm_fd, struct vcpu_user __user *new, + struct pt_regs *regs); + +asmlinkage long sys_vcpu(struct pt_regs regs) +{ + int err; + + err = do_vcpu(regs.bx, (struct vcpu_user __user *) regs.cx, &regs); + if (err) + return err; + + return regs.ax; +} diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index b3c98f5..aab9121 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c @@ -262,3 +262,18 @@ asmlinkage long sys_switch_mm(int fd, struct __user user_regs *save, { return do_switch_mm(fd, save, new, ip, sp, regs); } + +extern long do_vcpu(int mm_fd, struct vcpu_user __user *new, + struct pt_regs *regs); + +asmlinkage long sys_vcpu(int mm_fd, struct vcpu_user __user *new, + struct pt_regs *regs) +{ + int err; + + err = do_vcpu(mm_fd, new, regs); + if (err) + return err; + + return regs->ax; +} diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index 27f20f0..5b9803a 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S @@ -328,3 +328,4 @@ ENTRY(sys_call_table) .long sys_timerfd_gettime .long sys_new_mm .long sys_switch_mm + .long sys_vcpu diff --git a/include/asm-um/desc.h b/include/asm-um/desc.h index 4ec34a5..efbabaf 100644 --- a/include/asm-um/desc.h +++ b/include/asm-um/desc.h @@ -1,6 +1,11 @@ #ifndef __UM_DESC_H #define __UM_DESC_H +#ifdef CONFIG_64BIT +#define LM(info) (info)->lm == 0 +#else +#define LM(info) (1) +#endif /* Taken from asm-i386/desc.h, it's the only thing we need. The rest wouldn't * compile, and has never been used. 
*/ #define LDT_empty(info) (\ @@ -11,6 +16,7 @@ (info)->seg_32bit == 0 && \ (info)->limit_in_pages == 0 && \ (info)->seg_not_present == 1 && \ + LM(info) && \ (info)->useable == 0 ) #endif diff --git a/include/asm-um/host_ldt-i386.h b/include/asm-um/host_ldt-i386.h index b27cb0a..e2ad59c 100644 --- a/include/asm-um/host_ldt-i386.h +++ b/include/asm-um/host_ldt-i386.h @@ -1,7 +1,8 @@ #ifndef __ASM_HOST_LDT_I386_H #define __ASM_HOST_LDT_I386_H -#include "asm/arch/ldt.h" +#include <asm/desc.h> +#include <asm/arch/ldt.h> /* * macros stolen from include/asm-i386/desc.h @@ -21,14 +22,4 @@ ((info)->useable << 20) | \ 0x7000) -#define LDT_empty(info) (\ - (info)->base_addr == 0 && \ - (info)->limit == 0 && \ - (info)->contents == 0 && \ - (info)->read_exec_only == 1 && \ - (info)->seg_32bit == 0 && \ - (info)->limit_in_pages == 0 && \ - (info)->seg_not_present == 1 && \ - (info)->useable == 0 ) - #endif diff --git a/include/asm-um/host_ldt-x86_64.h b/include/asm-um/host_ldt-x86_64.h index 74a63f7..585c162 100644 --- a/include/asm-um/host_ldt-x86_64.h +++ b/include/asm-um/host_ldt-x86_64.h @@ -1,7 +1,8 @@ #ifndef __ASM_HOST_LDT_X86_64_H #define __ASM_HOST_LDT_X86_64_H -#include "asm/arch/ldt.h" +#include <asm/desc.h> +#include <asm/arch/ldt.h> /* * macros stolen from include/asm-x86_64/desc.h @@ -24,15 +25,4 @@ /* ((info)->lm << 21) | */ \ 0x7000) -#define LDT_empty(info) (\ - (info)->base_addr == 0 && \ - (info)->limit == 0 && \ - (info)->contents == 0 && \ - (info)->read_exec_only == 1 && \ - (info)->seg_32bit == 0 && \ - (info)->limit_in_pages == 0 && \ - (info)->seg_not_present == 1 && \ - (info)->useable == 0 && \ - (info)->lm == 0) - #endif diff --git a/include/asm-um/thread_info.h b/include/asm-um/thread_info.h index 356b83e..6aa19f3 100644 --- a/include/asm-um/thread_info.h +++ b/include/asm-um/thread_info.h @@ -83,6 +83,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_MEMDIE 5 #define TIF_SYSCALL_AUDIT 6 #define TIF_RESTORE_SIGMASK 7 
+#define TIF_VCPU 8 #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) @@ -91,5 +92,6 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_MEMDIE (1 << TIF_MEMDIE) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK) +#define _TIF_VCPU (1 << TIF_VCPU) #endif diff --git a/include/asm-x86/thread_info_32.h b/include/asm-x86/thread_info_32.h index 5bd5082..920c94a 100644 --- a/include/asm-x86/thread_info_32.h +++ b/include/asm-x86/thread_info_32.h @@ -142,6 +142,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_DEBUGCTLMSR 22 /* uses thread_struct.debugctlmsr */ #define TIF_DS_AREA_MSR 23 /* uses thread_struct.ds_area_msr */ #define TIF_BTS_TRACE_TS 24 /* record scheduling event timestamps */ +#define TIF_VCPU 25 #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) #define _TIF_SIGPENDING (1<<TIF_SIGPENDING) @@ -161,6 +162,7 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_DEBUGCTLMSR (1<<TIF_DEBUGCTLMSR) #define _TIF_DS_AREA_MSR (1<<TIF_DS_AREA_MSR) #define _TIF_BTS_TRACE_TS (1<<TIF_BTS_TRACE_TS) +#define _TIF_VCPU (1<<TIF_VCPU) /* work to do on interrupt/exception return */ #define _TIF_WORK_MASK \ diff --git a/include/asm-x86/thread_info_64.h b/include/asm-x86/thread_info_64.h index 6c9b214..179d036 100644 --- a/include/asm-x86/thread_info_64.h +++ b/include/asm-x86/thread_info_64.h @@ -125,6 +125,7 @@ static inline struct thread_info *stack_thread_info(void) #define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */ #define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */ #define TIF_BTS_TRACE_TS 27 /* record scheduling event timestamps */ +#define TIF_VCPU 28 #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) #define _TIF_SIGPENDING (1<<TIF_SIGPENDING) @@ -146,6 +147,7 @@ static inline struct thread_info *stack_thread_info(void) #define _TIF_DEBUGCTLMSR (1<<TIF_DEBUGCTLMSR) 
#define _TIF_DS_AREA_MSR (1<<TIF_DS_AREA_MSR) #define _TIF_BTS_TRACE_TS (1<<TIF_BTS_TRACE_TS) +#define _TIF_VCPU (1<<TIF_VCPU) /* work to do on interrupt/exception return */ #define _TIF_WORK_MASK \ diff --git a/include/asm-x86/unistd_32.h b/include/asm-x86/unistd_32.h index 5f8f291..cadbdb1 100644 --- a/include/asm-x86/unistd_32.h +++ b/include/asm-x86/unistd_32.h @@ -334,6 +334,7 @@ #define __NR_timerfd_gettime 326 #define __NR_new_mm 327 #define __NR_switch_mm 328 +#define __NR_vcpu 329 #ifdef __KERNEL__ diff --git a/include/asm-x86/unistd_64.h b/include/asm-x86/unistd_64.h index a674098..51bd17c 100644 --- a/include/asm-x86/unistd_64.h +++ b/include/asm-x86/unistd_64.h @@ -643,6 +643,8 @@ __SYSCALL(__NR_timerfd_gettime, sys_timerfd_gettime) __SYSCALL(__NR_new_mm, sys_new_mm) #define __NR_switch_mm 289 __SYSCALL(__NR_switch_mm, stub_switch_mm) +#define __NR_vcpu 290 +__SYSCALL(__NR_vcpu, stub_vcpu) #ifndef __NO_STUBS diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 1f74e1d..5ed65eb 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -193,6 +193,7 @@ extern struct group_info init_groups; [PIDTYPE_SID] = INIT_PID_LINK(PIDTYPE_SID), \ }, \ .dirties = INIT_PROP_LOCAL_SINGLE(dirties), \ + .vcpu = NULL, \ INIT_IDS \ INIT_TRACE_IRQFLAGS \ INIT_LOCKDEP \ diff --git a/include/linux/sched.h b/include/linux/sched.h index 7360fde..5759bba 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -65,6 +65,7 @@ struct sched_param { #include <asm/page.h> #include <asm/ptrace.h> #include <asm/cputime.h> +#include <asm/ldt.h> #include <linux/smp.h> #include <linux/sem.h> @@ -991,6 +992,24 @@ struct sched_rt_entity { #endif }; +struct vcpu_user { + enum { VCPU_SYSCALL, VCPU_SIGNAL } event; + struct user_regs regs; + siginfo_t siginfo; +#if defined(CONFIG_X86_32) && !defined(CONFIG_UML) + struct user_desc tls_array[GDT_ENTRY_TLS_ENTRIES]; +#endif +}; + +struct vcpu { + struct vcpu_user user; + struct mm_struct *mm; + struct 
vcpu_user __user *state; +#if defined(CONFIG_X86_32) && !defined(CONFIG_UML) + struct user_desc tls[GDT_ENTRY_TLS_ENTRIES]; +#endif +}; + struct task_struct { volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ void *stack; @@ -1103,6 +1122,7 @@ struct task_struct { cputime_t it_prof_expires, it_virt_expires; unsigned long long it_sched_expires; struct list_head cpu_timers[3]; + struct vcpu *vcpu; /* process credentials */ uid_t uid,euid,suid,fsuid; diff --git a/kernel/Makefile b/kernel/Makefile index 6c584c5..0119a37 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -9,7 +9,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \ rcupdate.o extable.o params.o posix-timers.o \ kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ hrtimer.o rwsem.o nsproxy.o srcu.o \ - notifier.o ksysfs.o pm_qos_params.o + notifier.o ksysfs.o pm_qos_params.o vcpu.o obj-$(CONFIG_SYSCTL) += sysctl_check.o obj-$(CONFIG_STACKTRACE) += stacktrace.o diff --git a/kernel/exit.c b/kernel/exit.c index 073005b..bda5e7f 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -175,6 +175,11 @@ repeat: write_unlock_irq(&tasklist_lock); release_thread(p); + + if (p->vcpu && p->vcpu->mm) + mmput(p->vcpu->mm); + kfree(p->vcpu); + call_rcu(&p->rcu, delayed_put_task_struct); p = leader; diff --git a/kernel/fork.c b/kernel/fork.c index 4ca580a..3b8ed4c 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1086,6 +1086,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, clear_tsk_thread_flag(p, TIF_SIGPENDING); init_sigpending(&p->pending); + p->vcpu = NULL; + p->utime = cputime_zero; p->stime = cputime_zero; p->gtime = cputime_zero; diff --git a/kernel/signal.c b/kernel/signal.c index 6025e33..67b5ec5 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1785,6 +1785,9 @@ relock: if (!signr) break; /* will return 0 */ + if (test_thread_flag(TIF_VCPU)) + break; + if ((current->ptrace & PT_PTRACED) && signr != SIGKILL) { ptrace_signal_deliver(regs, 
cookie); diff --git a/kernel/vcpu.c b/kernel/vcpu.c new file mode 100644 index 0000000..5ca259e --- /dev/null +++ b/kernel/vcpu.c @@ -0,0 +1,129 @@ +/* + * Copyright (C) 2008 Jeff Dike ([EMAIL PROTECTED],linux.intel}.com) + * Licensed under the GPL + */ + +#include <linux/sched.h> +#include <asm/uaccess.h> +#include <asm/desc.h> + +extern asmlinkage int sys_get_thread_area(struct user_desc __user *u_info); +extern asmlinkage int sys_set_thread_area(struct user_desc __user *u_info); +extern int do_switch(struct task_struct *task, int fd); + +long do_vcpu(int mm_fd, struct vcpu_user __user *new, struct pt_regs *regs) +{ + mm_segment_t fs; + struct vcpu *vcpu; + int err; + + if (current->vcpu == NULL) { + current->vcpu = kmalloc(sizeof(struct vcpu), GFP_KERNEL); + if (current->vcpu == NULL) + return -ENOMEM; + } + + vcpu = current->vcpu; + vcpu->mm = NULL; + vcpu->state = new; + + fs = get_fs(); + set_fs(KERNEL_DS); + err = pt_regs_to_ptrace(&vcpu->user.regs, regs); + set_fs(fs); + if (err) + return err; + + err = ptrace_to_pt_regs(regs, &new->regs); + if (err) + return err; + +#if defined(CONFIG_X86_32) && !defined(CONFIG_UML) + { int i; + + memcpy(vcpu->tls, current->thread.tls_array, sizeof(vcpu->tls)); + for (i = 0; i < ARRAY_SIZE(new->tls_array); i++){ + fs = get_fs(); + set_fs(KERNEL_DS); + vcpu->tls[i].entry_number = GDT_ENTRY_TLS_MIN + i; + err = sys_get_thread_area(&vcpu->tls[i]); + set_fs(fs); + if (err) + return err; + + err = sys_set_thread_area(&new->tls_array[i]); + if (err) + return err; + } + } +#endif + + if (mm_fd != -1) { + vcpu->mm = current->mm; + atomic_inc(&vcpu->mm->mm_users); + + err = do_switch(current, mm_fd); + if (err) + return err; + } + +#if defined(CONFIG_X86_32) && !defined(CONFIG_UML) + loadsegment(gs, current->thread.gs); +#endif + set_thread_flag(TIF_VCPU); + + return 0; +} + +extern void do_switch_mm_struct(struct task_struct *task, + struct mm_struct *new); + +int unvcpu(struct pt_regs *regs, siginfo_t *siginfo) +{ + mm_segment_t 
fs; + struct vcpu *vcpu; + int err, event; + + clear_thread_flag(TIF_VCPU); + + vcpu = current->vcpu; + if (vcpu->mm != NULL) { + do_switch_mm_struct(current, vcpu->mm); + mmput(vcpu->mm); + vcpu->mm = NULL; + } + + err = pt_regs_to_ptrace(&vcpu->state->regs, regs); + if (err) + return err; + + err = -EFAULT; + if ((siginfo != NULL) && + (copy_to_user(&vcpu->state->siginfo, siginfo, + sizeof(siginfo_t)) != 0)) + return err; + + event = (siginfo != NULL) ? VCPU_SIGNAL : VCPU_SYSCALL; + if (copy_to_user(&vcpu->state->event, &event, sizeof(event)) != 0) + return err; + +#if defined(CONFIG_X86_32) && !defined(CONFIG_UML) + { int i; + for (i = 0; i < ARRAY_SIZE(vcpu->state->tls_array); i++){ + fs = get_fs(); + set_fs(KERNEL_DS); + err = sys_set_thread_area(&vcpu->tls[i]); + set_fs(fs); + if (err) + return err; + } + } +#endif + + fs = get_fs(); + set_fs(KERNEL_DS); + err = ptrace_to_pt_regs(regs, &vcpu->user.regs); + set_fs(fs); + + return err; +} ------------------------------------------------------------------------- This SF.net email is sponsored by: Microsoft Defy all challenges. Microsoft(R) Visual Studio 2008. http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/ _______________________________________________ User-mode-linux-user mailing list User-mode-linux-user@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/user-mode-linux-user