Userspace on UML/!MMU also needs to configure the %fs register while it is running in order to access its thread structure correctly, so host syscalls implemented in the os-Linux drivers may be confused when they are called with the userspace %fs value still in place. Thus the %fs register has to be configured via arch_prctl(ARCH_SET_FS) on every host syscall.
Signed-off-by: Hajime Tazaki <thehaj...@gmail.com> Signed-off-by: Ricardo Koller <ricar...@google.com> --- arch/um/include/shared/os.h | 6 +++ arch/um/os-Linux/process.c | 6 +++ arch/um/os-Linux/start_up.c | 20 +++++++++ arch/x86/um/nommu/do_syscall_64.c | 37 ++++++++++++++++ arch/x86/um/nommu/syscalls_64.c | 71 +++++++++++++++++++++++++++++++ 5 files changed, 140 insertions(+) diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h index 7f6703869dde..2e972ca55213 100644 --- a/arch/um/include/shared/os.h +++ b/arch/um/include/shared/os.h @@ -190,6 +190,7 @@ extern void check_host_supports_tls(int *supports_tls, int *tls_min); extern void get_host_cpu_features( void (*flags_helper_func)(char *line), void (*cache_helper_func)(char *line)); +extern int host_has_fsgsbase; /* mem.c */ extern int create_mem_file(unsigned long long len); @@ -213,6 +214,11 @@ extern int os_protect_memory(void *addr, unsigned long len, extern int os_unmap_memory(void *addr, int len); extern int os_drop_memory(void *addr, int length); extern int can_drop_memory(void); +extern int os_arch_prctl(int pid, int option, unsigned long *arg); +#ifndef CONFIG_MMU +extern long long host_fs; +#endif + void os_set_pdeathsig(void); diff --git a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c index 3ac01881e905..7806b51e38b3 100644 --- a/arch/um/os-Linux/process.c +++ b/arch/um/os-Linux/process.c @@ -16,6 +16,7 @@ #include <sys/prctl.h> #include <sys/wait.h> #include <asm/unistd.h> +#include <sys/syscall.h> /* For SYS_xxx definitions */ #include <linux/threads.h> #include <init.h> #include <longjmp.h> @@ -155,6 +156,11 @@ int __init can_drop_memory(void) return ok; } +int os_arch_prctl(int pid, int option, unsigned long *arg2) +{ + return syscall(SYS_arch_prctl, option, arg2); +} + void init_new_thread_signals(void) { set_handler(SIGSEGV); diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c index 93fc82c01aba..dbab091892b3 100644 --- a/arch/um/os-Linux/start_up.c 
+++ b/arch/um/os-Linux/start_up.c @@ -19,6 +19,8 @@ #include <sys/resource.h> #include <asm/ldt.h> #include <asm/unistd.h> +#include <sys/auxv.h> +#include <asm/hwcap2.h> #include <init.h> #include <os.h> #include <kern_util.h> @@ -28,6 +30,8 @@ #include <skas.h> #include "internal.h" +int host_has_fsgsbase; + static void ptrace_child(void) { int ret; @@ -278,6 +282,19 @@ void __init get_host_cpu_features( } } +static void __init check_fsgsbase(void) +{ + unsigned long auxv = getauxval(AT_HWCAP2); + + os_info("Checking FSGSBASE instructions..."); + if (auxv & HWCAP2_FSGSBASE) { + host_has_fsgsbase = 1; + os_info("OK\n"); + } else { + host_has_fsgsbase = 0; + os_info("disabled\n"); + } +} void __init os_early_checks(void) { @@ -293,6 +310,9 @@ void __init os_early_checks(void) */ check_tmpexec(); + /* probe fsgsbase instruction */ + check_fsgsbase(); + pid = start_ptraced_child(); if (init_pid_registers(pid)) fatal("Failed to initialize default registers"); diff --git a/arch/x86/um/nommu/do_syscall_64.c b/arch/x86/um/nommu/do_syscall_64.c index 5d0fa83e7fdc..796beb0089fc 100644 --- a/arch/x86/um/nommu/do_syscall_64.c +++ b/arch/x86/um/nommu/do_syscall_64.c @@ -2,10 +2,38 @@ #include <linux/kernel.h> #include <linux/ptrace.h> +#include <asm/fsgsbase.h> +#include <asm/prctl.h> #include <kern_util.h> #include <sysdep/syscalls.h> #include <os.h> +static int os_x86_arch_prctl(int pid, int option, unsigned long *arg2) +{ + if (!host_has_fsgsbase) + return os_arch_prctl(pid, option, arg2); + + switch (option) { + case ARCH_SET_FS: + wrfsbase(*arg2); + break; + case ARCH_SET_GS: + wrgsbase(*arg2); + break; + case ARCH_GET_FS: + *arg2 = rdfsbase(); + break; + case ARCH_GET_GS: + *arg2 = rdgsbase(); + break; + default: + pr_warn("%s: unsupported option: 0x%x", __func__, option); + break; + } + + return 0; +} + __visible void do_syscall_64(struct pt_regs *regs) { int syscall; @@ -17,6 +45,9 @@ __visible void do_syscall_64(struct pt_regs *regs) syscall, (unsigned long)current, 
(unsigned long)sys_call_table[syscall]); + /* set fs register to the original host one */ + os_x86_arch_prctl(0, ARCH_SET_FS, (void *)host_fs); + if (likely(syscall < NR_syscalls)) { PT_REGS_SET_SYSCALL_RETURN(regs, EXECUTE_SYSCALL(syscall, regs)); @@ -34,4 +65,10 @@ __visible void do_syscall_64(struct pt_regs *regs) /* force do_signal() --> is_syscall() */ set_thread_flag(TIF_SIGPENDING); interrupt_end(); + + /* restore back fs register to userspace configured one */ + os_x86_arch_prctl(0, ARCH_SET_FS, + (void *)(current->thread.regs.regs.gp[FS_BASE + / sizeof(unsigned long)])); + } diff --git a/arch/x86/um/nommu/syscalls_64.c b/arch/x86/um/nommu/syscalls_64.c index c78c442aed1d..5bb6d55b4bb5 100644 --- a/arch/x86/um/nommu/syscalls_64.c +++ b/arch/x86/um/nommu/syscalls_64.c @@ -13,8 +13,70 @@ #include <asm/prctl.h> /* XXX This should get the constants from libc */ #include <registers.h> #include <os.h> +#include <asm/thread_info.h> +#include <asm/mman.h> #include "syscalls.h" +/* + * The guest libc can change FS, which confuses the host libc. + * In fact, changing FS directly is not supported (check + * man arch_prctl). So, whenever we make a host syscall, + * we should be changing FS to the original FS (not the + * one set by the guest libc). This original FS is stored + * in host_fs. 
+ */ +long long host_fs = -1; + +long arch_prctl(struct task_struct *task, int option, + unsigned long __user *arg2) +{ + long ret = -EINVAL; + unsigned long *ptr = arg2, tmp; + + switch (option) { + case ARCH_SET_FS: + if (host_fs == -1) + os_arch_prctl(0, ARCH_GET_FS, (void *)&host_fs); + ret = 0; + break; + case ARCH_SET_GS: + ret = 0; + break; + case ARCH_GET_FS: + case ARCH_GET_GS: + ptr = &tmp; + break; + } + + ret = os_arch_prctl(0, option, ptr); + if (ret) + return ret; + + switch (option) { + case ARCH_SET_FS: + current->thread.regs.regs.gp[FS_BASE / sizeof(unsigned long)] = + (unsigned long) arg2; + break; + case ARCH_SET_GS: + current->thread.regs.regs.gp[GS_BASE / sizeof(unsigned long)] = + (unsigned long) arg2; + break; + case ARCH_GET_FS: + ret = put_user(current->thread.regs.regs.gp[FS_BASE / sizeof(unsigned long)], arg2); + break; + case ARCH_GET_GS: + ret = put_user(current->thread.regs.regs.gp[GS_BASE / sizeof(unsigned long)], arg2); + break; + } + + return ret; +} + +SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2) +{ + return arch_prctl(current, option, (unsigned long __user *) arg2); +} + void arch_switch_to(struct task_struct *to) { /* @@ -42,3 +104,12 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, return ksys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT); } + +static int __init um_nommu_setup_hostfs(void) +{ + /* initialize the host_fs value at boottime */ + os_arch_prctl(0, ARCH_GET_FS, (void *)&host_fs); + + return 0; +} +arch_initcall(um_nommu_setup_hostfs); -- 2.43.0