Hi, On Mon, 2024-11-11 at 15:27 +0900, Hajime Tazaki wrote: > On !MMU mode, the address of vdso is accessible from userspace. This > commit implements the entry point by pointing a block of page address. > > This commit also add memory permission configuration of vdso page to be > executable. > > Signed-off-by: Hajime Tazaki <thehaj...@gmail.com> > Signed-off-by: Ricardo Koller <ricar...@google.com> > --- > arch/x86/um/vdso/um_vdso.c | 20 ++++++++++++++++++++ > arch/x86/um/vdso/vma.c | 14 ++++++++++++++ > 2 files changed, 34 insertions(+) > > diff --git a/arch/x86/um/vdso/um_vdso.c b/arch/x86/um/vdso/um_vdso.c > index cbae2584124f..eff3e6641a0e 100644 > --- a/arch/x86/um/vdso/um_vdso.c > +++ b/arch/x86/um/vdso/um_vdso.c > @@ -23,10 +23,17 @@ int __vdso_clock_gettime(clockid_t clock, struct > __kernel_old_timespec *ts) > { > long ret; > > +#ifdef CONFIG_MMU > asm("syscall" > : "=a" (ret) > : "0" (__NR_clock_gettime), "D" (clock), "S" (ts) > : "rcx", "r11", "memory"); > +#else > + asm("call *%1" > + : "=a" (ret) > + : "0" ((unsigned long)__NR_clock_gettime), "D" > (clock), "S" (ts) > + : "rcx", "r11", "memory"); > +#endif > > return ret; > } > @@ -37,10 +44,17 @@ int __vdso_gettimeofday(struct > __kernel_old_timeval *tv, struct timezone *tz) > { > long ret; > > +#ifdef CONFIG_MMU > asm("syscall" > : "=a" (ret) > : "0" (__NR_gettimeofday), "D" (tv), "S" (tz) > : "rcx", "r11", "memory"); > +#else > + asm("call *%1" > + : "=a" (ret) > + : "0" ((unsigned long)__NR_gettimeofday), "D" (tv), > "S" (tz) > + : "rcx", "r11", "memory"); > +#endif > > return ret; > } > @@ -51,9 +65,15 @@ __kernel_old_time_t > __vdso_time(__kernel_old_time_t *t) > { > long secs; > > +#ifdef CONFIG_MMU > asm volatile("syscall" > : "=a" (secs) > : "0" (__NR_time), "D" (t) : "cc", "r11", "cx", > "memory"); > +#else > + asm("call *%1" > + : "=a" (secs) > + : "0" ((unsigned long)__NR_time), "D" (t) : "cc", > "r11", "cx", "memory"); > +#endif
Maybe introduce a macro for "syscall" vs. "call *%1"? The parameters should be identical in both cases. The "call" could probably even jump to the end of the NOP ramp directly in this case. Though maybe I am missing something with the "(unsigned long)" cast? > return secs; > } > diff --git a/arch/x86/um/vdso/vma.c b/arch/x86/um/vdso/vma.c > index f238f7b33cdd..83c861e2a815 100644 > --- a/arch/x86/um/vdso/vma.c > +++ b/arch/x86/um/vdso/vma.c > @@ -9,6 +9,7 @@ > #include <asm/page.h> > #include <asm/elf.h> > #include <linux/init.h> > +#include <os.h> > > static unsigned int __read_mostly vdso_enabled = 1; > unsigned long um_vdso_addr; > @@ -24,7 +25,9 @@ static int __init init_vdso(void) > > BUG_ON(vdso_end - vdso_start > PAGE_SIZE); > > +#ifdef CONFIG_MMU > um_vdso_addr = task_size - PAGE_SIZE; > +#endif > > vdsop = kmalloc(sizeof(struct page *), GFP_KERNEL); > if (!vdsop) > @@ -40,6 +43,15 @@ static int __init init_vdso(void) > copy_page(page_address(um_vdso), vdso_start); > *vdsop = um_vdso; > > +#ifndef CONFIG_MMU > + /* this is fine with NOMMU as everything is accessible */ > + um_vdso_addr = (unsigned long)page_address(um_vdso); > + os_protect_memory((void *)um_vdso_addr, vdso_end - > vdso_start, 1, 1, 1); I think this should be "1, 0, 1", i.e. we shouldn't enable write access. > + pr_debug("vdso_start=%lx um_vdso_addr=%lx pg_um_vdso=%lx", > + (unsigned long)vdso_start, um_vdso_addr, > + (unsigned long)page_address(um_vdso)); > +#endif > + > return 0; > > oom: > @@ -50,6 +62,7 @@ static int __init init_vdso(void) > } > subsys_initcall(init_vdso); > > +#ifdef CONFIG_MMU > int arch_setup_additional_pages(struct linux_binprm *bprm, int > uses_interp) > { > struct vm_area_struct *vma; > @@ -74,3 +87,4 @@ int arch_setup_additional_pages(struct linux_binprm > *bprm, int uses_interp) > > return IS_ERR(vma) ? PTR_ERR(vma) : 0; > } > +#endif