On Wed, 2024-09-25 at 22:32 +0200, Benjamin Berg wrote: > > + /* > + * If in seccomp mode, install the SECCOMP filter and trigger a syscall. > + * Otherwise set PTRACE_TRACEME and do a SIGSTOP. > + */ > + if (init_data.seccomp) { > + struct sock_filter filter[] = { > +#if __BITS_PER_LONG > 32 > + /* [0] Load upper 32bit of instruction pointer from > seccomp_data */ > + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, > + (offsetof(struct seccomp_data, > instruction_pointer) + 4)), > + > + /* [1] Jump forward 3 instructions if the upper address > is not identical */ > + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, > (init_data.stub_start) >> 32, 0, 3), > +#endif > + /* [2] Load lower 32bit of instruction pointer from > seccomp_data */ > + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, > + (offsetof(struct seccomp_data, > instruction_pointer))), > + > + /* [3] Mask out lower bits */ > + BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 0xfffff000), > + > + /* [4] Jump to [6] if the lower bits are not on the > expected page */ > + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, > (init_data.stub_start) & 0xfffff000, 1, 0), > + > + /* [5] Trap call, allow */ > + BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_TRAP), > + > + /* [6,7] Check architecture */ > + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, > + offsetof(struct seccomp_data, arch)), > + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, > + UM_SECCOMP_ARCH_NATIVE, 1, 0), > + > + /* [8] Kill (for architecture check) */ > + BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL_PROCESS), > + > + /* [9] Load syscall number */ > + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, > + offsetof(struct seccomp_data, nr)), > + > + /* [10-14] Check against permitted syscalls */ > + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_futex, > + 5, 0), > + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, STUB_MMAP_NR, > + 4, 0), > + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_munmap, > + 3, 0), > +#ifdef __i386__ > + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, > __NR_set_thread_area, > + 2, 0), > +#else > + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_arch_prctl, > + 2, 0), > +#endif > + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_rt_sigreturn, > + 1, 0), > + > + /* [15] Not one of the permitted syscalls */ > + BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL_PROCESS), > + > + /* [16] Permitted call for the stub */ > + BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW), > + };
So not sure, but what else would you need per the cover letter's description that it's not complete for the filter? > * skas/process.c > */ > void wait_stub_done(int pid); > +void wait_stub_done_seccomp(struct mm_id *mm_idp, int running, int > wait_sigsys); > + nit: extra newline Also the function can be static? > +++ b/arch/um/os-Linux/skas/process.c > @@ -1,9 +1,11 @@ > // SPDX-License-Identifier: GPL-2.0 > /* > + * Copyright (C) 2021 Benjamin Berg <benja...@sipsolutions.net> > * Copyright (C) 2015 Thomas Meyer (tho...@m3y3r.de) > * Copyright (C) 2002- 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) > */ > > +#include <linux/kconfig.h> Hmm. If this works, why do we have UML_CONFIG_64BIT? For ASM stuff? But you also use "using_seccomp" so this shouldn't be needed, or you've set it up very dangerously - that header file should probably include this then, or whatever is needed? Otherwise could end up with using_seccomp being used but not defined properly per the ifdef, and just be statically 0 anyway? > #include <stdlib.h> > #include <stdbool.h> > #include <unistd.h> > @@ -25,8 +27,11 @@ > #include <registers.h> > #include <skas.h> > #include <sysdep/stub.h> > +#include <sysdep/mcontext.h> > +#include <linux/futex.h> > #include <linux/threads.h> > #include <timetravel.h> > +#include <asm-generic/rwonce.h> > #include "../internal.h" > > int is_skas_winch(int pid, int fd, void *data) > @@ -142,6 +147,74 @@ void wait_stub_done(int pid) > fatal_sigsegv(); > } > > +#ifdef CONFIG_UML_SECCOMP Also not sure you need the ifdef at all? > +void wait_stub_done_seccomp(struct mm_id *mm_idp, int running, int > wait_sigsys) This could be static and then it doesn't matter to have the ifdef? > - CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED | __WALL)); > - if (n < 0) { > + if (using_seccomp) { > + wait_stub_done_seccomp(mm_id, 1, 1); Also this builds because you declare the function but don't define it, but you already rely on the optimisation throwing it out anyway. > +#ifdef CONFIG_X86_32 > + extern int have_fpx_regs; > + > + /* > + * FIXME: This is wrong, but the non-FPX layout is closer to > + * what the mcontext presents to us. So, for all intents and > + * purposes we'll behave mostly correct if we do this. "correctly" but you don't care anyway since seccomp is only for 64-bit? johannes