The patch adds support for SECCOMP and SECCOMP_FILTER (BPF).

Signed-off-by: David Abdurachmanov <david.abdurachma...@gmail.com>
---
 arch/riscv/Kconfig                   | 14 ++++++++++++++
 arch/riscv/include/asm/thread_info.h |  5 ++++-
 arch/riscv/kernel/entry.S            | 27 +++++++++++++++++++++++++--
 arch/riscv/kernel/ptrace.c           |  8 ++++++++
 4 files changed, 51 insertions(+), 3 deletions(-)

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index a4f48f757204..49cd8e251547 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -29,6 +29,7 @@ config RISCV
        select GENERIC_SMP_IDLE_THREAD
        select GENERIC_ATOMIC64 if !64BIT || !RISCV_ISA_A
        select HAVE_ARCH_AUDITSYSCALL
+       select HAVE_ARCH_SECCOMP_FILTER
        select HAVE_MEMBLOCK_NODE_MAP
        select HAVE_DMA_CONTIGUOUS
        select HAVE_FUTEX_CMPXCHG if FUTEX
@@ -228,6 +229,19 @@ menu "Kernel features"
 
 source "kernel/Kconfig.hz"
 
+config SECCOMP
+       bool "Enable seccomp to safely compute untrusted bytecode"
+       help
+         This kernel feature is useful for number crunching applications
+         that may need to compute untrusted bytecode during their
+         execution. By using pipes or other transports made available to
+         the process as file descriptors supporting the read/write
+         syscalls, it's possible to isolate those applications in
+         their own address space using seccomp. Once seccomp is
+         enabled via prctl(PR_SET_SECCOMP), it cannot be disabled
+         and the task is only allowed to execute a few safe syscalls
+         defined by each seccomp mode.
+
 endmenu
 
 menu "Boot options"
diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h
index 1c9cc8389928..1fd6e4130cab 100644
--- a/arch/riscv/include/asm/thread_info.h
+++ b/arch/riscv/include/asm/thread_info.h
@@ -81,6 +81,7 @@ struct thread_info {
 #define TIF_MEMDIE             5       /* is terminating due to OOM killer */
 #define TIF_SYSCALL_TRACEPOINT  6       /* syscall tracepoint instrumentation */
 #define TIF_SYSCALL_AUDIT      7       /* syscall auditing */
+#define TIF_SECCOMP                    8       /* syscall secure computing */
 
 #define _TIF_SYSCALL_TRACE     (1 << TIF_SYSCALL_TRACE)
 #define _TIF_NOTIFY_RESUME     (1 << TIF_NOTIFY_RESUME)
@@ -88,11 +89,13 @@ struct thread_info {
 #define _TIF_NEED_RESCHED      (1 << TIF_NEED_RESCHED)
 #define _TIF_SYSCALL_TRACEPOINT        (1 << TIF_SYSCALL_TRACEPOINT)
 #define _TIF_SYSCALL_AUDIT     (1 << TIF_SYSCALL_AUDIT)
+#define _TIF_SECCOMP           (1 << TIF_SECCOMP)
 
 #define _TIF_WORK_MASK \
        (_TIF_NOTIFY_RESUME | _TIF_SIGPENDING | _TIF_NEED_RESCHED)
 
 #define _TIF_SYSCALL_WORK \
-       (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT)
+       (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT | \
+        _TIF_SECCOMP)
 
 #endif /* _ASM_RISCV_THREAD_INFO_H */
diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
index 355166f57205..e88ccbfa61ee 100644
--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -207,8 +207,25 @@ check_syscall_nr:
        /* Check to make sure we don't jump to a bogus syscall number. */
        li t0, __NR_syscalls
        la s0, sys_ni_syscall
-       /* Syscall number held in a7 */
-       bgeu a7, t0, 1f
+       /*
+        * The tracer can change syscall number to valid/invalid value.
+        * We use syscall_set_nr helper in syscall_trace_enter thus we
+        * cannot trust the current value in a7 and have to reload from
+        * the current task pt_regs.
+        */
+       REG_L a7, PT_A7(sp)
+       /*
+        * Check if syscall is rejected by tracer or seccomp, i.e., a7 == -1.
+        * If yes, we pretend it was executed.
+        */
+       li t1, -1
+       beq a7, t1, ret_from_syscall_rejected
+       /*
+        * Syscall number held in a7. The compare is unsigned so that any
+        * other negative or too large number is redirected to ni_syscall.
+        */
+       bgeu a7, t0, 1f
+       /* Call syscall */
        la s0, sys_call_table
        slli t0, a7, RISCV_LGPTR
        add s0, s0, t0
@@ -219,6 +236,12 @@ check_syscall_nr:
 ret_from_syscall:
        /* Set user a0 to kernel a0 */
        REG_S a0, PT_A0(sp)
+       /*
+        * We didn't execute the actual syscall.
+        * Seccomp already set return value for the current task pt_regs.
+        * (If it was configured with SECCOMP_RET_ERRNO/TRACE)
+        */
+ret_from_syscall_rejected:
        /* Trace syscalls, but only if requested by the user. */
        REG_L t0, TASK_TI_FLAGS(tp)
        andi t0, t0, _TIF_SYSCALL_WORK
diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c
index c1b51539c3e2..598e48b8ca2b 100644
--- a/arch/riscv/kernel/ptrace.c
+++ b/arch/riscv/kernel/ptrace.c
@@ -160,6 +160,14 @@ void do_syscall_trace_enter(struct pt_regs *regs)
                if (tracehook_report_syscall_entry(regs))
                        syscall_set_nr(current, regs, -1);
 
+       /*
+        * Do the secure computing after ptrace; failures should be fast.
+        * If this fails we might have return value in a0 from seccomp
+        * (via SECCOMP_RET_ERRNO/TRACE).
+        */
+       if (secure_computing(NULL) == -1)
+               syscall_set_nr(current, regs, -1);
+
 #ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS
        if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
                trace_sys_enter(regs, syscall_get_nr(current, regs));
-- 
2.19.2

Reply via email to