One problem with seccomp was that ptrace could be used to change a
syscall after seccomp filtering had completed. This was a well documented
limitation, and it was recommended to block ptrace when defining a filter
to avoid this problem. This can be quite a limitation for containers or
other places where ptrace is desired even under seccomp filters.

Since seccomp filtering has been split into pre-trace and trace phases
(phase1 and phase2 respectively), it's possible to re-run phase1 seccomp
after ptrace. This makes that change, and updates the test suite for
both SECCOMP_RET_TRACE and PTRACE_SYSCALL manipulation.

Signed-off-by: Kees Cook <keesc...@chromium.org>
---
 include/linux/seccomp.h                       |   6 +
 include/linux/tracehook.h                     |   8 +-
 kernel/seccomp.c                              |  42 ++++++
 tools/testing/selftests/seccomp/seccomp_bpf.c | 176 ++++++++++++++++++++++++--
 4 files changed, 220 insertions(+), 12 deletions(-)

diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h
index 2296e6b2f690..e2b72394c200 100644
--- a/include/linux/seccomp.h
+++ b/include/linux/seccomp.h
@@ -85,6 +85,7 @@ static inline int seccomp_mode(struct seccomp *s)
 #ifdef CONFIG_SECCOMP_FILTER
 extern void put_seccomp_filter(struct task_struct *tsk);
 extern void get_seccomp_filter(struct task_struct *tsk);
+extern int seccomp_phase1_recheck(void);
 #else  /* CONFIG_SECCOMP_FILTER */
 static inline void put_seccomp_filter(struct task_struct *tsk)
 {
@@ -94,6 +95,11 @@ static inline void get_seccomp_filter(struct task_struct 
*tsk)
 {
        return;
 }
+
+static inline int seccomp_phase1_recheck(void)
+{
+       return 0;
+}
 #endif /* CONFIG_SECCOMP_FILTER */
 
 #if defined(CONFIG_SECCOMP_FILTER) && defined(CONFIG_CHECKPOINT_RESTORE)
diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h
index 26c152122a42..69b584d88508 100644
--- a/include/linux/tracehook.h
+++ b/include/linux/tracehook.h
@@ -48,6 +48,7 @@
 
 #include <linux/sched.h>
 #include <linux/ptrace.h>
+#include <linux/seccomp.h>
 #include <linux/security.h>
 #include <linux/task_work.h>
 #include <linux/memcontrol.h>
@@ -100,7 +101,12 @@ static inline int ptrace_report_syscall(struct pt_regs 
*regs)
 static inline __must_check int tracehook_report_syscall_entry(
        struct pt_regs *regs)
 {
-       return ptrace_report_syscall(regs);
+       int skip;
+
+       skip = ptrace_report_syscall(regs);
+       if (skip)
+               return skip;
+       return seccomp_phase1_recheck();
 }
 
 /**
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 7002796f14a4..6eaa3a1c5edb 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -665,6 +665,46 @@ u32 seccomp_phase1(struct seccomp_data *sd)
 }
 
 /**
+ * seccomp_phase1_recheck() - recheck phase1 in the context of ptrace
+ *
+ * This re-runs phase 1 seccomp checks in the case where ptrace may have
+ * just changed things out from under us.
+ *
+ * Returns 0 if the syscall should be processed or -1 to skip the syscall.
+ */
+int seccomp_phase1_recheck(void)
+{
+       u32 action;
+
+       /* If we're not under seccomp, continue normally. */
+       if (!test_thread_flag(TIF_SECCOMP))
+               return 0;
+
+       /* Pass NULL struct seccomp_data to force reload after ptrace. */
+       action = seccomp_phase1(NULL);
+       switch (action) {
+       case SECCOMP_PHASE1_OK:
+               /* Passes seccomp, continue normally. */
+               break;
+       case SECCOMP_PHASE1_SKIP:
+               /* Skip the syscall. */
+               return -1;
+       default:
+               if ((action & SECCOMP_RET_ACTION) != SECCOMP_RET_TRACE) {
+                       /* Impossible return value: kill the process. */
+                       do_exit(SIGSYS);
+               }
+               /*
+                * We've hit a trace request, but ptrace already put us
+                * into this state, so just continue.
+                */
+               break;
+       }
+
+       return 0;
+}
+
+/**
  * seccomp_phase2() - finish slow path seccomp work for the current syscall
  * @phase1_result: The return value from seccomp_phase1()
  *
@@ -701,6 +741,8 @@ int seccomp_phase2(u32 phase1_result)
                do_exit(SIGSYS);
        if (syscall_get_nr(current, regs) < 0)
                return -1;  /* Explicit request to skip. */
+       if (seccomp_phase1_recheck() < 0)
+               return -1;
 
        return 0;
 }
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c 
b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 7947e568e057..b8f6c743998f 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -1021,8 +1021,8 @@ void tracer_stop(int sig)
 typedef void tracer_func_t(struct __test_metadata *_metadata,
                           pid_t tracee, int status, void *args);
 
-void tracer(struct __test_metadata *_metadata, int fd, pid_t tracee,
-           tracer_func_t tracer_func, void *args)
+void start_tracer(struct __test_metadata *_metadata, int fd, pid_t tracee,
+           tracer_func_t tracer_func, void *args, bool ptrace_syscall)
 {
        int ret = -1;
        struct sigaction action = {
@@ -1042,12 +1042,16 @@ void tracer(struct __test_metadata *_metadata, int fd, 
pid_t tracee,
        /* Wait for attach stop */
        wait(NULL);
 
-       ret = ptrace(PTRACE_SETOPTIONS, tracee, NULL, PTRACE_O_TRACESECCOMP);
+       ret = ptrace(PTRACE_SETOPTIONS, tracee, NULL, ptrace_syscall ?
+                                                     PTRACE_O_TRACESYSGOOD :
+                                                     PTRACE_O_TRACESECCOMP);
        ASSERT_EQ(0, ret) {
                TH_LOG("Failed to set PTRACE_O_TRACESECCOMP");
                kill(tracee, SIGKILL);
        }
-       ptrace(PTRACE_CONT, tracee, NULL, 0);
+       ret = ptrace(ptrace_syscall ? PTRACE_SYSCALL : PTRACE_CONT,
+                    tracee, NULL, 0);
+       ASSERT_EQ(0, ret);
 
        /* Unblock the tracee */
        ASSERT_EQ(1, write(fd, "A", 1));
@@ -1063,12 +1067,13 @@ void tracer(struct __test_metadata *_metadata, int fd, 
pid_t tracee,
                        /* Child is dead. Time to go. */
                        return;
 
-               /* Make sure this is a seccomp event. */
-               ASSERT_EQ(true, IS_SECCOMP_EVENT(status));
+               /* Check if this is a seccomp event. */
+               ASSERT_EQ(!ptrace_syscall, IS_SECCOMP_EVENT(status));
 
                tracer_func(_metadata, tracee, status, args);
 
-               ret = ptrace(PTRACE_CONT, tracee, NULL, NULL);
+               ret = ptrace(ptrace_syscall ? PTRACE_SYSCALL : PTRACE_CONT,
+                            tracee, NULL, 0);
                ASSERT_EQ(0, ret);
        }
        /* Directly report the status of our test harness results. */
@@ -1079,7 +1084,7 @@ void tracer(struct __test_metadata *_metadata, int fd, 
pid_t tracee,
 void cont_handler(int num)
 { }
 pid_t setup_trace_fixture(struct __test_metadata *_metadata,
-                         tracer_func_t func, void *args)
+                         tracer_func_t func, void *args, bool ptrace_syscall)
 {
        char sync;
        int pipefd[2];
@@ -1095,7 +1100,8 @@ pid_t setup_trace_fixture(struct __test_metadata 
*_metadata,
        signal(SIGALRM, cont_handler);
        if (tracer_pid == 0) {
                close(pipefd[0]);
-               tracer(_metadata, pipefd[1], tracee, func, args);
+               start_tracer(_metadata, pipefd[1], tracee, func, args,
+                            ptrace_syscall);
                syscall(__NR_exit, 0);
        }
        close(pipefd[1]);
@@ -1177,7 +1183,7 @@ FIXTURE_SETUP(TRACE_poke)
 
        /* Launch tracer. */
        self->tracer = setup_trace_fixture(_metadata, tracer_poke,
-                                          &self->tracer_args);
+                                          &self->tracer_args, false);
 }
 
 FIXTURE_TEARDOWN(TRACE_poke)
@@ -1395,6 +1401,29 @@ void tracer_syscall(struct __test_metadata *_metadata, 
pid_t tracee,
 
 }
 
+void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee,
+                  int status, void *args)
+{
+       int ret, nr;
+       unsigned long msg;
+       static bool entry;
+
+       /* Make sure we got an empty message. */
+       ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
+       EXPECT_EQ(0, ret);
+       EXPECT_EQ(0, msg);
+
+       /* The only way to tell PTRACE_SYSCALL entry/exit is by counting. */
+       entry = !entry;
+       if (!entry)
+               return;
+
+       nr = get_syscall(_metadata, tracee);
+
+       if (nr == __NR_getpid)
+               change_syscall(_metadata, tracee, __NR_getppid);
+}
+
 FIXTURE_DATA(TRACE_syscall) {
        struct sock_fprog prog;
        pid_t tracer, mytid, mypid, parent;
@@ -1436,7 +1465,8 @@ FIXTURE_SETUP(TRACE_syscall)
        ASSERT_NE(self->parent, self->mypid);
 
        /* Launch tracer. */
-       self->tracer = setup_trace_fixture(_metadata, tracer_syscall, NULL);
+       self->tracer = setup_trace_fixture(_metadata, tracer_syscall, NULL,
+                                          false);
 }
 
 FIXTURE_TEARDOWN(TRACE_syscall)
@@ -1496,6 +1526,130 @@ TEST_F(TRACE_syscall, syscall_dropped)
        EXPECT_NE(self->mytid, syscall(__NR_gettid));
 }
 
+TEST_F(TRACE_syscall, skip_after_RET_TRACE)
+{
+       struct sock_filter filter[] = {
+               BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+                       offsetof(struct seccomp_data, nr)),
+               BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
+               BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM),
+               BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+       };
+       struct sock_fprog prog = {
+               .len = (unsigned short)ARRAY_SIZE(filter),
+               .filter = filter,
+       };
+       long ret;
+
+       ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+       ASSERT_EQ(0, ret);
+
+       /* Install fixture filter. */
+       ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
+       ASSERT_EQ(0, ret);
+
+       /* Install "errno on getppid" filter. */
+       ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
+       ASSERT_EQ(0, ret);
+
+       /* Tracer will redirect getpid to getppid, and we should see EPERM. */
+       EXPECT_EQ(-1, syscall(__NR_getpid));
+       EXPECT_EQ(EPERM, errno);
+}
+
+TEST_F_SIGNAL(TRACE_syscall, kill_after_RET_TRACE, SIGSYS)
+{
+       struct sock_filter filter[] = {
+               BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+                       offsetof(struct seccomp_data, nr)),
+               BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
+               BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+               BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+       };
+       struct sock_fprog prog = {
+               .len = (unsigned short)ARRAY_SIZE(filter),
+               .filter = filter,
+       };
+       long ret;
+
+       ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+       ASSERT_EQ(0, ret);
+
+       /* Install fixture filter. */
+       ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
+       ASSERT_EQ(0, ret);
+
+       /* Install "death on getppid" filter. */
+       ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
+       ASSERT_EQ(0, ret);
+
+       /* Tracer will redirect getpid to getppid, and we should die. */
+       EXPECT_NE(self->mypid, syscall(__NR_getpid));
+}
+
+TEST_F(TRACE_syscall, skip_after_ptrace)
+{
+       struct sock_filter filter[] = {
+               BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+                       offsetof(struct seccomp_data, nr)),
+               BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
+               BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM),
+               BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+       };
+       struct sock_fprog prog = {
+               .len = (unsigned short)ARRAY_SIZE(filter),
+               .filter = filter,
+       };
+       long ret;
+
+       /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
+       teardown_trace_fixture(_metadata, self->tracer);
+       self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
+                                          true);
+
+       ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+       ASSERT_EQ(0, ret);
+
+       /* Install "errno on getppid" filter. */
+       ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
+       ASSERT_EQ(0, ret);
+
+       /* Tracer will redirect getpid to getppid, and we should see EPERM. */
+       EXPECT_EQ(-1, syscall(__NR_getpid));
+       EXPECT_EQ(EPERM, errno);
+}
+
+TEST_F_SIGNAL(TRACE_syscall, kill_after_ptrace, SIGSYS)
+{
+       struct sock_filter filter[] = {
+               BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+                       offsetof(struct seccomp_data, nr)),
+               BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
+               BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+               BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+       };
+       struct sock_fprog prog = {
+               .len = (unsigned short)ARRAY_SIZE(filter),
+               .filter = filter,
+       };
+       long ret;
+
+       /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
+       teardown_trace_fixture(_metadata, self->tracer);
+       self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
+                                          true);
+
+       ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+       ASSERT_EQ(0, ret);
+
+       /* Install "death on getppid" filter. */
+       ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
+       ASSERT_EQ(0, ret);
+
+       /* Tracer will redirect getpid to getppid, and we should die. */
+       EXPECT_NE(self->mypid, syscall(__NR_getpid));
+}
+
 #ifndef __NR_seccomp
 # if defined(__i386__)
 #  define __NR_seccomp 354
-- 
2.6.3


-- 
Kees Cook
Chrome OS & Brillo Security

Reply via email to