/* Background. */
There are many circumstances when userspace wants to resolve a path and
ensure that it doesn't go outside of a particular root directory during
resolution. Obvious examples include archive extraction tools, as well as
other security-conscious userspace programs. FreeBSD spun out O_BENEATH
from their Capsicum project[1,2], so it also seems reasonable to
implement similar functionality for Linux.

This is part of a refresh of Al's AT_NO_JUMPS patchset[3] (which was a
variation on David Drysdale's O_BENEATH patchset[4], which in turn was
based on the Capsicum project[5]).

/* Userspace API. */
LOOKUP_BENEATH will be exposed to userspace through openat2(2).

/* Semantics. */
Unlike most other LOOKUP flags (most notably LOOKUP_FOLLOW),
LOOKUP_BENEATH applies to all components of the path.

With LOOKUP_BENEATH, any path component which attempts to "escape" the
starting point of the filesystem lookup (the dirfd passed to openat2(2))
will yield -EXDEV. Thus, all absolute paths and absolute symlinks are
disallowed.

Due to a security concern brought up by Jann[6], any ".." path
components are also blocked. This restriction will be lifted in a future
patch, but requires more work to ensure that permitting ".." is done
safely.

Magic-link jumps are also blocked, because they can beam the path lookup
across the starting point. It would be possible to detect and block
only the "bad" crossings with path_is_under() checks, but it's unclear
whether it makes sense to permit magic-links at all. Regardless,
userspace should pass LOOKUP_NO_MAGICLINKS if it wants to ensure that
magic-link crossing is entirely disabled.

/* Testing. */
LOOKUP_BENEATH is tested as part of the openat2(2) selftests.

[1]: https://reviews.freebsd.org/D2808
[2]: https://reviews.freebsd.org/D17547
[3]: https://lore.kernel.org/lkml/20170429220414.gt29...@zeniv.linux.org.uk/
[4]: https://lore.kernel.org/lkml/1415094884-18349-1-git-send-email-drysd...@google.com/
[5]: https://lore.kernel.org/lkml/1404124096-21445-1-git-send-email-drysd...@google.com/
[6]: https://lore.kernel.org/lkml/CAG48ez1jzNvxB+bfOBnERFGp=omm0vhwuld6eulmne3r6xa...@mail.gmail.com/

Cc: Christian Brauner <christian.brau...@ubuntu.com>
Suggested-by: David Drysdale <drysd...@google.com>
Suggested-by: Al Viro <v...@zeniv.linux.org.uk>
Suggested-by: Andy Lutomirski <l...@kernel.org>
Suggested-by: Linus Torvalds <torva...@linux-foundation.org>
Signed-off-by: Aleksa Sarai <cyp...@cyphar.com>
---
 fs/namei.c            | 80 +++++++++++++++++++++++++++++++++++++++----
 include/linux/namei.h |  4 +++
 2 files changed, 78 insertions(+), 6 deletions(-)

diff --git a/fs/namei.c b/fs/namei.c
index 321c8ad5d6b3..f0c15d2ace54 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -641,6 +641,14 @@ static bool legitimize_links(struct nameidata *nd)
 
 static bool legitimize_root(struct nameidata *nd)
 {
+       /*
+        * For scoped-lookups (where nd->root has been zeroed), we need to
+        * restart the whole lookup from scratch -- because set_root() is wrong
+        * for these lookups (nd->dfd is the root, not the filesystem root).
+        */
+       if (!nd->root.mnt && (nd->flags & LOOKUP_IS_SCOPED))
+               return false;
+       /* Nothing to do if nd->root is zero or is managed by the VFS user. */
        if (!nd->root.mnt || (nd->flags & LOOKUP_ROOT))
                return true;
        nd->flags |= LOOKUP_ROOT_GRABBED;
@@ -776,12 +784,37 @@ static int complete_walk(struct nameidata *nd)
        int status;
 
        if (nd->flags & LOOKUP_RCU) {
-               if (!(nd->flags & LOOKUP_ROOT))
+               /*
+                * We don't want to zero nd->root for scoped-lookups or
+                * externally-managed nd->root.
+                */
+               if (!(nd->flags & (LOOKUP_ROOT | LOOKUP_IS_SCOPED)))
                        nd->root.mnt = NULL;
                if (unlikely(unlazy_walk(nd)))
                        return -ECHILD;
        }
 
+       if (unlikely(nd->flags & LOOKUP_IS_SCOPED)) {
+               /*
+                * While the guarantee of LOOKUP_IS_SCOPED is (roughly) "don't
+                * ever step outside the root during lookup" and should already
+                * be guaranteed by the rest of namei, we want to avoid a namei
+                * BUG resulting in userspace being given a path that was not
+                * scoped within the root at some point during the lookup.
+                *
+                * So, do a final sanity-check to make sure that in the
+                * worst-case scenario (a complete bypass of LOOKUP_IS_SCOPED)
+                * we won't silently return an fd completely outside of the
+                * requested root to userspace.
+                *
+                * Userspace could move the path outside the root after this
+                * check, but as discussed elsewhere this is not a concern (the
+                * resolved file was inside the root at some point).
+                */
+               if (!path_is_under(&nd->path, &nd->root))
+                       return -EXDEV;
+       }
+
        if (likely(!(nd->flags & LOOKUP_JUMPED)))
                return 0;
 
@@ -802,6 +835,14 @@ static int set_root(struct nameidata *nd)
 {
        struct fs_struct *fs = current->fs;
 
+       /*
+        * Jumping to the real root in a scoped-lookup is a BUG in namei, but we
+        * still have to ensure it doesn't happen because it will cause a breakout
+        * from the dirfd.
+        */
+       if (WARN_ON(nd->flags & LOOKUP_IS_SCOPED))
+               return -ENOTRECOVERABLE;
+
        if (nd->flags & LOOKUP_RCU) {
                unsigned seq;
 
@@ -838,6 +879,8 @@ static inline void path_to_nameidata(const struct path *path,
 
 static int nd_jump_root(struct nameidata *nd)
 {
+       if (unlikely(nd->flags & LOOKUP_BENEATH))
+               return -EXDEV;
        if (unlikely(nd->flags & LOOKUP_NO_XDEV)) {
                /* Absolute path arguments to path_init() are allowed. */
                if (nd->path.mnt != NULL && nd->path.mnt != nd->root.mnt)
@@ -883,6 +926,9 @@ int nd_jump_link(struct path *path)
                if (nd->path.mnt != path->mnt)
                        goto err;
        }
+       /* Not currently safe for scoped-lookups. */
+       if (unlikely(nd->flags & LOOKUP_IS_SCOPED))
+               goto err;
 
        path_put(&nd->path);
        nd->path = *path;
@@ -1379,8 +1425,11 @@ static int follow_dotdot_rcu(struct nameidata *nd)
        struct inode *inode = nd->inode;
 
        while (1) {
-               if (path_equal(&nd->path, &nd->root))
+               if (path_equal(&nd->path, &nd->root)) {
+                       if (unlikely(nd->flags & LOOKUP_BENEATH))
+                               return -ECHILD;
                        break;
+               }
                if (nd->path.dentry != nd->path.mnt->mnt_root) {
                        struct dentry *old = nd->path.dentry;
                        struct dentry *parent = old->d_parent;
@@ -1510,9 +1559,12 @@ static int path_parent_directory(struct path *path)
 
 static int follow_dotdot(struct nameidata *nd)
 {
-       while(1) {
-               if (path_equal(&nd->path, &nd->root))
+       while (1) {
+               if (path_equal(&nd->path, &nd->root)) {
+                       if (unlikely(nd->flags & LOOKUP_BENEATH))
+                               return -EXDEV;
                        break;
+               }
                if (nd->path.dentry != nd->path.mnt->mnt_root) {
                        int ret = path_parent_directory(&nd->path);
                        if (ret)
@@ -1739,6 +1791,13 @@ static inline int handle_dots(struct nameidata *nd, int type)
        if (type == LAST_DOTDOT) {
                int error = 0;
 
+               /*
+                * Scoped-lookup flags resolving ".." is not currently safe --
+                * races can cause our parent to have moved outside of the root
+                * and us to skip over it.
+                */
+               if (unlikely(nd->flags & LOOKUP_IS_SCOPED))
+                       return -EXDEV;
                if (!nd->root.mnt) {
                        error = set_root(nd);
                        if (error)
@@ -2261,7 +2320,6 @@ static const char *path_init(struct nameidata *nd, unsigned flags)
                        get_fs_pwd(current->fs, &nd->path);
                        nd->inode = nd->path.dentry->d_inode;
                }
-               return s;
        } else {
                /* Caller must check execute permissions on the starting path component */
                struct fd f = fdget_raw(nd->dfd);
@@ -2286,8 +2344,18 @@ static const char *path_init(struct nameidata *nd, unsigned flags)
                        nd->inode = nd->path.dentry->d_inode;
                }
                fdput(f);
-               return s;
        }
+       /* For scoped-lookups we need to set the root to the dirfd as well. */
+       if (flags & LOOKUP_IS_SCOPED) {
+               nd->root = nd->path;
+               if (flags & LOOKUP_RCU) {
+                       nd->root_seq = nd->seq;
+               } else {
+                       path_get(&nd->root);
+                       nd->flags |= LOOKUP_ROOT_GRABBED;
+               }
+       }
+       return s;
 }
 
 static const char *trailing_symlink(struct nameidata *nd)
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 25ee88c4acb1..93dad378f1e8 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -2,6 +2,7 @@
 #ifndef _LINUX_NAMEI_H
 #define _LINUX_NAMEI_H
 
+#include <linux/fs.h>
 #include <linux/kernel.h>
 #include <linux/path.h>
 #include <linux/fcntl.h>
@@ -43,6 +44,9 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND};
 #define LOOKUP_NO_SYMLINKS     0x010000 /* No symlink crossing. */
 #define LOOKUP_NO_MAGICLINKS   0x020000 /* No nd_jump_link() crossing. */
 #define LOOKUP_NO_XDEV         0x040000 /* No mountpoint crossing. */
+#define LOOKUP_BENEATH         0x080000 /* No escaping from starting point. */
+/* LOOKUP_* flags which do scope-related checks based on the dirfd. */
+#define LOOKUP_IS_SCOPED LOOKUP_BENEATH
 
 extern int path_pts(struct path *path);
 
-- 
2.24.0

Reply via email to