The problem is that a pathname can contain absolute symlinks, and
currently they are resolved relative to the current root.
But if we want to open a file in another mount namespace and we have
a file descriptor for its root directory, we want the pathname to be
resolved in the target mount namespace. For this case we need the
new flags O_ATROOT and AT_FDROOT.

If O_ATROOT is set for openat(), or AT_FDROOT is set for fstatat(),
linkat(), unlinkat() or fchownat() (and likewise in do_utimes() and
do_open_execat()), path_init() is executed with the LOOKUP_DFD_ROOT flag.

v2: fix the value of O_ATROOT so that it does not intersect with other constants
Signed-off-by: Andrey Vagin <ava...@openvz.org>
---
 fs/exec.c                        |  4 +++-
 fs/namei.c                       | 26 +++++++++++++++++---------
 fs/open.c                        |  6 +++++-
 fs/stat.c                        |  4 +++-
 fs/utimes.c                      |  4 +++-
 include/uapi/asm-generic/fcntl.h |  4 ++++
 include/uapi/linux/fcntl.h       |  1 +
 7 files changed, 36 insertions(+), 13 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index 887c1c9..473b709 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -775,12 +775,14 @@ static struct file *do_open_execat(int fd, struct 
filename *name, int flags)
                .lookup_flags = LOOKUP_FOLLOW,
        };
 
-       if ((flags & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) != 0)
+       if ((flags & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH | AT_FDROOT)) != 0)
                return ERR_PTR(-EINVAL);
        if (flags & AT_SYMLINK_NOFOLLOW)
                open_exec_flags.lookup_flags &= ~LOOKUP_FOLLOW;
        if (flags & AT_EMPTY_PATH)
                open_exec_flags.lookup_flags |= LOOKUP_EMPTY;
+       if (flags & AT_FDROOT)
+               open_exec_flags.lookup_flags |= LOOKUP_DFD_ROOT;
 
        file = do_filp_open(fd, name, &open_exec_flags);
        if (IS_ERR(file))
diff --git a/fs/namei.c b/fs/namei.c
index 17548b1..068c2d2 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2550,7 +2550,8 @@ user_path_parent(int dfd, const char __user *path,
                 unsigned int flags)
 {
        /* only LOOKUP_REVAL is allowed in extra flags */
-       return filename_parentat(dfd, getname(path), flags & LOOKUP_REVAL,
+       return filename_parentat(dfd, getname(path),
+                                flags & (LOOKUP_REVAL | LOOKUP_DFD_ROOT),
                                 parent, last, type);
 }
 
@@ -3546,7 +3547,7 @@ static struct dentry *filename_create(int dfd, struct 
filename *name,
         * Note that only LOOKUP_REVAL and LOOKUP_DIRECTORY matter here. Any
         * other flags passed in are ignored!
         */
-       lookup_flags &= LOOKUP_REVAL;
+       lookup_flags &= LOOKUP_REVAL | LOOKUP_DFD_ROOT;
 
        name = filename_parentat(dfd, name, lookup_flags, path, &last, &type);
        if (IS_ERR(name))
@@ -3944,7 +3945,8 @@ EXPORT_SYMBOL(vfs_unlink);
  * writeout happening, and we don't want to prevent access to the directory
  * while waiting on the I/O.
  */
-static long do_unlinkat(int dfd, const char __user *pathname)
+static long do_unlinkat(int dfd, const char __user *pathname,
+                                       unsigned int lookup_flags)
 {
        int error;
        struct filename *name;
@@ -3954,7 +3956,6 @@ static long do_unlinkat(int dfd, const char __user 
*pathname)
        int type;
        struct inode *inode = NULL;
        struct inode *delegated_inode = NULL;
-       unsigned int lookup_flags = 0;
 retry:
        name = user_path_parent(dfd, pathname,
                                &path, &last, &type, lookup_flags);
@@ -4019,18 +4020,23 @@ slashes:
 
 SYSCALL_DEFINE3(unlinkat, int, dfd, const char __user *, pathname, int, flag)
 {
-       if ((flag & ~AT_REMOVEDIR) != 0)
+       unsigned int lookup_flags = 0;
+
+       if ((flag & ~(AT_REMOVEDIR | AT_FDROOT)) != 0)
                return -EINVAL;
 
        if (flag & AT_REMOVEDIR)
                return do_rmdir(dfd, pathname);
 
-       return do_unlinkat(dfd, pathname);
+       if (flag & AT_FDROOT)
+               lookup_flags |= LOOKUP_DFD_ROOT;
+
+       return do_unlinkat(dfd, pathname, lookup_flags);
 }
 
 SYSCALL_DEFINE1(unlink, const char __user *, pathname)
 {
-       return do_unlinkat(AT_FDCWD, pathname);
+       return do_unlinkat(AT_FDCWD, pathname, 0);
 }
 
 int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname)
@@ -4181,7 +4187,7 @@ SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, 
oldname,
        int how = 0;
        int error;
 
-       if ((flags & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0)
+       if ((flags & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH | AT_FDROOT)) != 0)
                return -EINVAL;
        /*
         * To use null names we require CAP_DAC_READ_SEARCH
@@ -4196,13 +4202,15 @@ SYSCALL_DEFINE5(linkat, int, olddfd, const char __user 
*, oldname,
 
        if (flags & AT_SYMLINK_FOLLOW)
                how |= LOOKUP_FOLLOW;
+       if (flags & AT_FDROOT)
+               how |= LOOKUP_DFD_ROOT;
 retry:
        error = user_path_at(olddfd, oldname, how, &old_path);
        if (error)
                return error;
 
        new_dentry = user_path_create(newdfd, newname, &new_path,
-                                       (how & LOOKUP_REVAL));
+                               (how & (LOOKUP_REVAL | LOOKUP_DFD_ROOT)));
        error = PTR_ERR(new_dentry);
        if (IS_ERR(new_dentry))
                goto out;
diff --git a/fs/open.c b/fs/open.c
index 93ae3cd..e0bc8d0 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -613,12 +613,14 @@ SYSCALL_DEFINE5(fchownat, int, dfd, const char __user *, 
filename, uid_t, user,
        int error = -EINVAL;
        int lookup_flags;
 
-       if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) != 0)
+       if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH | AT_FDROOT)) != 0)
                goto out;
 
        lookup_flags = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW;
        if (flag & AT_EMPTY_PATH)
                lookup_flags |= LOOKUP_EMPTY;
+       if (flag & AT_FDROOT)
+               lookup_flags |= LOOKUP_DFD_ROOT;
 retry:
        error = user_path_at(dfd, filename, lookup_flags, &path);
        if (error)
@@ -941,6 +943,8 @@ static inline int build_open_flags(int flags, umode_t mode, 
struct open_flags *o
                lookup_flags |= LOOKUP_DIRECTORY;
        if (!(flags & O_NOFOLLOW))
                lookup_flags |= LOOKUP_FOLLOW;
+       if (flags & O_ATROOT)
+               lookup_flags |= LOOKUP_DFD_ROOT;
        op->lookup_flags = lookup_flags;
        return 0;
 }
diff --git a/fs/stat.c b/fs/stat.c
index bc045c7..d71e7f2 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -95,13 +95,15 @@ int vfs_fstatat(int dfd, const char __user *filename, 
struct kstat *stat,
        unsigned int lookup_flags = 0;
 
        if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT |
-                     AT_EMPTY_PATH)) != 0)
+                     AT_EMPTY_PATH | AT_FDROOT)) != 0)
                goto out;
 
        if (!(flag & AT_SYMLINK_NOFOLLOW))
                lookup_flags |= LOOKUP_FOLLOW;
        if (flag & AT_EMPTY_PATH)
                lookup_flags |= LOOKUP_EMPTY;
+       if (flag & AT_FDROOT)
+               lookup_flags |= LOOKUP_DFD_ROOT;
 retry:
        error = user_path_at(dfd, filename, lookup_flags, &path);
        if (error)
diff --git a/fs/utimes.c b/fs/utimes.c
index 85c40f4..78a9eb9 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -143,7 +143,7 @@ long do_utimes(int dfd, const char __user *filename, struct 
timespec *times,
                goto out;
        }
 
-       if (flags & ~AT_SYMLINK_NOFOLLOW)
+       if (flags & ~(AT_SYMLINK_NOFOLLOW | AT_FDROOT))
                goto out;
 
        if (filename == NULL && dfd != AT_FDCWD) {
@@ -165,6 +165,8 @@ long do_utimes(int dfd, const char __user *filename, struct 
timespec *times,
 
                if (!(flags & AT_SYMLINK_NOFOLLOW))
                        lookup_flags |= LOOKUP_FOLLOW;
+               if (flags & AT_FDROOT)
+                       lookup_flags |= LOOKUP_DFD_ROOT;
 retry:
                error = user_path_at(dfd, filename, lookup_flags, &path);
                if (error)
diff --git a/include/uapi/asm-generic/fcntl.h b/include/uapi/asm-generic/fcntl.h
index e063eff..0436b1d 100644
--- a/include/uapi/asm-generic/fcntl.h
+++ b/include/uapi/asm-generic/fcntl.h
@@ -88,6 +88,10 @@
 #define __O_TMPFILE    020000000
 #endif
 
+#ifndef O_ATROOT
+#define O_ATROOT       040000000       /* dfd is a root */
+#endif
+
 /* a horrid kludge trying to make sure that this will fail on old kernels */
 #define O_TMPFILE (__O_TMPFILE | O_DIRECTORY)
 #define O_TMPFILE_MASK (__O_TMPFILE | O_DIRECTORY | O_CREAT)      
diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h
index beed138..4f3b631 100644
--- a/include/uapi/linux/fcntl.h
+++ b/include/uapi/linux/fcntl.h
@@ -62,6 +62,7 @@
 #define AT_SYMLINK_FOLLOW      0x400   /* Follow symbolic links.  */
 #define AT_NO_AUTOMOUNT                0x800   /* Suppress terminal automount 
traversal */
 #define AT_EMPTY_PATH          0x1000  /* Allow empty relative pathname */
+#define AT_FDROOT              0x2000  /* Resolve a path as if dirfd is root */
 
 
 #endif /* _UAPI_LINUX_FCNTL_H */
-- 
2.5.5

Reply via email to