Jann Horn <ja...@google.com> writes:

> This allows the admin of a user namespace to mark the namespace as
> transparent. All other namespaces, by default, are opaque.
>
> While the current behavior of user namespaces is appropriate for use in
> containers, there are many programs that only use user namespaces because
> doing so enables them to do other things (e.g. unsharing the mount or
> network namespace) that require namespaced capabilities. For them, the
> inability to see the real UIDs and GIDs of things from inside the user
> namespace can be very annoying.
>
> In a transparent namespace, all UIDs and GIDs that are mapped into its
> first opaque ancestor are visible and are not remapped. This means that if
> a process e.g. stat()s the real root directory in a namespace, it will
> still see it as owned by UID 0.
>
> Traditionally, any UID or GID that was visible in a user namespace was also
> mapped into the namespace, giving the namespace admin full access to it.
> This patch introduces a distinction: In a transparent namespace, UIDs and
> GIDs can be visible without being mapped. Non-mapped, visible UIDs can be
> passed from the kernel to userspace, but userspace can't send them back to
> the kernel. In order to be able to fully use specific UIDs/GIDs and gain
> privileges over them, mappings need to be set up in the usual way -
> however, to avoid aliasing problems, only identity mappings are permitted.
>
> I have gone through all callers of from_kuid() and from_kgid(), and as far
> as I can tell, kuid_has_mapping() and kgid_has_mapping() were the only
> functions that used them for privilege checks. (The keys subsystem uses
> them in an insecure way, and that issue has been known for a while, but my
> patch doesn't make that any more vulnerable than it already is.

Perhaps it has been known for a while but no one has stopped and
mentioned it to me.  What questionable thing is the keys subsystem
doing?

>)

This is a bigish change in semantics and I am going to have to digest
this before I can give this an ok.

Quite frankly at the base it scares me.


If this is just about presentation and allowing some information from
the parent user namespace I would be much happier if it was not
from_kuid but that you modified, but if you instead you had a function
say from_kuid_transparent, that performed the transformation you need
and was only used in those places it is safe.

I think I could reason about that.

As your patchset sits I can not reason about the change in semantics,
because without a large grep of the source I don't know what you are
changing.

And you are dramatically changing the semantics the semantics of
from_kuid to the point I do believe we need to inspepect all of the call
sites.  As such I really don't think it makes sense to reuse the
existing name for your new semantics.

Eric

> Signed-off-by: Jann Horn <ja...@google.com>
> ---
>  fs/proc/base.c                 |  28 ++++++--
>  include/linux/uidgid.h         |  16 ++++-
>  include/linux/user_namespace.h |   4 ++
>  kernel/user.c                  |   1 +
>  kernel/user_namespace.c        | 152 
> +++++++++++++++++++++++++++++++++++++++--
>  5 files changed, 191 insertions(+), 10 deletions(-)
>
> diff --git a/fs/proc/base.c b/fs/proc/base.c
> index a11eb71..c521c51 100644
> --- a/fs/proc/base.c
> +++ b/fs/proc/base.c
> @@ -2744,7 +2744,8 @@ static const struct file_operations 
> proc_projid_map_operations = {
>       .release        = proc_id_map_release,
>  };
>  
> -static int proc_setgroups_open(struct inode *inode, struct file *file)
> +static int proc_nsadmin_open(struct inode *inode, struct file *file,
> +     int (*show)(struct seq_file *, void *))
>  {
>       struct user_namespace *ns = NULL;
>       struct task_struct *task;
> @@ -2767,7 +2768,7 @@ static int proc_setgroups_open(struct inode *inode, 
> struct file *file)
>                       goto err_put_ns;
>       }
>  
> -     ret = single_open(file, &proc_setgroups_show, ns);
> +     ret = single_open(file, show, ns);
>       if (ret)
>               goto err_put_ns;
>  
> @@ -2778,7 +2779,7 @@ err:
>       return ret;
>  }
>  
> -static int proc_setgroups_release(struct inode *inode, struct file *file)
> +static int proc_nsadmin_release(struct inode *inode, struct file *file)
>  {
>       struct seq_file *seq = file->private_data;
>       struct user_namespace *ns = seq->private;
> @@ -2787,12 +2788,30 @@ static int proc_setgroups_release(struct inode 
> *inode, struct file *file)
>       return ret;
>  }
>  
> +static int proc_setgroups_open(struct inode *inode, struct file *file)
> +{
> +     return proc_nsadmin_open(inode, file, &proc_setgroups_show);
> +}
> +
>  static const struct file_operations proc_setgroups_operations = {
>       .open           = proc_setgroups_open,
>       .write          = proc_setgroups_write,
>       .read           = seq_read,
>       .llseek         = seq_lseek,
> -     .release        = proc_setgroups_release,
> +     .release        = proc_nsadmin_release,
> +};
> +
> +static int proc_transparent_open(struct inode *inode, struct file *file)
> +{
> +     return proc_nsadmin_open(inode, file, &proc_transparent_show);
> +}
> +
> +static const struct file_operations proc_transparent_operations = {
> +     .open           = proc_transparent_open,
> +     .write          = proc_transparent_write,
> +     .read           = seq_read,
> +     .llseek         = seq_lseek,
> +     .release        = proc_nsadmin_release,
>  };
>  #endif /* CONFIG_USER_NS */
>  
> @@ -2901,6 +2920,7 @@ static const struct pid_entry tgid_base_stuff[] = {
>       REG("gid_map",    S_IRUGO|S_IWUSR, proc_gid_map_operations),
>       REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations),
>       REG("setgroups",  S_IRUGO|S_IWUSR, proc_setgroups_operations),
> +     REG("transparent", S_IRUGO|S_IWUSR, proc_transparent_operations),
>  #endif
>  #ifdef CONFIG_CHECKPOINT_RESTORE
>       REG("timers",     S_IRUGO, proc_timers_operations),
> diff --git a/include/linux/uidgid.h b/include/linux/uidgid.h
> index 0383552..2908d40 100644
> --- a/include/linux/uidgid.h
> +++ b/include/linux/uidgid.h
> @@ -124,17 +124,19 @@ extern kgid_t make_kgid(struct user_namespace *from, 
> gid_t gid);
>  
>  extern uid_t from_kuid(struct user_namespace *to, kuid_t uid);
>  extern gid_t from_kgid(struct user_namespace *to, kgid_t gid);
> +extern uid_t from_kuid_opaque(struct user_namespace *to, kuid_t uid);
> +extern gid_t from_kgid_opaque(struct user_namespace *to, kgid_t gid);
>  extern uid_t from_kuid_munged(struct user_namespace *to, kuid_t uid);
>  extern gid_t from_kgid_munged(struct user_namespace *to, kgid_t gid);
>  
>  static inline bool kuid_has_mapping(struct user_namespace *ns, kuid_t uid)
>  {
> -     return from_kuid(ns, uid) != (uid_t) -1;
> +     return from_kuid_opaque(ns, uid) != (uid_t) -1;
>  }
>  
>  static inline bool kgid_has_mapping(struct user_namespace *ns, kgid_t gid)
>  {
> -     return from_kgid(ns, gid) != (gid_t) -1;
> +     return from_kgid_opaque(ns, gid) != (gid_t) -1;
>  }
>  
>  #else
> @@ -159,6 +161,16 @@ static inline gid_t from_kgid(struct user_namespace *to, 
> kgid_t kgid)
>       return __kgid_val(kgid);
>  }
>  
> +static inline uid_t from_kuid_opaque(struct user_namespace *to, kuid_t kuid)
> +{
> +     return __kuid_val(kuid);
> +}
> +
> +static inline gid_t from_kgid_opaque(struct user_namespace *to, kgid_t kgid)
> +{
> +     return __kgid_val(kgid);
> +}
> +
>  static inline uid_t from_kuid_munged(struct user_namespace *to, kuid_t kuid)
>  {
>       uid_t uid = from_kuid(to, kuid);
> diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
> index 8297e5b..18291ac 100644
> --- a/include/linux/user_namespace.h
> +++ b/include/linux/user_namespace.h
> @@ -28,6 +28,8 @@ struct user_namespace {
>       struct uid_gid_map      projid_map;
>       atomic_t                count;
>       struct user_namespace   *parent;
> +     /* self for normal ns; first opaque parent for transparent ns */
> +     struct user_namespace   *opaque;
>       int                     level;
>       kuid_t                  owner;
>       kgid_t                  group;
> @@ -71,6 +73,8 @@ extern ssize_t proc_gid_map_write(struct file *, const char 
> __user *, size_t, lo
>  extern ssize_t proc_projid_map_write(struct file *, const char __user *, 
> size_t, loff_t *);
>  extern ssize_t proc_setgroups_write(struct file *, const char __user *, 
> size_t, loff_t *);
>  extern int proc_setgroups_show(struct seq_file *m, void *v);
> +extern ssize_t proc_transparent_write(struct file *, const char __user *, 
> size_t, loff_t *);
> +extern int proc_transparent_show(struct seq_file *m, void *v);
>  extern bool userns_may_setgroups(const struct user_namespace *ns);
>  #else
>  
> diff --git a/kernel/user.c b/kernel/user.c
> index b069ccb..e1fd9e5 100644
> --- a/kernel/user.c
> +++ b/kernel/user.c
> @@ -48,6 +48,7 @@ struct user_namespace init_user_ns = {
>               },
>       },
>       .count = ATOMIC_INIT(3),
> +     .opaque = &init_user_ns,
>       .owner = GLOBAL_ROOT_UID,
>       .group = GLOBAL_ROOT_GID,
>       .ns.inum = PROC_USER_INIT_INO,
> diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
> index 9bafc21..da329a1 100644
> --- a/kernel/user_namespace.c
> +++ b/kernel/user_namespace.c
> @@ -98,6 +98,7 @@ int create_user_ns(struct cred *new)
>       atomic_set(&ns->count, 1);
>       /* Leave the new->user_ns reference with the new user namespace. */
>       ns->parent = parent_ns;
> +     ns->opaque = ns;
>       ns->level = parent_ns->level + 1;
>       ns->owner = owner;
>       ns->group = group;
> @@ -251,18 +252,46 @@ EXPORT_SYMBOL(make_kuid);
>   *   Map @kuid into the user-namespace specified by @targ and
>   *   return the resulting uid.
>   *
> + *   This function is *not* appropriate for security checks because
> + *   if @targ is transparent, the mappings of an ancestor namespace
> + *   are used. If @targ isn't &init_user_ns and you intend to do
> + *   anything with the result apart from returning it to a process
> + *   in @targ, you might want to use from_kuid_opaque() instead.
> + *
>   *   There is always a mapping into the initial user_namespace.
>   *
> - *   If @kuid has no mapping in @targ (uid_t)-1 is returned.
> + *   If @kuid is not visible in @targ (uid_t)-1 is returned.
>   */
>  uid_t from_kuid(struct user_namespace *targ, kuid_t kuid)
>  {
>       /* Map the uid from a global kernel uid */
> -     return map_id_up(&targ->uid_map, __kuid_val(kuid));
> +     struct user_namespace *opaque = READ_ONCE(targ->opaque);
> +
> +     return map_id_up(&opaque->uid_map, __kuid_val(kuid));
>  }
>  EXPORT_SYMBOL(from_kuid);
>  
>  /**
> + *   from_kuid_opaque - Create a uid from a kuid user-namespace pair.
> + *   @targ: The user namespace we want a uid in.
> + *   @kuid: The kernel internal uid to start with.
> + *
> + *   Map @kuid into the user-namespace specified by @targ and
> + *   return the resulting uid. This ignores transparent user
> + *   namespaces and is therefore appropriate for security checks.
> + *
> + *   There is always a mapping into the initial user_namespace.
> + *
> + *   If @kuid has no mapping in @targ (uid_t)-1 is returned.
> + */
> +uid_t from_kuid_opaque(struct user_namespace *targ, kuid_t kuid)
> +{
> +     /* Map the uid from a global kernel uid */
> +     return map_id_up(&targ->uid_map, __kuid_val(kuid));
> +}
> +EXPORT_SYMBOL(from_kuid_opaque);
> +
> +/**
>   *   from_kuid_munged - Create a uid from a kuid user-namespace pair.
>   *   @targ: The user namespace we want a uid in.
>   *   @kuid: The kernel internal uid to start with.
> @@ -319,18 +348,46 @@ EXPORT_SYMBOL(make_kgid);
>   *   Map @kgid into the user-namespace specified by @targ and
>   *   return the resulting gid.
>   *
> + *   This function is *not* appropriate for security checks because
> + *   if @targ is transparent, the mappings of an ancestor namespace
> + *   are used. If @targ isn't &init_user_ns and you intend to do
> + *   anything with the result apart from returning it to a process
> + *   in @targ, you might want to use from_kgid_opaque() instead.
> + *
>   *   There is always a mapping into the initial user_namespace.
>   *
> - *   If @kgid has no mapping in @targ (gid_t)-1 is returned.
> + *   If @kgid is not visible in @targ (gid_t)-1 is returned.
>   */
>  gid_t from_kgid(struct user_namespace *targ, kgid_t kgid)
>  {
>       /* Map the gid from a global kernel gid */
> -     return map_id_up(&targ->gid_map, __kgid_val(kgid));
> +     struct user_namespace *opaque = READ_ONCE(targ->opaque);
> +
> +     return map_id_up(&opaque->gid_map, __kgid_val(kgid));
>  }
>  EXPORT_SYMBOL(from_kgid);
>  
>  /**
> + *   from_kgid_opaque - Create a gid from a kgid user-namespace pair.
> + *   @targ: The user namespace we want a gid in.
> + *   @kgid: The kernel internal gid to start with.
> + *
> + *   Map @kgid into the user-namespace specified by @targ and
> + *   return the resulting gid. This ignores transparent user
> + *   namespaces and is therefore appropriate for security checks.
> + *
> + *   There is always a mapping into the initial user_namespace.
> + *
> + *   If @kgid has no mapping in @targ (gid_t)-1 is returned.
> + */
> +gid_t from_kgid_opaque(struct user_namespace *targ, kgid_t kgid)
> +{
> +     /* Map the gid from a global kernel gid */
> +     return map_id_up(&targ->gid_map, __kgid_val(kgid));
> +}
> +EXPORT_SYMBOL(from_kgid_opaque);
> +
> +/**
>   *   from_kgid_munged - Create a gid from a kgid user-namespace pair.
>   *   @targ: The user namespace we want a gid in.
>   *   @kgid: The kernel internal gid to start with.
> @@ -811,6 +868,18 @@ static bool new_idmap_permitted(const struct file *file,
>                               struct uid_gid_map *new_map)
>  {
>       const struct cred *cred = file->f_cred;
> +     unsigned int idx;
> +
> +     /* Don't allow non-identity mappings in transparent namespaces. */
> +     if (ns != ns->opaque) {
> +             for (idx = 0; idx < new_map->nr_extents; idx++) {
> +                     struct uid_gid_extent *ext = &new_map->extent[idx];
> +
> +                     if (ext->first != ext->lower_first)
> +                             return false;
> +             }
> +     }
> +
>       /* Don't allow mappings that would allow anything that wouldn't
>        * be allowed without the establishment of unprivileged mappings.
>        */
> @@ -922,6 +991,81 @@ out_unlock:
>       goto out;
>  }
>  
> +int proc_transparent_show(struct seq_file *seq, void *v)
> +{
> +     struct user_namespace *ns = seq->private;
> +     struct user_namespace *opaque = READ_ONCE(ns->opaque);
> +
> +     seq_printf(seq, "%d\n", (ns == opaque) ? 0 : 1);
> +     return 0;
> +}
> +
> +ssize_t proc_transparent_write(struct file *file, const char __user *buf,
> +                          size_t count, loff_t *ppos)
> +{
> +     struct seq_file *seq = file->private_data;
> +     struct user_namespace *ns = seq->private;
> +     char kbuf[8], *pos;
> +     bool transparent;
> +     ssize_t ret;
> +
> +     /* Only allow a very narrow range of strings to be written */
> +     ret = -EINVAL;
> +     if ((*ppos != 0) || (count >= sizeof(kbuf)))
> +             goto out;
> +
> +     /* What was written? */
> +     ret = -EFAULT;
> +     if (copy_from_user(kbuf, buf, count))
> +             goto out;
> +     kbuf[count] = '\0';
> +     pos = kbuf;
> +
> +     /* What is being requested? */
> +     ret = -EINVAL;
> +     if (pos[0] == '1') {
> +             pos += 1;
> +             transparent = true;
> +     } else if (pos[0] == '0') {
> +             pos += 1;
> +             transparent = false;
> +     } else
> +             goto out;
> +
> +     /* Verify there is not trailing junk on the line */
> +     pos = skip_spaces(pos);
> +     if (*pos != '\0')
> +             goto out;
> +
> +     ret = -EPERM;
> +     mutex_lock(&userns_state_mutex);
> +     /* Is the requested state different from the current one? */
> +     if (transparent != (ns->opaque != ns)) {
> +             /* You can't turn off transparent mode. */
> +             if (!transparent)
> +                     goto out_unlock;
> +             /* If there are existing mappings, they might be
> +              * non-identity mappings. Therefore, block transparent
> +              * mode. This also prevents making the init namespace
> +              * transparent (which wouldn't work).
> +              */
> +             if (ns->uid_map.nr_extents != 0 || ns->gid_map.nr_extents != 0)
> +                     goto out_unlock;
> +             /* Okay! Make the namespace transparent. */
> +             ns->opaque = ns->parent->opaque;
> +     }
> +     mutex_unlock(&userns_state_mutex);
> +
> +     /* Report a successful write */
> +     *ppos = count;
> +     ret = count;
> +out:
> +     return ret;
> +out_unlock:
> +     mutex_unlock(&userns_state_mutex);
> +     goto out;
> +}
> +
>  bool userns_may_setgroups(const struct user_namespace *ns)
>  {
>       bool allowed;

Reply via email to