From: Serge Hallyn <serge.hal...@ubuntu.com> 1. lxcapi_create: don't try to unshare and mount for dir-backed containers
It's unnecessary, and breaks unprivileged lxc-create (since unpriv users cannot yet unshare(CLONE_NEWNS)). 2. api_create: chown rootfs chown rootfs to the host uid to which container root will be mapped 3. create: run template in a mapped user ns 4. use (setuid-root) newxidmap to set id_map if we are not root This is needed to be able to set userns mappings as an unprivileged user, for unprivileged lxc-start. Signed-off-by: Serge Hallyn <serge.hal...@ubuntu.com> --- src/lxc/conf.c | 102 +++++++++++++++++++++++++----- src/lxc/conf.h | 4 ++ src/lxc/lxccontainer.c | 164 ++++++++++++++++++++++++++++++++++++++++++++----- 3 files changed, 240 insertions(+), 30 deletions(-) diff --git a/src/lxc/conf.c b/src/lxc/conf.c index 208c08b..3f7f0ef 100644 --- a/src/lxc/conf.c +++ b/src/lxc/conf.c @@ -2802,31 +2802,49 @@ int lxc_map_ids(struct lxc_list *idmap, pid_t pid) int ret = 0; enum idtype type; char *buf = NULL, *pos; + int am_root = (getuid() == 0); for(type = ID_TYPE_UID; type <= ID_TYPE_GID; type++) { int left, fill; - - pos = buf; - lxc_list_for_each(iterator, idmap) { - /* The kernel only takes <= 4k for writes to /proc/<nr>/[ug]id_map */ - if (!buf) - buf = pos = malloc(4096); + int had_entry = 0; + if (!buf) { + buf = pos = malloc(4096); if (!buf) return -ENOMEM; + } + pos = buf; + if (!am_root) + pos += sprintf(buf, "new%cidmap %d ", + type == ID_TYPE_UID ? 
'u' : 'g', + pid); + lxc_list_for_each(iterator, idmap) { + /* The kernel only takes <= 4k for writes to /proc/<nr>/[ug]id_map */ map = iterator->elem; - if (map->idtype == type) { - left = 4096 - (pos - buf); - fill = snprintf(pos, left, "%lu %lu %lu\n", - map->nsid, map->hostid, map->range); - if (fill <= 0 || fill >= left) - SYSERROR("snprintf failed, too many mappings"); - pos += fill; - } + if (map->idtype != type) + continue; + + had_entry = 1; + left = 4096 - (pos - buf); + fill = snprintf(pos, left, " %lu %lu %lu", map->nsid, + map->hostid, map->range); + if (fill <= 0 || fill >= left) + SYSERROR("snprintf failed, too many mappings"); + pos += fill; } - if (pos == buf) // no mappings were found + if (!had_entry) continue; - ret = write_id_mapping(type, pid, buf, pos-buf); + left = 4096 - (pos - buf); + fill = snprintf(pos, left, "\n"); + if (fill <= 0 || fill >= left) + SYSERROR("snprintf failed, too many mappings"); + pos += fill; + + if (am_root) + ret = write_id_mapping(type, pid, buf, pos-buf); + else + ret = system(buf); + if (ret) break; } @@ -2836,6 +2854,58 @@ int lxc_map_ids(struct lxc_list *idmap, pid_t pid) return ret; } +/* + * return the host uid to which the container root is mapped, or -1 on + * error + */ +int get_mapped_rootid(struct lxc_conf *conf) +{ + struct lxc_list *it; + struct id_map *map; + + lxc_list_for_each(it, &conf->id_map) { + map = it->elem; + if (map->idtype != ID_TYPE_UID) + continue; + if (map->nsid != 0) + continue; + return map->hostid; + } + return -1; +} + +bool hostid_is_mapped(int id, struct lxc_conf *conf) +{ + struct lxc_list *it; + struct id_map *map; + lxc_list_for_each(it, &conf->id_map) { + map = it->elem; + if (map->idtype != ID_TYPE_UID) + continue; + if (id >= map->hostid && id < map->hostid + map->range) + return true; + } + return false; +} + +int find_unmapped_nsuid(struct lxc_conf *conf) +{ + struct lxc_list *it; + struct id_map *map; + uid_t freeid = 0; +again: + lxc_list_for_each(it, &conf->id_map) { + 
map = it->elem; + if (map->idtype != ID_TYPE_UID) + continue; + if (freeid >= map->nsid && freeid < map->nsid + map->range) { + freeid = map->nsid + map->range; + goto again; + } + } + return freeid; +} + int lxc_find_gateway_addresses(struct lxc_handler *handler) { struct lxc_list *network = &handler->conf->network; diff --git a/src/lxc/conf.h b/src/lxc/conf.h index 84acce8..445867d 100644 --- a/src/lxc/conf.h +++ b/src/lxc/conf.h @@ -361,4 +361,8 @@ extern int lxc_setup(const char *name, struct lxc_conf *lxc_conf, const char *lxcpath, struct cgroup_process_info *cgroup_info); extern void lxc_rename_phys_nics_on_shutdown(struct lxc_conf *conf); + +extern int get_mapped_rootid(struct lxc_conf *conf); +extern int find_unmapped_nsuid(struct lxc_conf *conf); +extern bool hostid_is_mapped(int id, struct lxc_conf *conf); #endif diff --git a/src/lxc/lxccontainer.c b/src/lxc/lxccontainer.c index c8ecef3..816eb39 100644 --- a/src/lxc/lxccontainer.c +++ b/src/lxc/lxccontainer.c @@ -694,6 +694,49 @@ static const char *lxcapi_get_config_path(struct lxc_container *c); static bool lxcapi_set_config_item(struct lxc_container *c, const char *key, const char *v); /* + * chown_mapped: for an unprivileged user with uid X to chown a dir + * to subuid Y, he needs to run chown as root in a userns where + * nsid 0 is mapped to hostuid Y, and nsid Y is mapped to hostuid + * X. That way, the container root is privileged with respect to + * hostuid X, allowing him to do the chown. 
+ */ +static int chown_mapped(int nsrootid, char *path) +{ + if (nsrootid < 0) + return nsrootid; + pid_t pid = fork(); + if (pid < 0) { + SYSERROR("Failed forking"); + return -1; + } + if (!pid) { + int hostuid = geteuid(), ret; + char map1[100], map2[100]; + char *args[] = {"lxc-usernsexec", "-m", map1, "-m", map2, "--", "chown", + "0", path, NULL}; + + // "b:0:nsrootid:1" + ret = snprintf(map1, 100, "b:0:%d:1", nsrootid); + if (ret < 0 || ret >= 100) { + ERROR("Error uid printing map string"); + return -1; + } + + // "b:hostuid:hostuid:1" + ret = snprintf(map2, 100, "b:%d:%d:1", hostuid, hostuid); + if (ret < 0 || ret >= 100) { + ERROR("Error uid printing map string"); + return -1; + } + + ret = execvp("lxc-usernsexec", args); + SYSERROR("Failed executing lxc-usernsexec"); + exit(1); + } + return wait_for_pid(pid); +} + +/* * do_bdev_create: thin wrapper around bdev_create(). Like bdev_create(), * it returns a mounted bdev on success, NULL on error. */ @@ -720,6 +763,25 @@ static struct bdev *do_bdev_create(struct lxc_container *c, const char *type, } lxcapi_set_config_item(c, "lxc.rootfs", bdev->src); + + /* if we are not root, chown the rootfs dir to root in the + * target uidmap */ + + if (geteuid() != 0) { + int rootid; + if ((rootid = get_mapped_rootid(c->lxc_conf)) <= 0) { + ERROR("No mapping for container root"); + bdev_put(bdev); + return NULL; + } + ret = chown_mapped(rootid, bdev->dest); + if (ret < 0) { + ERROR("Error chowning %s to %d\n", bdev->dest, rootid); + bdev_put(bdev); + return NULL; + } + } + return bdev; } @@ -785,6 +847,7 @@ static bool create_run_template(struct lxc_container *c, char *tpath, bool quiet int i; int ret, len, nargs = 0; char **newargv; + struct lxc_conf *conf = c->lxc_conf; process_unlock(); // we're no longer sharing if (quiet) { @@ -795,10 +858,6 @@ static bool create_run_template(struct lxc_container *c, char *tpath, bool quiet open("/dev/null", O_RDWR); open("/dev/null", O_RDWR); } - if (unshare(CLONE_NEWNS) < 0) { - 
ERROR("error unsharing mounts"); - exit(1); - } src = c->lxc_conf->rootfs.path; /* @@ -815,9 +874,19 @@ static bool create_run_template(struct lxc_container *c, char *tpath, bool quiet exit(1); } - if (bdev->ops->mount(bdev) < 0) { - ERROR("Error mounting rootfs"); - exit(1); + if (strcmp(bdev->type, "dir") != 0) { + if (unshare(CLONE_NEWNS) < 0) { + ERROR("error unsharing mounts"); + exit(1); + } + if (bdev->ops->mount(bdev) < 0) { + ERROR("Error mounting rootfs"); + exit(1); + } + } else { // TODO come up with a better way here! + if (bdev->dest) + free(bdev->dest); + bdev->dest = strdup(bdev->src); } /* @@ -827,6 +896,7 @@ static bool create_run_template(struct lxc_container *c, char *tpath, bool quiet if (argv) for (nargs = 0; argv[nargs]; nargs++) ; nargs += 4; // template, path, rootfs and name args + newargv = malloc(nargs * sizeof(*newargv)); if (!newargv) exit(1); @@ -870,8 +940,68 @@ static bool create_run_template(struct lxc_container *c, char *tpath, bool quiet exit(1); newargv[nargs - 1] = NULL; + /* + * If we're running the template in a mapped userns, then + * we prepend the template command with: + * lxc-usernsexec <-m map1> ... <-m mapn> -- + */ + if (geteuid() != 0 && !lxc_list_empty(&conf->id_map)) { + int n2args = 1; + char **n2 = malloc(n2args * sizeof(*n2)); + struct lxc_list *it; + struct id_map *map; + + newargv[0] = tpath; + tpath = "lxc-usernsexec"; + n2[0] = "lxc-usernsexec"; + lxc_list_for_each(it, &conf->id_map) { + map = it->elem; + n2args += 2; + n2 = realloc(n2, n2args * sizeof(*n2)); + if (!n2) + exit(1); + n2[n2args-2] = "-m"; + n2[n2args-1] = malloc(200); + if (!n2[n2args-1]) + exit(1); + ret = snprintf(n2[n2args-1], 200, "%c:%lu:%lu:%lu", + map->idtype == ID_TYPE_UID ? 'u' : 'g', + map->nsid, map->hostid, map->range); + if (ret < 0 || ret >= 200) + exit(1); + } + bool hostid_mapped = hostid_is_mapped(geteuid(), conf); + int extraargs = hostid_mapped ? 
1 : 3; + n2 = realloc(n2, (nargs + n2args + extraargs) * sizeof(*n2)); + if (!n2) + exit(1); + if (!hostid_mapped) { + int free_id = find_unmapped_nsuid(conf); + n2[n2args++] = "-m"; + if (free_id < 0) { + ERROR("Could not find free uid to map"); + exit(1); + } + n2[n2args++] = malloc(200); + if (!n2[n2args-1]) { + SYSERROR("out of memory"); + exit(1); + } + ret = snprintf(n2[n2args-1], 200, "u:%d:%d:1", + free_id, geteuid()); + if (ret < 0 || ret >= 200) { + ERROR("string too long"); + exit(1); + } + } + n2[n2args++] = "--"; + for (i = 0; i < nargs; i++) + n2[i + n2args] = newargv[i]; + free(newargv); + newargv = n2; + } /* execute */ - execv(tpath, newargv); + execvp(tpath, newargv); SYSERROR("failed to execute template %s", tpath); exit(1); } @@ -2100,15 +2230,21 @@ static int clone_update_rootfs(struct lxc_container *c0, return wait_for_pid(pid); process_unlock(); // we're no longer sharing - if (unshare(CLONE_NEWNS) < 0) { - ERROR("error unsharing mounts"); - exit(1); - } bdev = bdev_init(c->lxc_conf->rootfs.path, c->lxc_conf->rootfs.mount, NULL); if (!bdev) exit(1); - if (bdev->ops->mount(bdev) < 0) - exit(1); + if (strcmp(bdev->type, "dir") != 0) { + if (unshare(CLONE_NEWNS) < 0) { + ERROR("error unsharing mounts"); + exit(1); + } + if (bdev->ops->mount(bdev) < 0) + exit(1); + } else { // TODO come up with a better way + if (bdev->dest) + free(bdev->dest); + bdev->dest = strdup(bdev->src); + } if (!lxc_list_empty(&conf->hooks[LXCHOOK_CLONE])) { /* Start of environment variable setup for hooks */ -- 1.8.1.2 ------------------------------------------------------------------------------ October Webinars: Code for Performance Free Intel webinars can help you accelerate application performance. Explore tips for MPI, OpenMP, advanced profiling, and more. Get the most from the latest Intel processors and coprocessors. 
See abstracts and register > http://pubads.g.doubleclick.net/gampad/clk?id=60135991&iu=/4140/ostg.clktrk _______________________________________________ Lxc-devel mailing list Lxc-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/lxc-devel