3.10 kernel comes with proper hierarchical enforcement of devices cgroup. To keep that code somewhat sane, certain things are not allowed. Switching from default-allow to default-deny and vice versa are not allowed when there are children cgroups. (This *could* be simplified in the kernel by checking that all child cgroups are unpopulated, but that has not yet been done and may be rejected)
The mountcgroup hook causes lxc-start to break with 3.10 kernels, because you cannot write 'a' to devices.deny once you have a child cgroup. With this patch, (a) lxcpath is passed to hooks, (b) the cgroup mount hook sets the container's devices cgroup, and (c) setup_cgroup() during lxc startup ignores failures to write to devices subsystem if we are already in a child of the container's new cgroup. ((a) is not really related to this bug, but is definately needed. The followup work of making the other hooks use the passed-in lxcpath is still to be done) Signed-off-by: Serge Hallyn <serge.hal...@ubuntu.com> --- hooks/mountcgroups | 22 ++++++++++++++++++++++ src/lxc/cgroup.c | 37 +++++++++++++++++++++++++++++++++++++ src/lxc/cgroup.h | 2 ++ src/lxc/conf.c | 16 +++++++++------- src/lxc/conf.h | 6 ++++-- src/lxc/lxccontainer.c | 2 +- src/lxc/start.c | 20 +++++++++++++------- 7 files changed, 88 insertions(+), 17 deletions(-) diff --git a/hooks/mountcgroups b/hooks/mountcgroups index 879fa3c..c1cdcd4 100755 --- a/hooks/mountcgroups +++ b/hooks/mountcgroups @@ -28,15 +28,37 @@ set -e c=$1 d=/sys/fs/cgroup d2=$LXC_ROOTFS_MOUNT/${d} +# name lxc hook lxcpath +lxcpath=$4 if [ ! -d "$d" ]; then exit 0 fi mount -n -t tmpfs tmpfs ${d2} +do_devices_setup() { + local devdir="$1" + local c="$2" + local line + local w # which (allow or deny) + local v # value + egrep "^lxc.cgroup.devices.(allow|deny)[ \t]*=" ${lxcpath}/${c}/config | while read line; do + w=`echo $line | awk -F. '{ print $4 }' | awk '{ print $1 }'` + v=`echo $line | awk -F= '{ print $2 }'` + echo "$v" >> "$devdir"/devices.$w + done +} + # XXX TODO - we'll need to account for other cgroup groups beside 'lxc', # i.e. 'build' or 'users/joe'. for dir in `/bin/ls $d`; do + if [ "$dir" = "devices" ]; then + devicesdir="${d}/${dir}/lxc/${c}" + mkdir -p "$devicesdir" + # set the devices cgroup perms now - we can't change from blacklist to + # whitelist, or add perms, once we have children. + do_devices_setup "$devicesdir" "${c}" + fi mkdir -p "${d}/${dir}/lxc/${c}/${c}.real" echo 1 > "${d}/${dir}/lxc/${c}/${c}.real/tasks" mkdir -p ${d2}/${dir} diff --git a/src/lxc/cgroup.c b/src/lxc/cgroup.c index c28e249..38ed514 100644 --- a/src/lxc/cgroup.c +++ b/src/lxc/cgroup.c @@ -897,3 +897,40 @@ int lxc_cgroup_attach(pid_t pid, const char *name, const char *lxcpath) free(dirpath); return ret; } + +bool is_in_subcgroup(int pid, const char *subsystem, const char *cgpath) +{ + char filepath[MAXPATHLEN], *line = NULL, v1[MAXPATHLEN], v2[MAXPATHLEN]; + FILE *f; + int ret, junk; + size_t sz = 0, l1 = strlen(cgpath), l2; + char *end = index(subsystem, '.'); + int len = end ? (end - subsystem) : strlen(subsystem); + + ret = snprintf(filepath, MAXPATHLEN, "/proc/%d/cgroup", pid); + if (ret < 0 || ret >= MAXPATHLEN) + return false; + if ((f = fopen(filepath, "r")) == NULL) + return false; + while (getline(&line, &sz, f) != -1) { + // nr:subsystem:path + v2[0] = v2[1] = '\0'; + ret = sscanf(line, "%d:%[^:]:%s", &junk, v1, v2); + if (ret != 3) { + fclose(f); + return false; + } + len = end ? end - subsystem : strlen(subsystem); + if (strncmp(v1, subsystem, len) != 0) + continue; + // v2 will start with '/', skip it by using v2+1 + // we must be in SUBcgroup, so make sure l2 > l1 + l2 = strlen(v2+1); + if (l2 > l1 && strncmp(v2+1, cgpath, l1) == 0) { + fclose(f); + return true; + } + } + fclose(f); + return false; +} diff --git a/src/lxc/cgroup.h b/src/lxc/cgroup.h index 747ff5c..c08b2f7 100644 --- a/src/lxc/cgroup.h +++ b/src/lxc/cgroup.h @@ -22,6 +22,7 @@ */ #ifndef _cgroup_h #define _cgroup_h +#include <stdbool.h> struct lxc_handler; extern int lxc_cgroup_destroy(const char *cgpath); @@ -32,4 +33,5 @@ extern char *lxc_cgroup_path_create(const char *lxcgroup, const char *name); extern int lxc_cgroup_enter(const char *cgpath, pid_t pid); extern int lxc_cgroup_attach(pid_t pid, const char *name, const char *lxcpath); extern char *cgroup_path_get(const char *subsystem, const char *cgpath); +extern bool is_in_subcgroup(int pid, const char *subsystem, const char *cgpath); #endif diff --git a/src/lxc/conf.c b/src/lxc/conf.c index 37c0cb4..a69c4f8 100644 --- a/src/lxc/conf.c +++ b/src/lxc/conf.c @@ -313,7 +313,8 @@ static int run_buffer(char *buffer) } static int run_script_argv(const char *name, const char *section, - const char *script, const char *hook, char **argsin) + const char *script, const char *hook, const char *lxcpath, + char **argsin) { int ret, i; char *buffer; @@ -2782,7 +2783,7 @@ int uid_shift_ttys(int pid, struct lxc_conf *conf) return 0; } -int lxc_setup(const char *name, struct lxc_conf *lxc_conf) +int lxc_setup(const char *name, struct lxc_conf *lxc_conf, const char *lxcpath) { #if HAVE_APPARMOR /* || HAVE_SMACK || HAVE_SELINUX */ int mounted; @@ -2798,7 +2799,7 @@ int lxc_setup(const char *name, struct lxc_conf *lxc_conf) return -1; } - if (run_lxc_hooks(name, "pre-mount", lxc_conf, NULL)) { + if (run_lxc_hooks(name, "pre-mount", lxc_conf, lxcpath, NULL)) { ERROR("failed to run pre-mount hooks for container '%s'.", name); return -1; } @@ -2825,13 +2826,13 @@ int lxc_setup(const char *name, struct lxc_conf *lxc_conf) return -1; } - if (run_lxc_hooks(name, "mount", lxc_conf, NULL)) { + if (run_lxc_hooks(name, "mount", lxc_conf, lxcpath, NULL)) { ERROR("failed to run mount hooks for container '%s'.", name); return -1; } if (lxc_conf->autodev) { - if (run_lxc_hooks(name, "autodev", lxc_conf, NULL)) { + if (run_lxc_hooks(name, "autodev", lxc_conf, lxcpath, NULL)) { ERROR("failed to run autodev hooks for container '%s'.", name); return -1; } @@ -2902,7 +2903,8 @@ int lxc_setup(const char *name, struct lxc_conf *lxc_conf) return 0; } -int run_lxc_hooks(const char *name, char *hook, struct lxc_conf *conf, char *argv[]) +int run_lxc_hooks(const char *name, char *hook, struct lxc_conf *conf, + const char *lxcpath, char *argv[]) { int which = -1; struct lxc_list *it; @@ -2926,7 +2928,7 @@ int run_lxc_hooks(const char *name, char *hook, struct lxc_conf *conf, char *arg lxc_list_for_each(it, &conf->hooks[which]) { int ret; char *hookname = it->elem; - ret = run_script_argv(name, "lxc", hookname, hook, argv); + ret = run_script_argv(name, "lxc", hookname, hook, lxcpath, argv); if (ret) return ret; } diff --git a/src/lxc/conf.h b/src/lxc/conf.h index 9b1677e..ed3240d 100644 --- a/src/lxc/conf.h +++ b/src/lxc/conf.h @@ -290,7 +290,8 @@ struct lxc_conf { char *rcfile; // Copy of the top level rcfile we read }; -int run_lxc_hooks(const char *name, char *hook, struct lxc_conf *conf, char *argv[]); +int run_lxc_hooks(const char *name, char *hook, struct lxc_conf *conf, + const char *lxcpath, char *argv[]); extern int setup_cgroup(const char *cgpath, struct lxc_list *cgroups); extern int setup_cgroup_devices(const char *cgpath, struct lxc_list *cgroups); @@ -326,7 +327,8 @@ extern int uid_shift_ttys(int pid, struct lxc_conf *conf); * Configure the container from inside */ -extern int lxc_setup(const char *name, struct lxc_conf *lxc_conf); +extern int lxc_setup(const char *name, struct lxc_conf *lxc_conf, + const char *lxcpath); extern void lxc_rename_phys_nics_on_shutdown(struct lxc_conf *conf); #endif diff --git a/src/lxc/lxccontainer.c b/src/lxc/lxccontainer.c index 4e71fb1..56cae97 100644 --- a/src/lxc/lxccontainer.c +++ b/src/lxc/lxccontainer.c @@ -1810,7 +1810,7 @@ static int clone_update_rootfs(struct lxc_container *c, int flags, char **hookar SYSERROR("failed to set environment variable for rootfs mount"); } - if (run_lxc_hooks(c->name, "clone", conf, hookargs)) { + if (run_lxc_hooks(c->name, "clone", conf, c->get_config_path(c), hookargs)) { ERROR("Error executing clone hook for %s", c->name); exit(1); } diff --git a/src/lxc/start.c b/src/lxc/start.c index 8c8af9c..defa87b 100644 --- a/src/lxc/start.c +++ b/src/lxc/start.c @@ -315,7 +315,7 @@ struct lxc_handler *lxc_init(const char *name, struct lxc_conf *conf, const char } /* End of environment variable setup for hooks */ - if (run_lxc_hooks(name, "pre-start", conf, NULL)) { + if (run_lxc_hooks(name, "pre-start", conf, handler->lxcpath, NULL)) { ERROR("failed to run pre-start hooks for container '%s'.", name); goto out_aborting; } @@ -368,7 +368,7 @@ static void lxc_fini(const char *name, struct lxc_handler *handler) lxc_set_state(name, handler, STOPPING); lxc_set_state(name, handler, STOPPED); - if (run_lxc_hooks(name, "post-stop", handler->conf, NULL)) + if (run_lxc_hooks(name, "post-stop", handler->conf, handler->lxcpath, NULL)) ERROR("failed to run post-stop hooks for container '%s'.", name); /* reset mask set by setup_signal_fd */ @@ -519,7 +519,7 @@ static int do_start(void *data) #endif /* Setup the container, ip, names, utsname, ... */ - if (lxc_setup(handler->name, handler->conf)) { + if (lxc_setup(handler->name, handler->conf, handler->lxcpath)) { ERROR("failed to setup the container"); goto out_warn_father; } @@ -534,7 +534,7 @@ static int do_start(void *data) if (lxc_seccomp_load(handler->conf) != 0) goto out_warn_father; - if (run_lxc_hooks(handler->name, "start", handler->conf, NULL)) { + if (run_lxc_hooks(handler->name, "start", handler->conf, handler->lxcpath, NULL)) { ERROR("failed to run start hooks for container '%s'.", handler->name); goto out_warn_father; } @@ -597,7 +597,7 @@ int save_phys_nics(struct lxc_conf *conf) return 0; } - +extern bool is_in_subcgroup(int pid, const char *subsystem, const char *cgpath); int lxc_spawn(struct lxc_handler *handler) { int failed_before_rename = 0; @@ -703,8 +703,14 @@ int lxc_spawn(struct lxc_handler *handler) goto out_delete_net; if (setup_cgroup_devices(handler->cgroup, &handler->conf->cgroup)) { - ERROR("failed to setup the devices cgroup for '%s'", name); - goto out_delete_net; + /* an unfortunate special case: startup hooks may have already + * setup the cgroup. If a setting fails, and this is the devices + * subsystem, *and* we are already in a subset of the cgroup, + * then ignore the failure */ + if (!is_in_subcgroup(handler->pid, "devices", handler->cgroup)) { + ERROR("failed to setup the devices cgroup for '%s'", name); + goto out_delete_net; + } } /* Tell the child to complete its initialization and wait for -- 1.8.1.2 ------------------------------------------------------------------------------ This SF.net email is sponsored by Windows: Build for Windows Store. http://p.sf.net/sfu/windows-dev2dev _______________________________________________ Lxc-devel mailing list Lxc-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/lxc-devel