(This is just to show where I ended up in trying to support this
feature.  It rewrites more code than I'd like, but the end result
seemed to work and do what we want.)

Add a monitor command to get the cgroup for a running container.  This
allows container r1 started from /var/lib/lxc and container r1 started
from /home/ubuntu/lxcbase to pick unique cgroup directories (which
will be /sys/fs/cgroup/$subsys/lxc/r1 and .../r1-1), and all the lxc-*
tools to get that path over the monitor at lxcpath.

Rework the cgroup code.  Before, if /sys/fs/cgroup/$subsys/lxc/r1
already existed, it would be moved to 'deadXXXXX', and a new r1 created.
Instead, take r1-1, r1-2, etc.

I ended up removing both the use of cgroup.clone_children and support
for ns cgroup.  Presumably we'll want to put support for ns cgroup
back in for older kernels.

Instead of using clone_children, I manually copy the parent's cpuset
mems and cpus files (the only ones requiring clone_children).

Note that upstream kernel is working toward strict hierarchical
limit enforcements, which will be good for us.

NOTE - I am changing the lxc_answer struct size.  This means that
upgrades to this version while containers are running will result
in lxc_* commands on pre-running containers will fail.

(NOTE2 - ignore the testcase for now...  I did start with it,
but as I was working out some monitor bugs I let it languish)

NOTE3 - oh yeah, lxc-attach's cgroup support still needs to be
reimplemented.

Signed-off-by: Serge Hallyn <serge.hal...@ubuntu.com>
---
 src/lxc/cgroup.c       | 817 ++++++++++++++++++++-----------------------------
 src/lxc/cgroup.h       |  17 +-
 src/lxc/commands.c     |  11 +
 src/lxc/commands.h     |   3 +
 src/lxc/conf.c         |   7 +-
 src/lxc/conf.h         |   2 +-
 src/lxc/freezer.c      |  35 ++-
 src/lxc/lxc.h          |  28 +-
 src/lxc/lxc_attach.c   |  96 ++++--
 src/lxc/lxc_cgroup.c   |   4 +-
 src/lxc/lxc_unshare.c  |  10 -
 src/lxc/lxccontainer.c |   4 +-
 src/lxc/start.c        |  54 +++-
 src/lxc/start.h        |   2 +-
 src/lxc/state.c        |   7 +-
 src/lxc/stop.c         |   3 +-
 src/tests/Makefile.am  |   4 +-
 src/tests/cgpath.c     | 142 +++++++++
 src/tests/lxcpath.c    |   2 +-
 19 files changed, 692 insertions(+), 556 deletions(-)
 create mode 100644 src/tests/cgpath.c

diff --git a/src/lxc/cgroup.c b/src/lxc/cgroup.c
index 28f3474..7b13753 100644
--- a/src/lxc/cgroup.c
+++ b/src/lxc/cgroup.c
@@ -38,6 +38,7 @@
 
 #include "error.h"
 #include "config.h"
+#include "commands.h"
 
 #include <lxc/log.h>
 #include <lxc/cgroup.h>
@@ -53,11 +54,6 @@ lxc_log_define(lxc_cgroup, lxc);
 
 #define MTAB "/proc/mounts"
 
-enum {
-       CGROUP_NS_CGROUP = 1,
-       CGROUP_CLONE_CHILDREN,
-};
-
 /* Check if a mount is a cgroup hierarchy for any subsystem.
  * Return the first subsystem found (or NULL if none).
  */
@@ -104,11 +100,12 @@ static char *mount_has_subsystem(const struct mntent 
*mntent)
  * Returns a pointer to the answer, which may be "".
  */
 static char *get_init_cgroup(const char *subsystem, struct mntent *mntent,
-                            char *dsg)
+                            char *dsg, int prependslash)
 {
        FILE *f;
        char *c, *c2;
        char line[MAXPATHLEN];
+       int ret;
 
        *dsg = '\0';
        f = fopen("/proc/1/cgroup", "r");
@@ -134,10 +131,18 @@ static char *get_init_cgroup(const char *subsystem, 
struct mntent *mntent,
 good:
                DEBUG("get_init_cgroup: found init cgroup for subsys %s at 
%s\n",
                        subsystem, c2);
-               strncpy(dsg, c2, MAXPATHLEN);
-               c = &dsg[strlen(dsg)-1];
+               ret = snprintf(dsg, MAXPATHLEN, "%s%s", prependslash ? "/" : 
"", c2);
+               if (ret < 0 || ret >= MAXPATHLEN) {
+                       WARN("init cgroup path name was too long.");
+                       goto found;
+               }
+               if (ret < 1)
+                       goto found;
+
+               c = &dsg[ret-1];
                if (*c == '\n')
                        *c = '\0';
+
                goto found;
        }
 
@@ -146,27 +151,15 @@ found:
        return dsg;
 }
 
-static int get_cgroup_flags(struct mntent *mntent)
-{
-       int flags = 0;
-
-
-       if (hasmntopt(mntent, "ns"))
-               flags |= CGROUP_NS_CGROUP;
-
-       if (hasmntopt(mntent, "clone_children"))
-               flags |= CGROUP_CLONE_CHILDREN;
-
-       DEBUG("cgroup %s has flags 0x%x", mntent->mnt_dir, flags);
-       return flags;
-}
-
+/*
+ * Return cgroup mountpoint plus init cgroup
+ */
 static int get_cgroup_mount(const char *subsystem, char *mnt)
 {
        struct mntent *mntent;
-       char initcgroup[MAXPATHLEN];
+       char initcgroup[MAXPATHLEN], *init;
        FILE *file = NULL;
-       int ret, flags, err = -1;
+       int ret, err = -1;
 
        file = setmntent(MTAB, "r");
        if (!file) {
@@ -181,16 +174,14 @@ static int get_cgroup_mount(const char *subsystem, char 
*mnt)
                if (subsystem) {
                        if (!hasmntopt(mntent, subsystem))
                                continue;
-               }
-               else {
+               } else {
                        if (!mount_has_subsystem(mntent))
                                continue;
                }
 
-               flags = get_cgroup_flags(mntent);
-               ret = snprintf(mnt, MAXPATHLEN, "%s%s%s", mntent->mnt_dir,
-                              get_init_cgroup(subsystem, NULL, initcgroup),
-                              (flags & CGROUP_NS_CGROUP) ? "" : "/lxc");
+               init = get_init_cgroup(NULL, mntent, initcgroup, 1);
+               ret = snprintf(mnt, MAXPATHLEN, "%s%s", mntent->mnt_dir,
+                               init);
                if (ret < 0 || ret >= MAXPATHLEN)
                        goto fail;
 
@@ -207,180 +198,285 @@ out:
        return err;
 }
 
-int lxc_ns_is_mounted(void)
+/*
+ * cgroup_path_get: put into *path the pathname for
+ * %subsystem and cgroup %name.  If %subsystem is NULL, then
+ * the first mounted cgroup will be used (for nr_tasks)
+ */
+int cgroup_path_get(char **path, const char *subsystem, const char *cgpath)
 {
        static char        buf[MAXPATHLEN];
+       static char        retbuf[MAXPATHLEN];
+       int rc;
+
+       /* lxc_cgroup_set passes a state object for the subsystem,
+        * so trim it to just the subsystem part */
+       if (subsystem) {
+               rc = snprintf(retbuf, MAXPATHLEN, "%s", subsystem);
+               if (rc < 0 || rc >= MAXPATHLEN) {
+                       ERROR("subsystem name too long");
+                       return -1;
+               }
+               char *s = index(retbuf, '.');
+               if (s)
+                       *s = '\0';
+               DEBUG("%s: called for subsys %s name %s\n", __func__, retbuf, 
cgpath);
+       }
+       if (get_cgroup_mount(subsystem ? retbuf : NULL, buf)) {
+               ERROR("cgroup is not mounted");
+               return -1;
+       }
 
-       return (get_cgroup_mount("ns", buf) == 0);
+       rc = snprintf(retbuf, MAXPATHLEN, "%s/%s", buf, cgpath);
+       if (rc < 0 || rc >= MAXPATHLEN) {
+               ERROR("name too long");
+               return -1;
+       }
+
+       DEBUG("%s: returning %s for subsystem %s", __func__, retbuf, subsystem);
+
+       *path = retbuf;
+       return 0;
 }
 
-static int cgroup_rename_nsgroup(const char *mnt, const char *name, pid_t pid)
+
+/*
+ * lxc_cgroup_path_get: put into *path the pathname for
+ * %subsystem and cgroup %name.  If %subsystem is NULL, then
+ * the first mounted cgroup will be used (for nr_tasks)
+ *
+ * This is the exported function, which determines cgpath from the
+ * monitor running in lxcpath.
+ */
+int lxc_cgroup_path_get(char **path, const char *subsystem, const char *name, 
const char *lxcpath)
 {
-       char oldname[MAXPATHLEN];
-       char newname[MAXPATHLEN];
-       int ret;
+       struct lxc_command command = {
+               .request = { .type = LXC_COMMAND_CGROUP },
+       };
 
-       ret = snprintf(oldname, MAXPATHLEN, "%s/%d", mnt, pid);
-       if (ret >= MAXPATHLEN)
-               return -1;
+       int ret, stopped = 0;
 
-       ret = snprintf(newname, MAXPATHLEN, "%s/%s", mnt, name);
-       if (ret >= MAXPATHLEN)
+       ret = lxc_command(name, &command, &stopped, lxcpath);
+       if (ret < 0 && stopped)
+               return STOPPED;
+
+       if (ret < 0) {
+               ERROR("failed to send command");
                return -1;
+       }
 
-       if (rename(oldname, newname)) {
-               SYSERROR("failed to rename cgroup %s->%s", oldname, newname);
+       if (!ret) {
+               WARN("'%s' has stopped before sending its state", name);
                return -1;
        }
 
-       DEBUG("'%s' renamed to '%s'", oldname, newname);
+       if (command.answer.ret < 0 || command.answer.pathlen < 0) {
+               ERROR("failed to get state for '%s': %s",
+                       name, strerror(-command.answer.ret));
+               return -1;
+       }
 
-       return 0;
+       return cgroup_path_get(path, subsystem, command.answer.path);
 }
 
-static int cgroup_enable_clone_children(const char *path)
+static int do_cgroup_set(const char *path, const char *filename, const char 
*value)
 {
-       FILE *f;
-       int ret = 0;
+       int fd, ret;
 
-       f = fopen(path, "w");
-       if (!f) {
-               SYSERROR("failed to open '%s'", path);
+       fd = open(path, O_WRONLY);
+       if (fd < 0) {
+               SYSERROR("open %s : %s", path, strerror(errno));
                return -1;
        }
 
-       if (fprintf(f, "1") < 1) {
-               ERROR("failed to write flag to '%s'", path);
-               ret = -1;
-       }
+       ret = write(fd, value, strlen(value));
 
-       fclose(f);
+       if (ret < 0) {
+               close(fd);
+               SYSERROR("write %s : %s", path, strerror(errno));
+               return ret;
+       }
 
-       return ret;
+       close(fd);
+       return 0;
 }
 
-static int lxc_one_cgroup_finish_attach(int fd, pid_t pid)
+int lxc_cgroup_set_bypath(const char *cgpath, const char *filename, const char 
*value)
 {
-       char buf[32];
-       int ret;
+       int ret;
+       char *dirpath;
+       char path[MAXPATHLEN];
 
-       snprintf(buf, 32, "%ld", (long)pid);
+       ret = cgroup_path_get(&dirpath, filename, cgpath);
+       if (ret)
+               return -1;
 
-       ret = write(fd, buf, strlen(buf));
-       if (ret <= 0) {
-               SYSERROR("failed to write pid '%ld' to fd '%d'", (long)pid, fd);
-               ret = -1;
-       } else {
-               ret = 0;
-       }
+       ret = snprintf(path, MAXPATHLEN, "%s/%s", dirpath, filename);
+       if (ret < 0 || ret >= MAXPATHLEN) {
+               ERROR("pathname too long");
+               return -1;
+       }
 
-       close(fd);
-       return ret;
+       return do_cgroup_set(path, filename, value);
 }
 
-static int lxc_one_cgroup_dispose_attach(int fd)
+int lxc_cgroup_set(const char *name, const char *filename, const char *value,
+                  const char *lxcpath)
 {
-       close(fd);
-       return 0;
+       int ret;
+       char *dirpath;
+       char path[MAXPATHLEN];
+
+       ret = lxc_cgroup_path_get(&dirpath, filename, name, lxcpath);
+       if (ret)
+               return -1;
+
+       ret = snprintf(path, MAXPATHLEN, "%s/%s", dirpath, filename);
+       if (ret < 0 || ret >= MAXPATHLEN) {
+               ERROR("pathname too long");
+               return -1;
+       }
+
+       return do_cgroup_set(path, filename, value);
 }
 
-static int lxc_one_cgroup_prepare_attach(const char *name,
-                                        struct mntent *mntent)
+/*
+ * If you pass in NULL value or 0 len, then you are asking for the size
+ * of the file.  Note that we can't get the file size quickly through stat
+ * or lseek.  Therefore if you pass in len > 0 but less than the file size,
+ * your only indication will be that the return value will be equal to the
+ * passed-in ret.  We will not return the actual full file size.
+ */
+int lxc_cgroup_get(const char *name, const char *filename, char *value,
+                  size_t len, const char *lxcpath)
 {
-       int fd;
-       char tasks[MAXPATHLEN], initcgroup[MAXPATHLEN];
-       char *cgmnt = mntent->mnt_dir;
-       int flags;
+       int fd, ret = -1;
+       char *dirpath;
+       char path[MAXPATHLEN];
        int rc;
 
-       flags = get_cgroup_flags(mntent);
+       ret = lxc_cgroup_path_get(&dirpath, filename, name, lxcpath);
+       if (ret)
+               return -1;
 
-       rc = snprintf(tasks, MAXPATHLEN, "%s%s%s/%s/tasks", cgmnt,
-                get_init_cgroup(NULL, mntent, initcgroup),
-                (flags & CGROUP_NS_CGROUP) ? "" : "/lxc",
-                name);
+       rc = snprintf(path, MAXPATHLEN, "%s/%s", dirpath, filename);
        if (rc < 0 || rc >= MAXPATHLEN) {
                ERROR("pathname too long");
                return -1;
        }
 
-       fd = open(tasks, O_WRONLY);
+       fd = open(path, O_RDONLY);
        if (fd < 0) {
-               SYSERROR("failed to open '%s'", tasks);
+               ERROR("open %s : %s", path, strerror(errno));
                return -1;
        }
 
-       return fd;
-}
-
-static int lxc_one_cgroup_attach(const char *name, struct mntent *mntent, 
pid_t pid)
-{
-       int fd;
-
-       fd = lxc_one_cgroup_prepare_attach(name, mntent);
-       if (fd < 0) {
-               return -1;
+       if (!len || !value) {
+               char buf[100];
+               int count = 0;
+               while ((ret = read(fd, buf, 100)) > 0)
+                       count += ret;
+               if (ret >= 0)
+                       ret = count;
+       } else {
+               memset(value, 0, len);
+               ret = read(fd, value, len);
        }
 
-       return lxc_one_cgroup_finish_attach(fd, pid);
+       if (ret < 0)
+               ERROR("read %s : %s", path, strerror(errno));
+
+       close(fd);
+       return ret;
 }
 
-int lxc_cgroup_dispose_attach(void *data)
+int lxc_cgroup_nrtasks(const char *cgpath)
 {
-       int *fds = data;
-       int ret, err;
+       char *dpath;
+       char path[MAXPATHLEN];
+       int pid, ret, count = 0;
+       FILE *file;
+       int rc;
 
-       if (!fds) {
-               return 0;
-       }
+       ret = cgroup_path_get(&dpath, NULL, cgpath);
+       if (ret)
+               return -1;
 
-       ret = 0;
+       rc = snprintf(path, MAXPATHLEN, "%s/tasks", dpath);
+       if (rc < 0 || rc >= MAXPATHLEN) {
+               ERROR("pathname too long");
+               return -1;
+       }
 
-       for (; *fds >= 0; fds++) {
-               err = lxc_one_cgroup_dispose_attach(*fds);
-               if (err) {
-                       ret = err;
-               }
+       file = fopen(path, "r");
+       if (!file) {
+               SYSERROR("fopen '%s' failed", path);
+               return -1;
        }
 
-       free(data);
+       while (fscanf(file, "%d", &pid) != EOF)
+               count++;
 
-       return ret;
+       fclose(file);
+
+       return count;
 }
 
-int lxc_cgroup_finish_attach(void *data, pid_t pid)
+static long get_value(const char *dir, const char *file)
 {
-       int *fds = data;
-       int err;
+       FILE *f;
+       char path[MAXPATHLEN];
+       int ret, retv;
 
-       if (!fds) {
+       retv = snprintf(path, MAXPATHLEN, "%s/%s", dir, file);
+       if (retv < 0 || retv >= MAXPATHLEN)
                return 0;
-       }
+       f = fopen(path, "r");
+       ret = fscanf(f, "%d", &retv);
+       fclose(f);
+       if (ret != 1)
+               return 0;
+       return retv;
+}
 
-       for (; *fds >= 0; fds++) {
-               err = lxc_one_cgroup_finish_attach(*fds, pid);
-               if (err) {
-                       /* get rid of the rest of them */
-                       lxc_cgroup_dispose_attach(data);
-                       return -1;
-               }
-               *fds = -1;
-       }
+static void set_value(const char *dir, const char *file, long v)
+{
+       FILE *f;
+       char path[MAXPATHLEN];
+       int retv;
 
-       free(data);
+       retv = snprintf(path, MAXPATHLEN, "%s/%s", dir, file);
+       if (retv < 0 || retv >= MAXPATHLEN)
+               return;
+       f = fopen(path, "w");
+       fprintf(f, "%ld\n", v);
+       fclose(f);
+}
 
-       return 0;
+static void setup_cpuset(const char *path)
+{
+       /* copy parent values for mems_allowed and cpus_allowed */
+       char *parentpath = strdup(path);
+       char *p;
+       long v;
+       if ((p = rindex(parentpath, '/')) == NULL)
+               goto out;
+       v = get_value(parentpath, "cpuset.mems");
+       set_value(path, "cpuset.mems", v);
+       v = get_value(parentpath, "cpuset.cpus");
+       set_value(path, "cpuset.cpus", v);
+       
+out:
+       free(parentpath);
 }
 
-int lxc_cgroup_prepare_attach(const char *name, void **data)
+/* sanity check - make sure that '/init_cgroup/lxc_group' exists */
+static int create_lxcgroups(const char *lxcgroup)
 {
-       struct mntent *mntent;
        FILE *file = NULL;
-       int err = -1;
-       int found = 0;
-       int *fds;
-       int i;
-       static const int MAXFDS = 256;
+       struct mntent *mntent;
+       int ret, retv = -1;
+       char path[MAXPATHLEN], initpath[MAXPATHLEN];
 
        file = setmntent(MTAB, "r");
        if (!file) {
@@ -388,202 +484,121 @@ int lxc_cgroup_prepare_attach(const char *name, void 
**data)
                return -1;
        }
 
-       /* create a large enough buffer for all practical
-        * use cases
-        */
-       fds = malloc(sizeof(int) * MAXFDS);
-       if (!fds) {
-               err = -1;
-               goto out;
-       }
-       for (i = 0; i < MAXFDS; i++) {
-               fds[i] = -1;
-       }
-
-       err = 0;
-       i = 0;
        while ((mntent = getmntent(file))) {
-               if (i >= MAXFDS - 1) {
-                       ERROR("too many cgroups to attach to, aborting");
-                       lxc_cgroup_dispose_attach(fds);
-                       errno = ENOMEM;
-                       err = -1;
-                       goto out;
-               }
-
-               DEBUG("checking '%s' (%s)", mntent->mnt_dir, mntent->mnt_type);
+               char *init = get_init_cgroup(NULL, mntent, initpath, 1);
 
                if (strcmp(mntent->mnt_type, "cgroup"))
                        continue;
                if (!mount_has_subsystem(mntent))
                        continue;
 
-               INFO("[%d] found cgroup mounted at '%s',opts='%s'",
-                    ++found, mntent->mnt_dir, mntent->mnt_opts);
-
-               fds[i] = lxc_one_cgroup_prepare_attach(name, mntent);
-               if (fds[i] < 0) {
-                       err = fds[i];
-                       lxc_cgroup_dispose_attach(fds);
-                       goto out;
+               /* 
+                * TODO - handle case where lxcgroup has subdirs?  (i.e. 
build/l1)
+                * May not be worthwhile - remember cgroup depth has perf 
penalties
+                * */
+               ret = snprintf(path, MAXPATHLEN, "%s%s/%s",
+                              mntent->mnt_dir, init, lxcgroup ? lxcgroup : 
"lxc");
+               if (ret < 0 || ret >= MAXPATHLEN)
+                       goto fail;
+               if (access(path, F_OK)) {
+                       ret = mkdir(path, 0755);
+                       if (ret == -1 && errno != EEXIST) {
+                               SYSERROR("failed to create '%s' directory", 
path);
+                               goto fail;
+                       }
+               } else if (hasmntopt(mntent, "cpuset")) {
+                       setup_cpuset(path);
                }
-               i++;
-       };
-
-       if (!found)
-               ERROR("No cgroup mounted on the system");
-
-       *data = fds;
-
-out:
-       endmntent(file);
-       return err;
-}
-
-/*
- * for each mounted cgroup, attach a pid to the cgroup for the container
- */
-int lxc_cgroup_attach(const char *name, pid_t pid)
-{
-       void *data = NULL;
-       int ret;
-
-       ret = lxc_cgroup_prepare_attach(name, &data);
-       if (ret < 0) {
-               return ret;
        }
 
-       return lxc_cgroup_finish_attach(data, pid);
+       retv = 0;
+fail:
+       endmntent(file);
+       return retv;
 }
 
 /*
- * rename cgname, which is under cgparent, to a new name starting
- * with 'cgparent/dead'.  That way cgname can be reused.  Return
- * 0 on success, -1 on failure.
+ * XXX This should probably be locked globally
+ * 
+ * Races won't be determintal, you'll just end up with leftover unused cgroups
  */
-int try_to_move_cgname(char *cgparent, char *cgname)
+char *lxc_cgroup_path_create(const char *lxcgroup, const char *name)
 {
-       char *newdir;
+       int i = 0, ret;
+       char *retpath, path[MAXPATHLEN], initpath[MAXPATHLEN], *init;
+       char tail[12];
+       FILE *file = NULL;
+       struct mntent *mntent;
 
-       /* tempnam problems don't matter here - cgroupfs will prevent
-        * duplicates if we race, and we'll just fail at that (unlikely)
-        * point
-        */
+       if (create_lxcgroups(lxcgroup) < 0)
+               return NULL;
 
-       newdir = tempnam(cgparent, "dead");
-       if (!newdir)
-               return -1;
-       if (rename(cgname, newdir))
-               return -1;
-       WARN("non-empty cgroup %s renamed to %s, please manually inspect it\n",
-               cgname, newdir);
-
-       return 0;
-}
-
-/*
- * create a cgroup for the container in a particular subsystem.
- */
-static int lxc_one_cgroup_create(const char *name,
-                                struct mntent *mntent, pid_t pid)
-{
-       char cginit[MAXPATHLEN], cgname[MAXPATHLEN], cgparent[MAXPATHLEN];
-       char clonechild[MAXPATHLEN];
-       char initcgroup[MAXPATHLEN];
-       int flags, ret;
-
-       /* cgparent is the parent dir, e.g., 
/sys/fs/cgroup/<cgroup>/<init-cgroup>/lxc */
-       /* (remember get_init_cgroup() returns a path starting with '/') */
-       /* cgname is the full name, e.g., 
/sys/fs/cgroup/<cgroup>/<init-cgroup>/lxc/name */
-       ret = snprintf(cginit, MAXPATHLEN, "%s%s", mntent->mnt_dir,
-               get_init_cgroup(NULL, mntent, initcgroup));
-       if (ret < 0 || ret >= MAXPATHLEN) {
-               SYSERROR("Failed creating pathname for init's cgroup (%d)\n", 
ret);
-               return -1;
+again:
+       file = setmntent(MTAB, "r");
+       if (!file) {
+               SYSERROR("failed to open %s", MTAB);
+               return NULL;
        }
 
-       flags = get_cgroup_flags(mntent);
-
-       ret = snprintf(cgparent, MAXPATHLEN, "%s%s", cginit,
-                      (flags & CGROUP_NS_CGROUP) ? "" : "/lxc");
-       if (ret < 0 || ret >= MAXPATHLEN) {
-               SYSERROR("Failed creating pathname for cgroup parent (%d)\n", 
ret);
-               return -1;
-       }
-       ret = snprintf(cgname, MAXPATHLEN, "%s/%s", cgparent, name);
-       if (ret < 0 || ret >= MAXPATHLEN) {
-               SYSERROR("Failed creating pathname for cgroup (%d)\n", ret);
-               return -1;
-       }
+       if (i)
+               snprintf(tail, 12, "-%d", i);
+       else
+               *tail = '\0';
 
-       /* Do we have the deprecated ns_cgroup subsystem? */
-       if (flags & CGROUP_NS_CGROUP) {
-               WARN("using deprecated ns_cgroup");
-               return cgroup_rename_nsgroup(cginit, name, pid);
-       }
+       while ((mntent = getmntent(file))) {
 
-       ret = snprintf(clonechild, MAXPATHLEN, "%s/cgroup.clone_children",
-                      cginit);
-       if (ret < 0 || ret >= MAXPATHLEN) {
-               SYSERROR("Failed creating pathname for clone_children (%d)\n", 
ret);
-               return -1;
-       }
+               if (strcmp(mntent->mnt_type, "cgroup"))
+                       continue;
+               if (!mount_has_subsystem(mntent))
+                       continue;
 
-       /* we check if the kernel has clone_children, at this point if there
-        * no clone_children neither ns_cgroup, that means the cgroup is mounted
-        * without the ns_cgroup and it has not the compatibility flag
-        */
-       if (access(clonechild, F_OK)) {
-               ERROR("no ns_cgroup option specified");
-               return -1;
-       }
+               init = get_init_cgroup(NULL, mntent, initpath, 1);
+               /* find unused mnt_dir + init_cgroup + lxcgroup + name + -$i */
+               ret = snprintf(path, MAXPATHLEN, "%s%s/%s/%s%s",
+                              mntent->mnt_dir, init,
+                              lxcgroup ? lxcgroup : "lxc", name, tail);
+               if (ret < 0 || ret >= MAXPATHLEN)
+                       goto fail;
 
-       /* enable the clone_children flag of the cgroup */
-       if (cgroup_enable_clone_children(clonechild)) {
-               SYSERROR("failed to enable 'clone_children flag");
-               return -1;
-       }
+               if (access(path, F_OK) == 0) goto next;
 
-       /* if cgparent does not exist, create it */
-       if (access(cgparent, F_OK)) {
-               ret = mkdir(cgparent, 0755);
-               if (ret == -1 && errno != EEXIST) {
-                       SYSERROR("failed to create '%s' directory", cgparent);
-                       return -1;
+               if (mkdir(path, 0755)) {
+                       ERROR("Error creating cgroups");
+                       goto fail;
                }
-       }
-
-       /*
-        * There is a previous cgroup.  Try to delete it.  If that fails
-        * (i.e. it is not empty) try to move it out of the way.
-        */
-       if (!access(cgname, F_OK) && rmdir(cgname)) {
-               if (try_to_move_cgname(cgparent, cgname)) {
-                       SYSERROR("failed to remove previous cgroup '%s'", 
cgname);
-                       return -1;
+               if (hasmntopt(mntent, "cpuset")) {
+                       setup_cpuset(path);
                }
        }
 
-       /* Let's create the cgroup */
-       if (mkdir(cgname, 0755)) {
-               SYSERROR("failed to create '%s' directory", cgname);
-               return -1;
-       }
+       endmntent(file);
 
-       INFO("created cgroup '%s'", cgname);
+       // print out the cgpath part
+       ret = snprintf(path, MAXPATHLEN, "%s/%s%s",
+                      lxcgroup ? lxcgroup : "lxc", name, tail);
+       if (ret < 0 || ret >= MAXPATHLEN) // can't happen
+               goto fail;
 
-       return 0;
+       retpath = strdup(path);
+
+       return retpath;
+
+next:
+       endmntent(file);
+       i++;
+       goto again;
+
+fail:
+       endmntent(file);
+       return NULL;
 }
 
-/*
- * for each mounted cgroup, create a cgroup for the container and attach a pid
- */
-int lxc_cgroup_create(const char *name, pid_t pid)
+int lxc_cgroup_enter(const char *cgpath, pid_t pid)
 {
+       char path[MAXPATHLEN], initpath[MAXPATHLEN], *init;
+       FILE *file = NULL, *fout;
        struct mntent *mntent;
-       FILE *file = NULL;
-       int err = -1;
-       int found = 0;
+       int ret, retv = -1;
 
        file = setmntent(MTAB, "r");
        if (!file) {
@@ -592,31 +607,30 @@ int lxc_cgroup_create(const char *name, pid_t pid)
        }
 
        while ((mntent = getmntent(file))) {
-               DEBUG("checking '%s' (%s)", mntent->mnt_dir, mntent->mnt_type);
-
                if (strcmp(mntent->mnt_type, "cgroup"))
                        continue;
                if (!mount_has_subsystem(mntent))
                        continue;
-
-               INFO("[%d] found cgroup mounted at '%s',opts='%s'",
-                    ++found, mntent->mnt_dir, mntent->mnt_opts);
-
-               err = lxc_one_cgroup_create(name, mntent, pid);
-               if (err)
+               init = get_init_cgroup(NULL, mntent, initpath, 1);
+               ret = snprintf(path, MAXPATHLEN, "%s%s/%s/tasks",
+                              mntent->mnt_dir, init, cgpath);
+               if (ret < 0 || ret >= MAXPATHLEN) {
+                       ERROR("entering cgroup");
                        goto out;
-
-               err = lxc_one_cgroup_attach(name, mntent, pid);
-               if (err)
+               }
+               fout = fopen(path, "w");
+               if (!fout) {
+                       ERROR("entering cgroup");
                        goto out;
-       };
-
-       if (!found)
-               ERROR("No cgroup mounted on the system");
+               }
+               fprintf(fout, "%d\n", (int)pid);
+               fclose(fout);
+       }
+       retv = 0;
 
 out:
        endmntent(file);
-       return err;
+       return retv;
 }
 
 int recursive_rmdir(char *dirname)
@@ -664,16 +678,14 @@ int recursive_rmdir(char *dirname)
 
 }
 
-int lxc_one_cgroup_destroy(struct mntent *mntent, const char *name)
+static int lxc_one_cgroup_destroy(struct mntent *mntent, const char *cgpath)
 {
        char cgname[MAXPATHLEN], initcgroup[MAXPATHLEN];
        char *cgmnt = mntent->mnt_dir;
-       int flags = get_cgroup_flags(mntent);
        int rc;
 
-       rc = snprintf(cgname, MAXPATHLEN, "%s%s%s/%s", cgmnt,
-               get_init_cgroup(NULL, mntent, initcgroup),
-               (flags & CGROUP_NS_CGROUP) ? "" : "/lxc", name);
+       rc = snprintf(cgname, MAXPATHLEN, "%s%s/%s", cgmnt,
+               get_init_cgroup(NULL, mntent, initcgroup, 1), cgpath);
        if (rc < 0 || rc >= MAXPATHLEN) {
                ERROR("name too long");
                return -1;
@@ -692,11 +704,11 @@ int lxc_one_cgroup_destroy(struct mntent *mntent, const 
char *name)
 /*
  * for each mounted cgroup, destroy the cgroup for the container
  */
-int lxc_cgroup_destroy(const char *name)
+int lxc_cgroup_destroy(const char *cgpath)
 {
        struct mntent *mntent;
        FILE *file = NULL;
-       int err = -1;
+       int err, retv  = 0;
 
        file = setmntent(MTAB, "r");
        if (!file) {
@@ -710,168 +722,17 @@ int lxc_cgroup_destroy(const char *name)
                if (!mount_has_subsystem(mntent))
                        continue;
 
-               err = lxc_one_cgroup_destroy(mntent, name);
-               if (err)
-                       break;
+               err = lxc_one_cgroup_destroy(mntent, cgpath);
+               if (err)  // keep trying to clean up the others
+                       retv = -1;
        }
 
        endmntent(file);
-       return err;
+       return retv;
 }
-/*
- * lxc_cgroup_path_get: put into *path the pathname for
- * %subsystem and cgroup %name.  If %subsystem is NULL, then
- * the first mounted cgroup will be used (for nr_tasks)
- */
-int lxc_cgroup_path_get(char **path, const char *subsystem, const char *name)
-{
-       static char        buf[MAXPATHLEN];
-       static char        retbuf[MAXPATHLEN];
-       int rc;
 
-       /* lxc_cgroup_set passes a state object for the subsystem,
-        * so trim it to just the subsystem part */
-       if (subsystem) {
-               rc = snprintf(retbuf, MAXPATHLEN, "%s", subsystem);
-               if (rc < 0 || rc >= MAXPATHLEN) {
-                       ERROR("subsystem name too long");
-                       return -1;
-               }
-               char *s = index(retbuf, '.');
-               if (s)
-                       *s = '\0';
-               DEBUG("%s: called for subsys %s name %s\n", __func__, retbuf, 
name);
-       }
-       if (get_cgroup_mount(subsystem ? retbuf : NULL, buf)) {
-               ERROR("cgroup is not mounted");
-               return -1;
-       }
-
-       rc = snprintf(retbuf, MAXPATHLEN, "%s/%s", buf, name);
-       if (rc < 0 || rc >= MAXPATHLEN) {
-               ERROR("name too long");
-               return -1;
-       }
-
-       DEBUG("%s: returning %s for subsystem %s", __func__, retbuf, subsystem);
-
-       *path = retbuf;
-       return 0;
-}
-
-int lxc_cgroup_set(const char *name, const char *filename, const char *value)
-{
-       int fd, ret;
-       char *dirpath;
-       char path[MAXPATHLEN];
-       int rc;
-
-       ret = lxc_cgroup_path_get(&dirpath, filename, name);
-       if (ret)
-               return -1;
-
-       rc = snprintf(path, MAXPATHLEN, "%s/%s", dirpath, filename);
-       if (rc < 0 || rc >= MAXPATHLEN) {
-               ERROR("pathname too long");
-               return -1;
-       }
-
-       fd = open(path, O_WRONLY);
-       if (fd < 0) {
-               ERROR("open %s : %s", path, strerror(errno));
-               return -1;
-       }
-
-       ret = write(fd, value, strlen(value));
-       if (ret < 0) {
-               ERROR("write %s : %s", path, strerror(errno));
-               goto out;
-       }
-
-       ret = 0;
-out:
-       close(fd);
-       return ret;
-}
-
-/*
- * If you pass in NULL value or 0 len, then you are asking for the size
- * of the file.  Note that we can't get the file size quickly through stat
- * or lseek.  Therefore if you pass in len > 0 but less than the file size,
- * your only indication will be that the return value will be equal to the
- * passed-in ret.  We will not return the actual full file size.
- */
-int lxc_cgroup_get(const char *name, const char *filename,
-                  char *value, size_t len)
+int lxc_cgroup_attach(const char *name, const char *lxcpath)
 {
-       int fd, ret = -1;
-       char *dirpath;
-       char path[MAXPATHLEN];
-       int rc;
-
-       ret = lxc_cgroup_path_get(&dirpath, filename, name);
-       if (ret)
-               return -1;
-
-       rc = snprintf(path, MAXPATHLEN, "%s/%s", dirpath, filename);
-       if (rc < 0 || rc >= MAXPATHLEN) {
-               ERROR("pathname too long");
-               return -1;
-       }
-
-       fd = open(path, O_RDONLY);
-       if (fd < 0) {
-               ERROR("open %s : %s", path, strerror(errno));
-               return -1;
-       }
-
-    if (!len || !value) {
-        char buf[100];
-        int count = 0;
-        while ((ret = read(fd, buf, 100)) > 0)
-            count += ret;
-        if (ret >= 0)
-            ret = count;
-    } else {
-        memset(value, 0, len);
-        ret = read(fd, value, len);
-    }
-
-       if (ret < 0)
-               ERROR("read %s : %s", path, strerror(errno));
-
-       close(fd);
-       return ret;
-}
-
-int lxc_cgroup_nrtasks(const char *name)
-{
-       char *dpath;
-       char path[MAXPATHLEN];
-       int pid, ret, count = 0;
-       FILE *file;
-       int rc;
-
-       ret = lxc_cgroup_path_get(&dpath, NULL, name);
-       if (ret)
-               return -1;
-
-       rc = snprintf(path, MAXPATHLEN, "%s/tasks", dpath);
-       if (rc < 0 || rc >= MAXPATHLEN) {
-               ERROR("pathname too long");
-               return -1;
-       }
-
-       file = fopen(path, "r");
-       if (!file) {
-               SYSERROR("fopen '%s' failed", path);
-               return -1;
-       }
-
-       while (fscanf(file, "%d", &pid) != EOF)
-               count++;
-
-       fclose(file);
-
-       return count;
+       // XXX not yet implemented XXX TODO
+       return -1;
 }
diff --git a/src/lxc/cgroup.h b/src/lxc/cgroup.h
index 8167f39..457f07e 100644
--- a/src/lxc/cgroup.h
+++ b/src/lxc/cgroup.h
@@ -26,13 +26,12 @@
 #define MAXPRIOLEN 24
 
 struct lxc_handler;
-extern int lxc_cgroup_create(const char *name, pid_t pid);
-extern int lxc_cgroup_destroy(const char *name);
-extern int lxc_cgroup_path_get(char **path, const char *subsystem, const char 
*name);
-extern int lxc_cgroup_nrtasks(const char *name);
-extern int lxc_cgroup_attach(const char *name, pid_t pid);
-extern int lxc_cgroup_prepare_attach(const char *name, void **data);
-extern int lxc_cgroup_finish_attach(void *data, pid_t pid);
-extern int lxc_cgroup_dispose_attach(void *data);
-extern int lxc_ns_is_mounted(void);
+extern int lxc_cgroup_destroy(const char *cgpath);
+extern int lxc_cgroup_path_get(char **path, const char *subsystem, const char 
*name,
+                             const char *lxcpath);
+extern int lxc_cgroup_nrtasks(const char *cgpath);
+extern char *lxc_cgroup_path_create(const char *lxcgroup, const char *name);
+extern int lxc_cgroup_enter(const char *cgpath, pid_t pid);
+extern int lxc_cgroup_attach(const char *name, const char *lxcpath);
+int cgroup_path_get(char **path, const char *subsystem, const char *cgpath);
 #endif
diff --git a/src/lxc/commands.c b/src/lxc/commands.c
index 83e7df1..de9a7c4 100644
--- a/src/lxc/commands.c
+++ b/src/lxc/commands.c
@@ -84,10 +84,19 @@ static int fill_sock_name(char *path, int len, const char 
*name,
 static int receive_answer(int sock, struct lxc_answer *answer)
 {
        int ret;
+       static char answerpath[MAXPATHLEN];
 
        ret = lxc_af_unix_recv_fd(sock, &answer->fd, answer, sizeof(*answer));
        if (ret < 0)
                ERROR("failed to receive answer for the command");
+       if (answer->pathlen == 0)
+               return ret;
+       ret = recv(sock, answerpath, answer->pathlen, 0);
+       if (ret != answer->pathlen) {
+               ERROR("failed to receive answer for the command");
+               ret = 0;
+       } else
+               answer->path = answerpath;
 
        return ret;
 }
@@ -202,6 +211,7 @@ extern int  lxc_stop_callback(int, struct lxc_request *, 
struct lxc_handler *);
 extern int  lxc_state_callback(int, struct lxc_request *, struct lxc_handler 
*);
 extern int  lxc_pid_callback(int, struct lxc_request *, struct lxc_handler *);
 extern int  lxc_clone_flags_callback(int, struct lxc_request *, struct 
lxc_handler *);
+extern int lxc_cgroup_callback(int, struct lxc_request *, struct lxc_handler 
*);
 
 static int trigger_command(int fd, struct lxc_request *request,
                           struct lxc_handler *handler)
@@ -214,6 +224,7 @@ static int trigger_command(int fd, struct lxc_request 
*request,
                [LXC_COMMAND_STATE]       = lxc_state_callback,
                [LXC_COMMAND_PID]         = lxc_pid_callback,
                [LXC_COMMAND_CLONE_FLAGS] = lxc_clone_flags_callback,
+               [LXC_COMMAND_CGROUP]      = lxc_cgroup_callback,
        };
 
        if (request->type < 0 || request->type >= LXC_COMMAND_MAX)
diff --git a/src/lxc/commands.h b/src/lxc/commands.h
index 0b72cf1..8d77ce8 100644
--- a/src/lxc/commands.h
+++ b/src/lxc/commands.h
@@ -29,6 +29,7 @@ enum {
        LXC_COMMAND_STATE,
        LXC_COMMAND_PID,
        LXC_COMMAND_CLONE_FLAGS,
+       LXC_COMMAND_CGROUP,
        LXC_COMMAND_MAX,
 };
 
@@ -41,6 +42,8 @@ struct lxc_answer {
        int fd;
        int ret; /* 0 on success, -errno on failure */
        pid_t pid;
+       int pathlen;
+       const char *path;
 };
 
 struct lxc_command {
diff --git a/src/lxc/conf.c b/src/lxc/conf.c
index 04ab8b8..1b54e9f 100644
--- a/src/lxc/conf.c
+++ b/src/lxc/conf.c
@@ -1375,7 +1375,7 @@ static int setup_kmsg(const struct lxc_rootfs *rootfs,
        return 0;
 }
 
-int setup_cgroup(const char *name, struct lxc_list *cgroups)
+int setup_cgroup(const char *cgpath, struct lxc_list *cgroups)
 {
        struct lxc_list *iterator;
        struct lxc_cgroup *cg;
@@ -1388,8 +1388,11 @@ int setup_cgroup(const char *name, struct lxc_list 
*cgroups)
 
                cg = iterator->elem;
 
-               if (lxc_cgroup_set(name, cg->subsystem, cg->value))
+               if (lxc_cgroup_set_bypath(cgpath, cg->subsystem, cg->value)) {
+                       ERROR("Error setting %s to %s for %s\n", cg->subsystem,
+                               cg->value, cgpath);
                        goto out;
+               }
 
                DEBUG("cgroup '%s' set to '%s'", cg->subsystem, cg->value);
        }
diff --git a/src/lxc/conf.h b/src/lxc/conf.h
index 4c48b46..f20fb2f 100644
--- a/src/lxc/conf.h
+++ b/src/lxc/conf.h
@@ -282,7 +282,7 @@ struct lxc_conf {
 
 int run_lxc_hooks(const char *name, char *hook, struct lxc_conf *conf);
 
-extern int setup_cgroup(const char *name, struct lxc_list *cgroups);
+extern int setup_cgroup(const char *cgpath, struct lxc_list *cgroups);
 extern int detect_shared_rootfs(void);
 
 /*
diff --git a/src/lxc/freezer.c b/src/lxc/freezer.c
index e8b1311..261c2c4 100644
--- a/src/lxc/freezer.c
+++ b/src/lxc/freezer.c
@@ -40,16 +40,11 @@
 
 lxc_log_define(lxc_freezer, lxc);
 
-static int freeze_unfreeze(const char *name, int freeze, const char *lxcpath)
+static int do_unfreeze(const char *nsgroup, int freeze, const char *name, 
const char *lxcpath)
 {
-       char *nsgroup;
        char freezer[MAXPATHLEN], *f;
        char tmpf[32];
        int fd, ret;
-       
-       ret = lxc_cgroup_path_get(&nsgroup, "freezer", name);
-       if (ret)
-               return -1;
 
        ret = snprintf(freezer, MAXPATHLEN, "%s/freezer.state", nsgroup);
        if (ret >= MAXPATHLEN) {
@@ -59,7 +54,7 @@ static int freeze_unfreeze(const char *name, int freeze, 
const char *lxcpath)
 
        fd = open(freezer, O_RDWR);
        if (fd < 0) {
-               SYSERROR("failed to open freezer for '%s'", name);
+               SYSERROR("failed to open freezer at '%s'", nsgroup);
                return -1;
        }
 
@@ -98,7 +93,8 @@ static int freeze_unfreeze(const char *name, int freeze, 
const char *lxcpath)
                ret = strncmp(f, tmpf, strlen(f));
                if (!ret)
                {
-                       lxc_monitor_send_state(name, freeze ? FROZEN : THAWED, 
lxcpath);
+                       if (name)
+                               lxc_monitor_send_state(name, freeze ? FROZEN : 
THAWED, lxcpath);
                        break;          /* Success */
                }
 
@@ -122,6 +118,18 @@ out:
        return ret;
 }
 
+static int freeze_unfreeze(const char *name, int freeze, const char *lxcpath)
+{
+       char *nsgroup;
+       int ret;
+       
+       ret = lxc_cgroup_path_get(&nsgroup, "freezer", name, lxcpath);
+       if (ret)
+               return -1;
+
+       return do_unfreeze(nsgroup, freeze, name, lxcpath);
+}
+
 int lxc_freeze(const char *name, const char *lxcpath)
 {
        lxc_monitor_send_state(name, FREEZING, lxcpath);
@@ -133,3 +141,14 @@ int lxc_unfreeze(const char *name, const char *lxcpath)
        return freeze_unfreeze(name, 0, lxcpath);
 }
 
+int lxc_unfreeze_bypath(const char *cgpath)
+{
+       char *nsgroup;
+       int ret;
+       
+       ret = cgroup_path_get(&nsgroup, "freezer", cgpath);
+       if (ret)
+               return -1;
+
+       return do_unfreeze(nsgroup, 0, NULL, NULL);
+}
diff --git a/src/lxc/lxc.h b/src/lxc/lxc.h
index 0fa9480..784b3ca 100644
--- a/src/lxc/lxc.h
+++ b/src/lxc/lxc.h
@@ -118,6 +118,14 @@ extern int lxc_freeze(const char *name, const char 
*lxcpath);
 extern int lxc_unfreeze(const char *name, const char *lxcpath);
 
 /*
+ * Unfreeze all previously frozen tasks.
+ * This is the function to use from inside the monitor
+ * @name : the name of the container
+ * Return 0 on sucess, < 0 otherwise
+ */
+extern int lxc_unfreeze_bypath(const char *cgpath);
+
+/*
  * Retrieve the container state
  * @name : the name of the container
  * Returns the state of the container on success, < 0 otherwise
@@ -127,24 +135,36 @@ extern lxc_state_t lxc_state(const char *name, const char 
*lxcpath);
 /*
  * Set a specified value for a specified subsystem. The specified
  * subsystem must be fully specified, eg. "cpu.shares"
+ * @cgpath    : the cgroup path of the container
+ * @filename  : the cgroup attribute filename
+ * @value     : the value to be set
+ * Returns 0 on success, < 0 otherwise
+ */
+extern int lxc_cgroup_set_bypath(const char *cgpath, const char *filename, 
const char *value);
+
+/*
+ * Set a specified value for a specified subsystem. The specified
+ * subsystem must be fully specified, eg. "cpu.shares"
  * @name      : the name of the container
- * @filename : the cgroup attribute filename
+ * @filename  : the cgroup attribute filename
  * @value     : the value to be set
+ * @lxcpath   : lxc config path for container
  * Returns 0 on success, < 0 otherwise
  */
-extern int lxc_cgroup_set(const char *name, const char *filename, const char 
*value);
+extern int lxc_cgroup_set(const char *name, const char *filename, const char 
*value, const char *lxcpath);
 
 /*
  * Get a specified value for a specified subsystem. The specified
  * subsystem must be fully specified, eg. "cpu.shares"
  * @name      : the name of the container
- * @filename : the cgroup attribute filename
+ * @filename  : the cgroup attribute filename
  * @value     : the value to be set
  * @len       : the len of the value variable
+ * @lxcpath   : lxc config path for container
  * Returns the number of bytes read, < 0 on error
  */
 extern int lxc_cgroup_get(const char *name, const char *filename,
-                         char *value, size_t len);
+                         char *value, size_t len, const char *lxcpath);
 
 /*
  * Retrieve the error string associated with the error returned by
diff --git a/src/lxc/lxc_attach.c b/src/lxc/lxc_attach.c
index 370bfbc..16680b0 100644
--- a/src/lxc/lxc_attach.c
+++ b/src/lxc/lxc_attach.c
@@ -127,9 +127,9 @@ int main(int argc, char *argv[])
        struct passwd *passwd;
        struct lxc_proc_context_info *init_ctx;
        struct lxc_handler *handler;
-       void *cgroup_data = NULL;
        uid_t uid;
        char *curdir;
+       int mypipe[2];
 
        ret = lxc_caps_init();
        if (ret)
@@ -156,18 +156,6 @@ int main(int argc, char *argv[])
                return -1;
        }
 
-       if (!elevated_privileges) {
-               /* we have to do this now since /sys/fs/cgroup may not
-                * be available inside the container or we may not have
-                * the required permissions anymore
-                */
-               ret = lxc_cgroup_prepare_attach(my_args.name, &cgroup_data);
-               if (ret < 0) {
-                       ERROR("failed to prepare attaching to cgroup");
-                       return -1;
-               }
-       }
-
        curdir = getcwd(NULL, 0);
 
        /* determine which namespaces the container was created with
@@ -183,6 +171,51 @@ int main(int argc, char *argv[])
                }
        }
 
+       if (pipe(mypipe)) {
+               SYSERROR("failed creating communications pipe");
+               return -1;
+       }
+
+       pid = fork();
+       if (pid < 0) {
+               SYSERROR("failed to fork\n");
+               return -1;
+       }
+       if (pid) {
+               int status;
+               int gchild;
+
+               close(mypipe[1]);
+               if (read(mypipe[0], &gchild, sizeof(gchild)) <= 0) {
+                       ERROR("failed to get pid from grand-child");
+                       return -1;
+               }
+
+               if (!elevated_privileges) {
+                       ret = lxc_cgroup_attach(my_args.name, my_args.lxcpath);
+                       if (ret < 0) {
+                               ERROR("failed to attach process to cgroup");
+                               return -1;
+                       }
+               }
+
+               close(mypipe[0]);
+       again1:
+               if (waitpid(pid, &status, 0) < 0) {
+                       if (errno == EINTR)
+                               goto again1;
+                       SYSERROR("failed to wait '%d'", pid);
+                       return -1;
+               }
+
+               if (WIFEXITED(status))
+                       return WEXITSTATUS(status);
+
+               return -1;
+
+               return 0;
+       }
+
        /* we need to attach before we fork since certain namespaces
         * (such as pid namespaces) only really affect children of the
         * current process and not the process itself
@@ -224,22 +257,13 @@ int main(int argc, char *argv[])
                if (lxc_sync_wait_child(handler, LXC_SYNC_CONFIGURE))
                        return -1;
 
-               /* now that we are done with all privileged operations,
-                * we can add ourselves to the cgroup. Since we smuggled in
-                * the fds earlier, we still have write permission
-                */
-               if (!elevated_privileges) {
-                       /* since setns() for pid namespaces only really
-                        * affects child processes, the pid we have is
-                        * still valid outside the container, so this is
-                        * fine
-                        */
-                       ret = lxc_cgroup_finish_attach(cgroup_data, pid);
-                       if (ret < 0) {
-                               ERROR("failed to attach process to cgroup");
-                               return -1;
-                       }
+               // ask parent to set cgroups for child
+               close(mypipe[0]);
+               if (write(mypipe[1], &pid, sizeof(pid)) != sizeof(pid)) {
+                       ERROR("Error writing child's pid to parent");
+                       return -1;
                }
+               close(mypipe[1]);
 
                /* tell the child we are done initializing */
                if (lxc_sync_wake_child(handler, LXC_SYNC_POST_CONFIGURE))
@@ -263,7 +287,8 @@ int main(int argc, char *argv[])
 
        if (!pid) {
                lxc_sync_fini_parent(handler);
-               lxc_cgroup_dispose_attach(cgroup_data);
+               close(mypipe[0]);
+               close(mypipe[1]);
 
                /* A description of the purpose of this functionality is
                 * provided in the lxc-attach(1) manual page. We have to
@@ -307,6 +332,19 @@ int main(int argc, char *argv[])
                        return -1;
                }
 
+               if (namespace_flags & CLONE_NEWUSER) {
+                       /* XXX FIXME this should get the uid of the container 
init and setuid to that */
+                       /* XXX FIXME or perhaps try to map in the lxc-attach 
caller's uid? */
+                       if (setgid(0)) {
+                               SYSERROR("switching to container gid");
+                               return -1;
+                       }
+                       if (setuid(0)) {
+                               SYSERROR("switching to container uid");
+                               return -1;
+                       }
+               }
+
                uid = getuid();
 
                passwd = getpwuid(uid);
diff --git a/src/lxc/lxc_cgroup.c b/src/lxc/lxc_cgroup.c
index 970391b..4a55633 100644
--- a/src/lxc/lxc_cgroup.c
+++ b/src/lxc/lxc_cgroup.c
@@ -78,7 +78,7 @@ int main(int argc, char *argv[])
                value = my_args.argv[1];
 
        if (value) {
-               if (lxc_cgroup_set(my_args.name, state_object, value)) {
+               if (lxc_cgroup_set(my_args.name, state_object, value, 
my_args.lxcpath)) {
                        ERROR("failed to assign '%s' value to '%s' for '%s'",
                                value, state_object, my_args.name);
                        return -1;
@@ -88,7 +88,7 @@ int main(int argc, char *argv[])
                int ret;
                char buffer[len];
 
-               ret = lxc_cgroup_get(my_args.name, state_object, buffer, len);
+               ret = lxc_cgroup_get(my_args.name, state_object, buffer, len, 
my_args.lxcpath);
                if (ret < 0) {
                        ERROR("failed to retrieve value of '%s' for '%s'",
                              state_object, my_args.name);
diff --git a/src/lxc/lxc_unshare.c b/src/lxc/lxc_unshare.c
index d92a96a..c5d3332 100644
--- a/src/lxc/lxc_unshare.c
+++ b/src/lxc/lxc_unshare.c
@@ -112,7 +112,6 @@ int main(int argc, char *argv[])
 {
        int opt, status;
        int ret;
-       char *pid_name;
        char *namespaces = NULL;
        char **args;
        int flags = 0;
@@ -170,14 +169,5 @@ int main(int argc, char *argv[])
                return -1;
        }
 
-       if (lxc_ns_is_mounted()) {
-               if (asprintf(&pid_name, "%d", pid) == -1) {
-                       ERROR("pid_name: failed to allocate memory");
-                       return -1;
-               }
-               lxc_cgroup_destroy(pid_name);
-               free(pid_name);
-       }
-
        return  lxc_error_set_and_log(pid, status);
 }
diff --git a/src/lxc/lxccontainer.c b/src/lxc/lxccontainer.c
index f24c39f..78f780d 100644
--- a/src/lxc/lxccontainer.c
+++ b/src/lxc/lxccontainer.c
@@ -959,7 +959,7 @@ static bool lxcapi_set_cgroup_item(struct lxc_container *c, 
const char *subsys,
        if (is_stopped_nolock(c))
                goto err;
 
-       ret = lxc_cgroup_set(c->name, subsys, value);
+       ret = lxc_cgroup_set(c->name, subsys, value, c->config_path);
        if (!ret)
                b = true;
 err:
@@ -980,7 +980,7 @@ static int lxcapi_get_cgroup_item(struct lxc_container *c, 
const char *subsys, c
        if (is_stopped_nolock(c))
                goto out;
 
-       ret = lxc_cgroup_get(c->name, subsys, retv, inlen);
+       ret = lxc_cgroup_get(c->name, subsys, retv, inlen, c->config_path);
 
 out:
        lxcunlock(c->privlock);
diff --git a/src/lxc/start.c b/src/lxc/start.c
index de431be..f0e82a3 100644
--- a/src/lxc/start.c
+++ b/src/lxc/start.c
@@ -268,6 +268,7 @@ int lxc_pid_callback(int fd, struct lxc_request *request,
        struct lxc_answer answer;
        int ret;
 
+       memset(&answer, 0, sizeof(answer));
        answer.pid = handler->pid;
        answer.ret = 0;
 
@@ -285,12 +286,49 @@ int lxc_pid_callback(int fd, struct lxc_request *request,
        return 0;
 }
 
+int lxc_cgroup_callback(int fd, struct lxc_request *request,
+                    struct lxc_handler *handler)
+{
+       struct lxc_answer answer;
+       int ret;
+
+       memset(&answer, 0, sizeof(answer));
+       answer.pathlen = strlen(handler->cgroup) + 1;
+       answer.path = handler->cgroup;
+       answer.ret = 0;
+
+       ret = send(fd, &answer, sizeof(answer), 0);
+       if (ret < 0) {
+               WARN("failed to send answer to the peer");
+               return -1;
+       }
+
+       if (ret != sizeof(answer)) {
+               ERROR("partial answer sent");
+               return -1;
+       }
+
+       ret = send(fd, answer.path, answer.pathlen, 0);
+       if (ret < 0) {
+               WARN("failed to send answer to the peer");
+               return -1;
+       }
+
+       if (ret != answer.pathlen) {
+               ERROR("partial answer sent");
+               return -1;
+       }
+
+       return 0;
+}
+
 int lxc_clone_flags_callback(int fd, struct lxc_request *request,
                             struct lxc_handler *handler)
 {
        struct lxc_answer answer;
        int ret;
 
+       memset(&answer, 0, sizeof(answer));
        answer.pid = 0;
        answer.ret = handler->clone_flags;
 
@@ -467,7 +505,7 @@ out_free:
        return NULL;
 }
 
-void lxc_fini(const char *name, struct lxc_handler *handler)
+static void lxc_fini(const char *name, struct lxc_handler *handler)
 {
        /* The STOPPING state is there for future cleanup code
         * which can take awhile
@@ -487,6 +525,11 @@ void lxc_fini(const char *name, struct lxc_handler 
*handler)
        close(handler->conf->maincmd_fd);
        handler->conf->maincmd_fd = -1;
        free(handler->name);
+       if (handler->cgroup) {
+               lxc_cgroup_destroy(handler->cgroup);
+               free(handler->cgroup);
+               handler->cgroup = NULL;
+       }
        free(handler);
 }
 
@@ -756,7 +799,11 @@ int lxc_spawn(struct lxc_handler *handler)
        if (lxc_sync_wait_child(handler, LXC_SYNC_CONFIGURE))
                failed_before_rename = 1;
 
-       if (lxc_cgroup_create(name, handler->pid))
+       /* TODO - pass lxc.cgroup.dir (or user's pam cgroup) in for first 
argument */
+       if ((handler->cgroup = lxc_cgroup_path_create(NULL, name)) == NULL)
+               goto out_delete_net;
+       
+       if (lxc_cgroup_enter(handler->cgroup, handler->pid) < 0)
                goto out_delete_net;
 
        if (failed_before_rename)
@@ -786,7 +833,7 @@ int lxc_spawn(struct lxc_handler *handler)
        if (lxc_sync_barrier_child(handler, LXC_SYNC_POST_CONFIGURE))
                goto out_delete_net;
 
-       if (setup_cgroup(name, &handler->conf->cgroup)) {
+       if (setup_cgroup(handler->cgroup, &handler->conf->cgroup)) {
                ERROR("failed to setup the cgroups for '%s'", name);
                goto out_delete_net;
        }
@@ -904,7 +951,6 @@ out_fini:
        lxc_delete_network(handler);
 
 out_fini_nonet:
-       lxc_cgroup_destroy(name);
        lxc_fini(name, handler);
        return err;
 
diff --git a/src/lxc/start.h b/src/lxc/start.h
index a0afa87..5b3488f 100644
--- a/src/lxc/start.h
+++ b/src/lxc/start.h
@@ -51,6 +51,7 @@ struct lxc_handler {
 #endif
        int pinfd;
        const char *lxcpath;
+       char *cgroup;
 };
 
 extern struct lxc_handler *lxc_init(const char *name, struct lxc_conf *, const 
char *);
@@ -58,7 +59,6 @@ extern int lxc_spawn(struct lxc_handler *);
 
 extern int lxc_poll(const char *name, struct lxc_handler *handler);
 extern void lxc_abort(const char *name, struct lxc_handler *handler);
-extern void lxc_fini(const char *name, struct lxc_handler *handler);
 extern int lxc_set_state(const char *, struct lxc_handler *, lxc_state_t);
 extern int lxc_check_inherited(struct lxc_conf *conf, int fd_to_ignore);
 int __lxc_start(const char *, struct lxc_conf *, struct lxc_operations *,
diff --git a/src/lxc/state.c b/src/lxc/state.c
index 9fcb06d..169aaf5 100644
--- a/src/lxc/state.c
+++ b/src/lxc/state.c
@@ -66,7 +66,7 @@ lxc_state_t lxc_str2state(const char *state)
        return -1;
 }
 
-static int freezer_state(const char *name)
+static int freezer_state(const char *name, const char *lxcpath)
 {
        char *nsgroup;
        char freezer[MAXPATHLEN];
@@ -74,7 +74,7 @@ static int freezer_state(const char *name)
        FILE *file;
        int err;
 
-       err = lxc_cgroup_path_get(&nsgroup, "freezer", name);
+       err = lxc_cgroup_path_get(&nsgroup, "freezer", name, lxcpath);
        if (err)
                return -1;
 
@@ -132,7 +132,7 @@ static lxc_state_t __lxc_getstate(const char *name, const 
char *lxcpath)
 
 lxc_state_t lxc_getstate(const char *name, const char *lxcpath)
 {
-       int state = freezer_state(name);
+       int state = freezer_state(name, lxcpath);
        if (state != FROZEN && state != FREEZING)
                state = __lxc_getstate(name, lxcpath);
        return state;
@@ -148,6 +148,7 @@ extern int lxc_state_callback(int fd, struct lxc_request 
*request,
        struct lxc_answer answer;
        int ret;
 
+       memset(&answer, 0, sizeof(answer));
        answer.ret = handler->state;
 
        ret = send(fd, &answer, sizeof(answer), 0);
diff --git a/src/lxc/stop.c b/src/lxc/stop.c
index c796283..851a4bf 100644
--- a/src/lxc/stop.c
+++ b/src/lxc/stop.c
@@ -83,9 +83,10 @@ extern int lxc_stop_callback(int fd, struct lxc_request 
*request,
        struct lxc_answer answer;
        int ret;
 
+       memset(&answer, 0, sizeof(answer));
        answer.ret = kill(handler->pid, SIGKILL);
        if (!answer.ret) {
-               ret = lxc_unfreeze(handler->name, handler->lxcpath);
+               ret = lxc_unfreeze_bypath(handler->cgroup);
                if (!ret)
                        return 0;
 
diff --git a/src/tests/Makefile.am b/src/tests/Makefile.am
index a883d36..4e5228a 100644
--- a/src/tests/Makefile.am
+++ b/src/tests/Makefile.am
@@ -16,6 +16,7 @@ lxc_test_shutdowntest_SOURCES = shutdowntest.c
 lxc_test_get_item_SOURCES = get_item.c
 lxc_test_getkeys_SOURCES = getkeys.c
 lxc_test_lxcpath_SOURCES = lxcpath.c
+lxc_test_cgpath_SOURCES = cgpath.c
 
 AM_CFLAGS=-I$(top_srcdir)/src \
        -DLXCROOTFSMOUNT=\"$(LXCROOTFSMOUNT)\" \
@@ -25,6 +26,7 @@ AM_CFLAGS=-I$(top_srcdir)/src \
 
 bin_PROGRAMS = lxc-test-containertests lxc-test-locktests lxc-test-startone \
        lxc-test-destroytest lxc-test-saveconfig lxc-test-createtest \
-       lxc-test-shutdowntest lxc-test-get_item lxc-test-getkeys 
lxc-test-lxcpath
+       lxc-test-shutdowntest lxc-test-get_item lxc-test-getkeys 
lxc-test-lxcpath \
+       lxc-test-cgpath
 
 endif
diff --git a/src/tests/cgpath.c b/src/tests/cgpath.c
new file mode 100644
index 0000000..43a1513
--- /dev/null
+++ b/src/tests/cgpath.c
@@ -0,0 +1,142 @@
+/* liblxcapi
+ *
+ * Copyright © 2012 Serge Hallyn <serge.hal...@ubuntu.com>.
+ * Copyright © 2012 Canonical Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#include "../lxc/lxccontainer.h"
+
+#include <unistd.h>
+#include <signal.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <stdlib.h>
+#include <errno.h>
+#include "../lxc/cgroup.h"
+#include "../lxc/lxc.h"
+
+#define MYNAME "lxctest1"
+#define MYNAME2 "lxctest2"
+
+#define TSTERR(x) do { \
+       fprintf(stderr, "%d: %s\n", __LINE__, x); \
+} while (0)
+
+int main()
+{
+       struct lxc_container *c = NULL, *c2 = NULL;
+       char *path;
+       int len;
+       int ret, retv = -1;
+
+       /* won't require privilege necessarily once users are classified by
+        * pam_cgroup */
+       if (geteuid() != 0) {
+               TSTERR("requires privilege");
+               exit(0);
+       }
+
+       path = lxc_cgroup_path_create(NULL, MYNAME);
+       len = strlen(path);
+       if (!path || !len) {
+               TSTERR("zero result from lxc_cgroup_path_create");
+               exit(1);
+       }
+       if (strstr(path, "lxc/" MYNAME)) {
+               TSTERR("lxc_cgroup_path_create NULL lxctest1");
+               exit(1);
+       }
+       free(path);
+
+       path = lxc_cgroup_path_create("ab", MYNAME);
+       len = strlen(path);
+       if (!path || !len) {
+               TSTERR("zero result from lxc_cgroup_path_create");
+               exit(1);
+       }
+       if (strstr(path, "ab/" MYNAME)) {
+               TSTERR("lxc_cgroup_path_create ab lxctest1");
+               exit(1);
+       }
+       free(path);
+
+       if ((c = lxc_container_new(MYNAME, NULL)) == NULL) {
+               TSTERR("instantiating first container");
+               exit(1);
+       }
+       c->set_config_item(c, "lxc.network.type", "empty");
+       if (!c->createl(c, "ubuntu", NULL)) {
+               TSTERR("creating first container");
+               exit(1);
+       }
+       c->load_config(c, NULL);
+       c->want_daemonize(c);
+       if (!c->startl(c, 0, NULL)) {
+               TSTERR("starting first container");
+               exit(1);
+       }
+
+       char *nsgroup;
+#define ALTBASE "/var/lib/lxctest2"
+       ret = mkdir(ALTBASE, 755);
+
+       ret = lxc_cgroup_path_get(&nsgroup, "freezer", MYNAME, 
c->get_config_path(c));
+       if (ret < 0 || strstr(nsgroup, "lxc/" MYNAME)) {
+               TSTERR("getting first cgroup path from lxc_command");
+               exit(1);
+       }
+
+       /* start second container */
+       if ((c2 = lxc_container_new(MYNAME2, ALTBASE)) == NULL) {
+               TSTERR("instantiating first container");
+               exit(1);
+       }
+       c2->set_config_item(c2, "lxc.network.type", "empty");
+       if (!c2->createl(c2, "ubuntu", NULL)) {
+               TSTERR("creating first container");
+               exit(1);
+       }
+
+       c2->load_config(c2, NULL);
+       c2->want_daemonize(c2);
+       if (!c2->startl(c2, 0, NULL)) {
+               TSTERR("starting first container");
+               goto out;
+       }
+
+       ret = lxc_cgroup_path_get(&nsgroup, "freezer", MYNAME2, 
c2->get_config_path(c2));
+       if (ret < 0 || strstr(nsgroup, "lxc/" MYNAME2)) {
+               TSTERR("getting second cgroup path from lxc_command");
+               exit(1);
+       }
+
+       if (lxc_cgroup_nrtasks(nsgroup) < 1) {
+               TSTERR("getting nrtasks");
+               exit(1);
+       }
+
+       retv = 0;
+out:
+       if (c2) {
+               c2->stop(c2);
+               c2->destroy(c2);
+       }
+       if (c) {
+               c->stop(c);
+               c->destroy(c);
+       }
+       return retv;
+}
diff --git a/src/tests/lxcpath.c b/src/tests/lxcpath.c
index 92525ed..a849a38 100644
--- a/src/tests/lxcpath.c
+++ b/src/tests/lxcpath.c
@@ -29,7 +29,7 @@
 #define MYNAME "lxctest1"
 
 #define TSTERR(x) do { \
-       fprintf(stderr, "%d: %s", __LINE__, x); \
+       fprintf(stderr, "%d: %s\n", __LINE__, x); \
 } while (0)
 
 int main()
-- 
1.8.1.2


------------------------------------------------------------------------------
The Go Parallel Website, sponsored by Intel - in partnership with Geeknet, 
is your hub for all things parallel software development, from weekly thought 
leadership blogs to news, videos, case studies, tutorials, tech docs, 
whitepapers, evaluation guides, and opinion stories. Check out the most 
recent posts - join the conversation now. http://goparallel.sourceforge.net/
_______________________________________________
Lxc-devel mailing list
Lxc-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/lxc-devel

Reply via email to