On Wed, 2011-08-24 at 14:17 +0400, Vladimir Smirnov wrote: > Clone allows more flexible control. Currently, if there is any inherited fd, > lxc-start exits with error. With clone it's possible not to pass open fd's to > childs. >
Hmm... when it comes to file descriptors, you have two flavours: - CLONE_FILES: the child will share the file descriptor table with the parent. If any of the two tasks open or close a fd, the other will also see the change. - without CLONE_FILES: the child will get a copy of the parent's file descriptor table. Then, each task may open or close a fd without effect for the other. As you see, the child shares or at least inherits a copy of all the fd's with its parent. Honestly, I don't know any clone flag that would prevent the child to be passed a fd... > Signed-off-by: Vladimir Smirnov <ci...@yandex-team.ru> > --- > src/lxc/lxc_attach.c | 79 > +++++++++++++++++++++++++++++--------------------- > src/lxc/lxc_init.c | 46 ++++++++++++++++++----------- > 2 files changed, 75 insertions(+), 50 deletions(-) > > diff --git a/src/lxc/lxc_attach.c b/src/lxc/lxc_attach.c Why do you need to patch lxc-attach ? AFAIK it doesn't check for inherited file descriptors... > index ed3d5a4..930865e 100644 > --- a/src/lxc/lxc_attach.c > +++ b/src/lxc/lxc_attach.c > @@ -20,6 +20,7 @@ > * License along with this library; if not, write to the Free Software > * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA > */ > +#define STACKSIZE 16384 > > #define _GNU_SOURCE > #include <unistd.h> > @@ -29,6 +30,7 @@ > #include <sys/param.h> > #include <sys/types.h> > #include <sys/wait.h> > +#include <sched.h> > > #include "commands.h" > #include "arguments.h" > @@ -56,14 +58,48 @@ Options :\n\ > .checker = NULL, > }; > > +static char *envp_global; > + > +int child_main() > +{ > + struct passwd *passwd; > + uid_t uid; > + > + if (my_args.argc) { > + execve(my_args.argv[0], my_args.argv, envp_global); > + SYSERROR("failed to exec '%s'", my_args.argv[0]); > + return -1; > + } > + > + uid = getuid(); > + > + passwd = getpwuid(uid); > + if (!passwd) { > + SYSERROR("failed to get passwd " \ > + "entry for uid '%d'", uid); > + return -1; > + } > + > + { > + char *const args[] = { > + passwd->pw_shell, > + NULL, > + }; > + > + execve(args[0], args, envp_global); > + SYSERROR("failed to exec '%s'", args[0]); > + return -1; > + } > +} > + > int main(int argc, char *argv[], char *envp[]) > { > int ret; > pid_t pid; > - struct passwd *passwd; > - uid_t uid; > char *curdir; > > + envp_global = envp; > + > ret = lxc_caps_init(); > if (ret) > return ret; > @@ -96,7 +132,14 @@ int main(int argc, char *argv[], char *envp[]) > > free(curdir); > > - pid = fork(); > + void **newstack; > + newstack = (void **) malloc(STACKSIZE); > + if (!newstack) > + exit(newstack); > + > + newstack = (void **)(STACKSIZE + (char *)newstack); > + *--newstack = 0; > + pid = clone(child_main, newstack, CLONE_VM | CLONE_FS | CLONE_SIGHAND, > 0); > > if (pid < 0) { > SYSERROR("failed to fork"); > @@ -120,35 +163,5 @@ int main(int argc, char *argv[], char *envp[]) > return -1; > } > > - if (!pid) { > - > - if (my_args.argc) { > - execve(my_args.argv[0], my_args.argv, envp); > - SYSERROR("failed to exec '%s'", my_args.argv[0]); > - return -1; > - } > - > - uid = getuid(); > - > - passwd = getpwuid(uid); > - if (!passwd) { > - SYSERROR("failed to get passwd " \ > - "entry for uid '%d'", uid); > - return -1; > - } > - > - { > - char *const args[] = { > - passwd->pw_shell, > - NULL, > - }; > - > - execve(args[0], args, envp); > - SYSERROR("failed to exec '%s'", args[0]); > - return -1; > - } > - > - } > - > return 0; > } > diff --git a/src/lxc/lxc_init.c b/src/lxc/lxc_init.c > index a534b51..7debd6f 100644 > --- a/src/lxc/lxc_init.c > +++ b/src/lxc/lxc_init.c > @@ -20,6 +20,7 @@ > * License along with this library; if not, write to the Free Software > * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA > */ > +#define STACKSIZE 16384 > > #include <stdio.h> > #include <unistd.h> > @@ -32,6 +33,7 @@ > #include <sys/wait.h> > #define _GNU_SOURCE > #include <getopt.h> > +#include <sched.h> > > #include "log.h" > #include "caps.h" > @@ -40,6 +42,9 @@ > > lxc_log_define(lxc_init, lxc); > > +static sigset_t mask, omask; > +static char **aargv; > + > static int quiet; > > static struct option options[] = { > @@ -49,6 +54,23 @@ static struct option options[] = { > > static int was_interrupted = 0; > > +int child_main() > +{ > + int i; > + int err = -1; > + /* restore default signal handlers */ > + for (i = 1; i < NSIG; i++) > + signal(i, SIG_DFL); > + > + sigprocmask(SIG_SETMASK, &omask, NULL); > + > + NOTICE("about to exec '%s'", aargv[0]); > + > + execvp(aargv[0], aargv); > + ERROR("failed to exec: '%s' : %m", aargv[0]); > + exit(err); > +} > + > int main(int argc, char *argv[]) > { > > @@ -61,8 +83,6 @@ int main(int argc, char *argv[]) > pid_t pid; > int nbargs = 0; > int err = -1; > - char **aargv; > - sigset_t mask, omask; > int i, shutdown = 0; > > while (1) { > @@ -115,25 +135,17 @@ int main(int argc, char *argv[]) > if (lxc_caps_reset()) > exit(err); > > - pid = fork(); > - > - if (pid < 0) > + void **newstack; > + newstack = (void **) malloc(STACKSIZE); > + if (!newstack) > exit(err); > > - if (!pid) { > - > - /* restore default signal handlers */ > - for (i = 1; i < NSIG; i++) > - signal(i, SIG_DFL); > + newstack = (void **)(STACKSIZE + (char *)newstack); > + *--newstack = 0; > + pid = clone(child_main, newstack, CLONE_VM | CLONE_FS | CLONE_SIGHAND, > 0); > Passing CLONE_VM here means execvp() will flush the memory for both parent and child... This means the lxc-init code doesn't run anymore and this isn't acceptable, as it honors the child ripping work in an application container. > - sigprocmask(SIG_SETMASK, &omask, NULL); > - > - NOTICE("about to exec '%s'", aargv[0]); > - > - execvp(aargv[0], aargv); > - ERROR("failed to exec: '%s' : %m", aargv[0]); > + if (pid < 0) > exit(err); > - } > > /* let's process the signals now */ > sigdelset(&omask, SIGALRM); -- Gregory Kurz gk...@fr.ibm.com Software Engineer @ IBM/Meiosys http://www.ibm.com Tel +33 (0)534 638 479 Fax +33 (0)561 400 420 "Anarchy is about taking complete responsibility for yourself." Alan Moore. ------------------------------------------------------------------------------ EMC VNX: the world's simplest storage, starting under $10K The only unified storage solution that offers unified management Up to 160% more powerful than alternatives and 25% more efficient. Guaranteed. http://p.sf.net/sfu/emc-vnx-dev2dev _______________________________________________ Lxc-devel mailing list Lxc-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/lxc-devel