On Wed, 2011-08-24 at 14:17 +0400, Vladimir Smirnov wrote:
> Clone allows more flexible control. Currently, if there is any inherited fd,
> lxc-start exits with error. With clone it's possible not to pass open fd's to 
> childs.
> 

Hmm... when it comes to file descriptors, you have two flavours:
- CLONE_FILES: the child will share the file descriptor table with the
parent. If any of the two tasks open or close a fd, the other will also
see the change.
- without CLONE_FILES: the child will get a copy of the parent's file
descriptor table. Then, each task may open or close a fd without effect
for the other.

As you see, the child shares or at least inherits a copy of all the fd's
with its parent. Honestly, I don't know any clone flag that would
prevent the child to be passed a fd...

> Signed-off-by: Vladimir Smirnov <ci...@yandex-team.ru>
> ---
>  src/lxc/lxc_attach.c |   79 
> +++++++++++++++++++++++++++++---------------------
>  src/lxc/lxc_init.c   |   46 ++++++++++++++++++-----------
>  2 files changed, 75 insertions(+), 50 deletions(-)
> 
> diff --git a/src/lxc/lxc_attach.c b/src/lxc/lxc_attach.c

Why do you need to patch lxc-attach ? AFAIK it doesn't check for
inherited file descriptors...

> index ed3d5a4..930865e 100644
> --- a/src/lxc/lxc_attach.c
> +++ b/src/lxc/lxc_attach.c
> @@ -20,6 +20,7 @@
>   * License along with this library; if not, write to the Free Software
>   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
>   */
> +#define STACKSIZE 16384
> 
>  #define _GNU_SOURCE
>  #include <unistd.h>
> @@ -29,6 +30,7 @@
>  #include <sys/param.h>
>  #include <sys/types.h>
>  #include <sys/wait.h>
> +#include <sched.h>
> 
>  #include "commands.h"
>  #include "arguments.h"
> @@ -56,14 +58,48 @@ Options :\n\
>       .checker  = NULL,
>  };
> 
> +static char *envp_global;
> +
> +int child_main()
> +{
> +     struct passwd *passwd;
> +     uid_t uid;
> +
> +     if (my_args.argc) {
> +             execve(my_args.argv[0], my_args.argv, envp_global);
> +             SYSERROR("failed to exec '%s'", my_args.argv[0]);
> +             return -1;
> +     }
> +
> +     uid = getuid();
> +
> +     passwd = getpwuid(uid);
> +     if (!passwd) {
> +             SYSERROR("failed to get passwd "                \
> +                      "entry for uid '%d'", uid);
> +             return -1;
> +     }
> +
> +     {
> +             char *const args[] = {
> +                     passwd->pw_shell,
> +                     NULL,
> +             };
> +
> +             execve(args[0], args, envp_global);
> +             SYSERROR("failed to exec '%s'", args[0]);
> +             return -1;
> +     }
> +}
> +
>  int main(int argc, char *argv[], char *envp[])
>  {
>       int ret;
>       pid_t pid;
> -     struct passwd *passwd;
> -     uid_t uid;
>       char *curdir;
> 
> +     envp_global = envp;
> +
>       ret = lxc_caps_init();
>       if (ret)
>               return ret;
> @@ -96,7 +132,14 @@ int main(int argc, char *argv[], char *envp[])
> 
>       free(curdir);
> 
> -     pid = fork();
> +     void **newstack;
> +     newstack = (void **) malloc(STACKSIZE);
> +     if (!newstack)
> +             exit(newstack);
> +
> +     newstack = (void **)(STACKSIZE + (char *)newstack);
> +     *--newstack = 0;
> +     pid = clone(child_main, newstack, CLONE_VM | CLONE_FS | CLONE_SIGHAND, 
> 0);
> 
>       if (pid < 0) {
>               SYSERROR("failed to fork");
> @@ -120,35 +163,5 @@ int main(int argc, char *argv[], char *envp[])
>               return -1;
>       }
> 
> -     if (!pid) {
> -
> -             if (my_args.argc) {
> -                     execve(my_args.argv[0], my_args.argv, envp);
> -                     SYSERROR("failed to exec '%s'", my_args.argv[0]);
> -                     return -1;
> -             }
> -
> -             uid = getuid();
> -
> -             passwd = getpwuid(uid);
> -             if (!passwd) {
> -                     SYSERROR("failed to get passwd "                \
> -                              "entry for uid '%d'", uid);
> -                     return -1;
> -             }
> -
> -             {
> -                     char *const args[] = {
> -                             passwd->pw_shell,
> -                             NULL,
> -                     };
> -
> -                     execve(args[0], args, envp);
> -                     SYSERROR("failed to exec '%s'", args[0]);
> -                     return -1;
> -             }
> -
> -     }
> -
>       return 0;
>  }
> diff --git a/src/lxc/lxc_init.c b/src/lxc/lxc_init.c
> index a534b51..7debd6f 100644
> --- a/src/lxc/lxc_init.c
> +++ b/src/lxc/lxc_init.c
> @@ -20,6 +20,7 @@
>   * License along with this library; if not, write to the Free Software
>   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
>   */
> +#define STACKSIZE 16384
> 
>  #include <stdio.h>
>  #include <unistd.h>
> @@ -32,6 +33,7 @@
>  #include <sys/wait.h>
>  #define _GNU_SOURCE
>  #include <getopt.h>
> +#include <sched.h>
> 
>  #include "log.h"
>  #include "caps.h"
> @@ -40,6 +42,9 @@
> 
>  lxc_log_define(lxc_init, lxc);
> 
> +static sigset_t mask, omask;
> +static char **aargv;
> +
>  static int quiet;
> 
>  static struct option options[] = {
> @@ -49,6 +54,23 @@ static struct option options[] = {
> 
>  static       int was_interrupted = 0;
> 
> +int child_main()
> +{
> +     int i;
> +     int err = -1;
> +     /* restore default signal handlers */
> +     for (i = 1; i < NSIG; i++)
> +             signal(i, SIG_DFL);
> +
> +     sigprocmask(SIG_SETMASK, &omask, NULL);
> +
> +     NOTICE("about to exec '%s'", aargv[0]);
> +
> +     execvp(aargv[0], aargv);
> +     ERROR("failed to exec: '%s' : %m", aargv[0]);
> +     exit(err);
> +}
> +
>  int main(int argc, char *argv[])
>  {
> 
> @@ -61,8 +83,6 @@ int main(int argc, char *argv[])
>       pid_t pid;
>       int nbargs = 0;
>       int err = -1;
> -     char **aargv;
> -     sigset_t mask, omask;
>       int i, shutdown = 0;
> 
>       while (1) {
> @@ -115,25 +135,17 @@ int main(int argc, char *argv[])
>       if (lxc_caps_reset())
>               exit(err);
> 
> -     pid = fork();
> -
> -     if (pid < 0)
> +     void **newstack;
> +     newstack = (void **) malloc(STACKSIZE);
> +     if (!newstack)
>               exit(err);
> 
> -     if (!pid) {
> -
> -             /* restore default signal handlers */
> -             for (i = 1; i < NSIG; i++)
> -                     signal(i, SIG_DFL);
> +     newstack = (void **)(STACKSIZE + (char *)newstack);
> +     *--newstack = 0;
> +     pid = clone(child_main, newstack, CLONE_VM | CLONE_FS | CLONE_SIGHAND, 
> 0);
> 

Passing CLONE_VM here means execvp() will flush the memory for both
parent and child... This means the lxc-init code doesn't run anymore and
this isn't acceptable, as it honors the child ripping work in an
application container.

> -             sigprocmask(SIG_SETMASK, &omask, NULL);
> -
> -             NOTICE("about to exec '%s'", aargv[0]);
> -
> -             execvp(aargv[0], aargv);
> -             ERROR("failed to exec: '%s' : %m", aargv[0]);
> +     if (pid < 0)
>               exit(err);
> -     }
> 
>       /* let's process the signals now */
>       sigdelset(&omask, SIGALRM);

-- 
Gregory Kurz                                     gk...@fr.ibm.com
Software Engineer @ IBM/Meiosys                  http://www.ibm.com
Tel +33 (0)534 638 479                           Fax +33 (0)561 400 420

"Anarchy is about taking complete responsibility for yourself."
        Alan Moore.


------------------------------------------------------------------------------
EMC VNX: the world's simplest storage, starting under $10K
The only unified storage solution that offers unified management 
Up to 160% more powerful than alternatives and 25% more efficient. 
Guaranteed. http://p.sf.net/sfu/emc-vnx-dev2dev
_______________________________________________
Lxc-devel mailing list
Lxc-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/lxc-devel

Reply via email to