vmd(8): teach it to exec vm's after fork

Dave Voutila Mon, 17 Apr 2023 13:24:39 -0700

tech@,

vmd currently uses a fork-only approach to spawning new vm's, resulting
in each vm having the same address space and inheriting some global
state from the vmm process (like the list of running vm's). This diff
introduces an execvp(2) call after fork, making the vm process re-exec.


Things to note:

1. the vmm process switches to unveil(2) instead of chroot(2) as it now
   needs access to the vmd binary for exec. As a result, I adapted the
   design from sshd(8) requiring an absolute path for execution. You
   will receive a "vmd: re-exec requires execution with an absolute
   path" warning and vmd will terminate if not launched with an absolute
   path to vmd.

2. vm process similarly adopts unveil(2) as it is now re-exec'ing and
   not root, so it cannot call chroot(2).

3. vmm process adopts "exec" pledge(2) promise.

If testing the diff and you want to use send/receive, you'll also need
the diff [1] I previously shared on tech@.

I'd prefer some testing before asking for commit, as I'd like to land [1]
before this change.

-dv

[1] https://marc.info/?l=openbsd-tech&m=168166549712160&q=mbox


diffstat refs/heads/master refs/heads/vmd-fork+exec
 M  usr.sbin/vmd/vm.c   |  139+  82-
 M  usr.sbin/vmd/vmd.c  |   28+   5-
 M  usr.sbin/vmd/vmd.h  |    5+   0-
 M  usr.sbin/vmd/vmm.c  |  100+  12-

4 files changed, 272 insertions(+), 99 deletions(-)

diff refs/heads/master refs/heads/vmd-fork+exec
commit - 291811d84b76a57a1ac3f9885ec1482a600a81cd
commit + 89b4ed10ffc7ed48c6f1334497272e071f7c7a88
blob - 5b9a1831f5b5b896589f7e8c79309d370ace400d
blob + 50d59fbcfb8a7c63dfb8c3f9316b7cc52e8be514
--- usr.sbin/vmd/vm.c
+++ usr.sbin/vmd/vm.c
@@ -74,8 +74,7 @@ int run_vm(int, int[][VM_MAX_BASE_PER_DISK], int *,

 io_fn_t ioports_map[MAX_PORTS];

-int run_vm(int, int[][VM_MAX_BASE_PER_DISK], int *,
-    struct vmop_create_params *, struct vcpu_reg_state *);
+static int run_vm(struct vmop_create_params *, struct vcpu_reg_state *);
 void vm_dispatch_vmm(int, short, void *);
 void *event_thread(void *);
 void *vcpu_run_loop(void *);
@@ -214,6 +213,72 @@ static const struct vcpu_reg_state vcpu_init_flat16 =
 };

 /*
+ * vm_main
+ *
+ * Primary entrypoint for launching a vm. Does not return.
+ *
+ * fd: file descriptor for communicating with vmm process.
+ */
+void
+vm_main(int fd)
+{
+       struct vm_create_params *vcp = NULL;
+       struct vmd_vm            vm;
+       size_t                   sz = 0;
+       int                      ret = 0;
+
+       /*
+        * We aren't root, so we can't chroot(2). Use unveil(2) instead.
+        */
+       if (unveil("/var/empty", "") == -1)
+               fatal("unveil /var/empty");
+       if (unveil(NULL, NULL) == -1)
+               fatal("unveil lock");
+
+       /*
+        * pledge in the vm processes:
+        * stdio - for malloc and basic I/O including events.
+        * vmm - for the vmm ioctls and operations.
+        * recvfd - for vm send/recv and sending fd to devices.
+        * proc - required for vmm(4) VMM_IOC_CREATE ioctl
+        */
+       if (pledge("stdio vmm recvfd proc", NULL) == -1)
+               fatal("pledge");
+
+       /* Receive our vm configuration. */
+       memset(&vm, 0, sizeof(vm));
+       sz = atomicio(read, fd, &vm, sizeof(vm));
+       if (sz != sizeof(vm)) {
+               log_warnx("failed to receive start message");
+               _exit(EIO);
+       }
+
+       /* Receive the /dev/vmm fd number. */
+       sz = atomicio(read, fd, &env->vmd_fd, sizeof(env->vmd_fd));
+       if (sz != sizeof(env->vmd_fd)) {
+               log_warnx("failed to receive /dev/vmm fd");
+               _exit(EIO);
+       }
+
+       /* Update process with the vm name. */
+       vcp = &vm.vm_params.vmc_params;
+       setproctitle("%s", vcp->vcp_name);
+       log_procinit(vcp->vcp_name);
+
+       /*
+        * We need, at minimum, a vm_kernel fd to boot a vm. This is either a
+        * kernel or a BIOS image.
+        */
+       if (vm.vm_kernel < 0 && !(vm.vm_state & VM_STATE_RECEIVED)) {
+               log_warnx("%s: failed to receive boot fd", vcp->vcp_name);
+               _exit(EINVAL);
+       }
+
+       ret = start_vm(&vm, fd);
+       _exit(ret);
+}
+
+/*
  * loadfile_bios
  *
  * Alternatively to loadfile_elf, this function loads a non-ELF BIOS image
@@ -300,15 +365,14 @@ start_vm(struct vmd_vm *vm, int fd)
        struct vm_rwregs_params  vrp;
        struct stat              sb;

-       /* Child */
-       setproctitle("%s", vcp->vcp_name);
-       log_procinit(vcp->vcp_name);
-
+       /*
+        * We first try to initialize and allocate memory before bothering
+        * vmm(4) with a request to create a new vm.
+        */
        if (!(vm->vm_state & VM_STATE_RECEIVED))
                create_memory_map(vcp);

-       ret = alloc_guest_mem(vcp);
-
+       ret = alloc_guest_mem(&vm->vm_params.vmc_params);
        if (ret) {
                struct rlimit lim;
                char buf[FMT_SCALED_STRSIZE];
@@ -318,31 +382,44 @@ start_vm(struct vmd_vm *vm, int fd)
                                    "limit is %s)", buf);
                }
                errno = ret;
-               fatal("could not allocate guest memory");
+               log_warn("could not allocate guest memory");
+               return (ret);
        }

+       /* We've allocated guest memory, so now create the vm in vmm(4). */
        ret = vmm_create_vm(vcp);
+       if (ret) {
+               /* Let the vmm process know we failed by sending a 0 vm id. */
+               vcp->vcp_id = 0;
+               atomicio(vwrite, fd, &vcp->vcp_id, sizeof(vcp->vcp_id));
+               return (ret);
+       }
+
+       /* Tighten pledge now that we've called VMM_IOC_CREATE ioctl. */
+       if (pledge("stdio vmm recvfd", NULL) == -1)
+               fatal("pledge");
+
+       /*
+        * Some of vmd currently relies on global state (current_vm, con_fd).
+        */
        current_vm = vm;
+       con_fd = vm->vm_tty;
+       if (fcntl(con_fd, F_SETFL, O_NONBLOCK) == -1) {
+               log_warn("failed to set nonblocking mode on console");
+               return (1);
+       }

-       /* send back the kernel-generated vm id (0 on error) */
+       /*
+        * We now let the vmm process know we were successful by sending it our
+        * vmm(4) assigned vm id.
+        */
        if (atomicio(vwrite, fd, &vcp->vcp_id, sizeof(vcp->vcp_id)) !=
-           sizeof(vcp->vcp_id))
-               fatal("failed to send created vm id to vmm process");
-
-       if (ret) {
-               errno = ret;
-               fatal("create vmm ioctl failed - exiting");
+           sizeof(vcp->vcp_id)) {
+               log_warn("failed to send created vm id to vmm process");
+               return (1);
        }

-       /*
-        * pledge in the vm processes:
-        * stdio - for malloc and basic I/O including events.
-        * recvfd - for send/recv.
-        * vmm - for the vmm ioctls and operations.
-        */
-       if (pledge("stdio vmm recvfd", NULL) == -1)
-               fatal("pledge");
-
+       /* Prepare either our boot image or receive an existing vm to launch. */
        if (vm->vm_state & VM_STATE_RECEIVED) {
                ret = atomicio(read, vm->vm_receive_fd, &vrp, sizeof(vrp));
                if (ret != sizeof(vrp))
@@ -377,17 +454,38 @@ start_vm(struct vmd_vm *vm, int fd)
        }

        if (vm->vm_kernel != -1)
-               close(vm->vm_kernel);
+               close_fd(vm->vm_kernel);

-       con_fd = vm->vm_tty;
-       if (fcntl(con_fd, F_SETFL, O_NONBLOCK) == -1)
-               fatal("failed to set nonblocking mode on console");
+       /* Initialize our mutexes. */
+       ret = pthread_mutex_init(&threadmutex, NULL);
+       if (ret) {
+               log_warn("%s: could not initialize thread state mutex",
+                   __func__);
+               return (ret);
+       }
+       ret = pthread_cond_init(&threadcond, NULL);
+       if (ret) {
+               log_warn("%s: could not initialize thread state "
+                   "condition variable", __func__);
+               return (ret);
+       }
+       mutex_lock(&threadmutex);

-       for (i = 0; i < VM_MAX_NICS_PER_VM; i++)
-               nicfds[i] = vm->vm_ifs[i].vif_fd;

+       /*
+        * Finalize our communication socket with the vmm process. From here
+        * onwards, communication with the vmm process is event-based.
+        */
        event_init();
+       if (vmm_pipe(vm, fd, vm_dispatch_vmm) == -1)
+               fatal("setup vm pipe");

+       /*
+        * Initialize or restore our emulated hardware.
+        */
+       for (i = 0; i < VMM_MAX_NICS_PER_VM; i++)
+               nicfds[i] = vm->vm_ifs[i].vif_fd;
+
        if (vm->vm_state & VM_STATE_RECEIVED) {
                restore_emulated_hw(vcp, vm->vm_receive_fd, nicfds,
                    vm->vm_disks, vm->vm_cdrom);
@@ -395,14 +493,14 @@ start_vm(struct vmd_vm *vm, int fd)
                if (restore_vm_params(vm->vm_receive_fd, vcp))
                        fatal("restore vm params failed");
                unpause_vm(vcp);
-       }
+       } else
+               init_emulated_hw(vmc, vm->vm_cdrom, vm->vm_disks, nicfds);

-       if (vmm_pipe(vm, fd, vm_dispatch_vmm) == -1)
-               fatal("setup vm pipe");
+       /*
+        * Execute the vcpu run loop(s) for this VM.
+        */
+       ret = run_vm(&vm->vm_params, &vrs);

-       /* Execute the vcpu run loop(s) for this VM */
-       ret = run_vm(vm->vm_cdrom, vm->vm_disks, nicfds, &vm->vm_params, &vrs);
-
        /* Ensure that any in-flight data is written back */
        virtio_shutdown(vm);

@@ -1205,10 +1303,8 @@ int
  *  0: the VM exited normally
  *  !0 : the VM exited abnormally or failed to start
  */
-int
-run_vm(int child_cdrom, int child_disks[][VM_MAX_BASE_PER_DISK],
-    int *child_taps, struct vmop_create_params *vmc,
-    struct vcpu_reg_state *vrs)
+static int
+run_vm(struct vmop_create_params *vmc, struct vcpu_reg_state *vrs)
 {
        struct vm_create_params *vcp = &vmc->vmc_params;
        struct vm_rwregs_params vregsp;
@@ -1223,24 +1319,6 @@ run_vm(int child_cdrom, int child_disks[][VM_MAX_BASE_
        if (vcp == NULL)
                return (EINVAL);

-       if (child_cdrom == -1 && strlen(vcp->vcp_cdrom))
-               return (EINVAL);
-
-       if (child_disks == NULL && vcp->vcp_ndisks != 0)
-               return (EINVAL);
-
-       if (child_taps == NULL && vcp->vcp_nnics != 0)
-               return (EINVAL);
-
-       if (vcp->vcp_ncpus > VMM_MAX_VCPUS_PER_VM)
-               return (EINVAL);
-
-       if (vcp->vcp_ndisks > VM_MAX_DISKS_PER_VM)
-               return (EINVAL);
-
-       if (vcp->vcp_nnics > VM_MAX_NICS_PER_VM)
-               return (EINVAL);
-
        if (vcp->vcp_nmemranges == 0 ||
            vcp->vcp_nmemranges > VMM_MAX_MEM_RANGES)
                return (EINVAL);
@@ -1253,30 +1331,9 @@ run_vm(int child_cdrom, int child_disks[][VM_MAX_BASE_
                return (ENOMEM);
        }

-       log_debug("%s: initializing hardware for vm %s", __func__,
-           vcp->vcp_name);
+       log_debug("%s: starting %zu vcpu thread(s) for vm %s", __func__,
+           vcp->vcp_ncpus, vcp->vcp_name);

-       if (!(current_vm->vm_state & VM_STATE_RECEIVED))
-               init_emulated_hw(vmc, child_cdrom, child_disks, child_taps);
-
-       ret = pthread_mutex_init(&threadmutex, NULL);
-       if (ret) {
-               log_warn("%s: could not initialize thread state mutex",
-                   __func__);
-               return (ret);
-       }
-       ret = pthread_cond_init(&threadcond, NULL);
-       if (ret) {
-               log_warn("%s: could not initialize thread state "
-                   "condition variable", __func__);
-               return (ret);
-       }
-
-       mutex_lock(&threadmutex);
-
-       log_debug("%s: starting vcpu threads for vm %s", __func__,
-           vcp->vcp_name);
-
        /*
         * Create and launch one thread for each VCPU. These threads may
         * migrate between PCPUs over time; the need to reload CPU state
blob - 38b8ad46bf5af242677637ffcaaf4359dd409d8d
blob + 0a5f5c2e732caae43dcc43cf27ed3ff7e453602a
--- usr.sbin/vmd/vmd.c
+++ usr.sbin/vmd/vmd.c
@@ -75,8 +75,10 @@ static struct privsep_proc procs[] = {
        /* Keep "priv" on top as procs[0] */
        { "priv",       PROC_PRIV,      vmd_dispatch_priv, priv },
        { "control",    PROC_CONTROL,   vmd_dispatch_control, control },
-       { "vmm",        PROC_VMM,       vmd_dispatch_vmm, vmm, vmm_shutdown },
-       { "agentx",     PROC_AGENTX,    vmd_dispatch_agentx, vm_agentx, vm_agentx_shutdown, "/" }
+       { "vmm",        PROC_VMM,       vmd_dispatch_vmm, vmm,
+         vmm_shutdown, "/" },
+       { "agentx",     PROC_AGENTX,    vmd_dispatch_agentx, vm_agentx,
+         vm_agentx_shutdown, "/" }
 };

 enum privsep_procid privsep_process;
@@ -768,7 +770,7 @@ main(int argc, char **argv)
        int                      ch;
        const char              *conffile = VMD_CONF;
        enum privsep_procid      proc_id = PROC_PARENT;
-       int                      proc_instance = 0;
+       int                      proc_instance = 0, vm_launch = 0, vm_fd = -1;
        const char              *errp, *title = NULL;
        int                      argc0 = argc;

@@ -777,7 +779,7 @@ main(int argc, char **argv)
        if ((env = calloc(1, sizeof(*env))) == NULL)
                fatal("calloc: env");

-       while ((ch = getopt(argc, argv, "D:P:I:df:vn")) != -1) {
+       while ((ch = getopt(argc, argv, "D:P:I:V:df:vn")) != -1) {
                switch (ch) {
                case 'D':
                        if (cmdline_symset(optarg) < 0)
@@ -793,6 +795,7 @@ main(int argc, char **argv)
                case 'v':
                        env->vmd_verbose++;
                        break;
+               /* vmd fork/exec */
                case 'n':
                        env->vmd_noaction = 1;
                        break;
@@ -808,6 +811,13 @@ main(int argc, char **argv)
                        if (errp)
                                fatalx("invalid process instance");
                        break;
+               /* child vm fork/exec */
+               case 'V':
+                       vm_launch = VMD_LAUNCH_VM;
+                       vm_fd = strtonum(optarg, 0, 128, &errp);
+                       if (errp)
+                               fatalx("invalid vm fd");
+                       break;
                default:
                        usage();
                }
@@ -823,8 +833,13 @@ main(int argc, char **argv)
        log_init(env->vmd_debug, LOG_DAEMON);
        log_setverbose(env->vmd_verbose);

+       /* Re-exec from the vmm child process requires an absolute path. */
+       if (proc_id == PROC_PARENT && *argv[0] != '/')
+               fatalx("re-exec requires execution with an absolute path");
+       env->argv0 = argv[0];
+
        /* check for root privileges */
-       if (env->vmd_noaction == 0) {
+       if (env->vmd_noaction == 0 && !vm_launch) {
                if (geteuid())
                        fatalx("need root privileges");
        }
@@ -843,6 +858,14 @@ main(int argc, char **argv)
        proc_priv->p_pw = &proc_privpw; /* initialized to all 0 */
        proc_priv->p_chroot = ps->ps_pw->pw_dir; /* from VMD_USER */

+       /*
+        * If we're launching a new vm or its device, we short out here.
+        */
+       if (vm_launch == VMD_LAUNCH_VM) {
+               vm_main(vm_fd);
+               /* NOTREACHED */
+       }
+
        /* Open /dev/vmm early. */
        if (env->vmd_noaction == 0 && proc_id == PROC_PARENT) {
                env->vmd_fd = open(VMM_NODE, O_RDWR);
blob - 153d4206257bcb36748e7ec50da2b978cf822c81
blob + 71e02b6d2f74e0e2760971b2af5d3909470f25dc
--- usr.sbin/vmd/vmd.h
+++ usr.sbin/vmd/vmd.h
@@ -72,6 +72,9 @@

 #define VMD_DEFAULT_STAGGERED_START_DELAY 30

+/* Launch mode identifiers for when a vm fork+exec's. */
+#define VMD_LAUNCH_VM          1
+
 /* Rate-limit fast reboots */
 #define VM_START_RATE_SEC      6       /* min. seconds since last reboot */
 #define VM_START_RATE_LIMIT    3       /* max. number of fast reboots */
@@ -355,6 +358,7 @@ struct vmd {
 struct vmd {
        struct privsep           vmd_ps;
        const char              *vmd_conffile;
+       char                    *argv0; /* abs. path to vmd for exec, unveil */

        /* global configuration that is sent to the children */
        struct vmd_config        vmd_cfg;
@@ -464,6 +468,7 @@ void         mutex_lock(pthread_mutex_t *);
 int     vmm_pipe(struct vmd_vm *, int, void (*)(int, short, void *));

 /* vm.c */
+void    vm_main(int);
 void    mutex_lock(pthread_mutex_t *);
 void    mutex_unlock(pthread_mutex_t *);
 int     read_mem(paddr_t, void *buf, size_t);
blob - 36c909e94bef9e6998e86eb2f2c8ea72d11d9d9c
blob + 4ab90d07ac603f431c2aa82b3985879dce517f44
--- usr.sbin/vmd/vmm.c
+++ usr.sbin/vmd/vmm.c
@@ -80,20 +80,28 @@ vmm_run(struct privsep *ps, struct privsep_proc *p, vo
        if (config_init(ps->ps_env) == -1)
                fatal("failed to initialize configuration");

-       signal_del(&ps->ps_evsigchld);
-       signal_set(&ps->ps_evsigchld, SIGCHLD, vmm_sighdlr, ps);
-       signal_add(&ps->ps_evsigchld, NULL);
+       /*
+        * We aren't root, so we can't chroot(2). Use unveil(2) instead.
+        */
+       if (unveil(env->argv0, "x") == -1)
+               fatal("unveil %s", env->argv0);
+       if (unveil(NULL, NULL) == -1)
+               fatal("unveil lock");

        /*
         * pledge in the vmm process:
         * stdio - for malloc and basic I/O including events.
         * vmm - for the vmm ioctls and operations.
-        * proc - for forking and maitaining vms.
+        * proc, exec - for forking and execing new vm's.
         * sendfd - for sending send/recv fds to vm proc.
         * recvfd - for disks, interfaces and other fds.
         */
-       if (pledge("stdio vmm sendfd recvfd proc", NULL) == -1)
+       if (pledge("stdio vmm sendfd recvfd proc exec", NULL) == -1)
                fatal("pledge");
+
+       signal_del(&ps->ps_evsigchld);
+       signal_set(&ps->ps_evsigchld, SIGCHLD, vmm_sighdlr, ps);
+       signal_add(&ps->ps_evsigchld, NULL);
 }

 int
@@ -603,7 +611,7 @@ opentap(char *ifname)
 /*
  * vmm_start_vm
  *
- * Prepares and forks a new VM process.
+ * Prepares and fork+execs a new VM process.
  *
  * Parameters:
  *  imsg: The VM data structure that is including the VM create parameters.
@@ -619,7 +627,8 @@ vmm_start_vm(struct imsg *imsg, uint32_t *id, pid_t *p
 {
        struct vm_create_params *vcp;
        struct vmd_vm           *vm;
-       int                      ret = EINVAL;
+       char                    *nargv[5], num[32];
+       int                      fd, ret = EINVAL;
        int                      fds[2];
        pid_t                    vm_pid;
        size_t                   i, j, sz;
@@ -641,10 +650,17 @@ vmm_start_vm(struct imsg *imsg, uint32_t *id, pid_t *p
        if (socketpair(AF_UNIX, SOCK_STREAM, PF_UNSPEC, fds) == -1)
                fatal("socketpair");

-       /* Fork the vmm process to create the vm, inheriting open device fds. */
+       /* Keep our channel open after exec. */
+       if (fcntl(fds[1], F_SETFD, 0)) {
+               ret = errno;
+               log_warn("%s: fcntl", __func__);
+               goto err;
+       }
+
+       /* Start child vmd for this VM (fork, chroot, drop privs) */
        vm_pid = fork();
        if (vm_pid == -1) {
-               log_warn("%s: fork child failed", __func__);
+               log_warn("%s: start child failed", __func__);
                ret = EIO;
                goto err;
        }
@@ -654,6 +670,16 @@ vmm_start_vm(struct imsg *imsg, uint32_t *id, pid_t *p
                vm->vm_pid = vm_pid;
                close_fd(fds[1]);

+               /* Send the details over the pipe to the child. */
+               sz = atomicio(vwrite, fds[0], vm, sizeof(*vm));
+               if (sz != sizeof(*vm)) {
+                       log_warnx("%s: failed to send config for vm '%s'",
+                           __func__, vcp->vcp_name);
+                       ret = EIO;
+                       /* Defer error handling until after fd closing. */
+               }
+
+               /* As the parent/vmm process, we no longer need these fds. */
                for (i = 0 ; i < vcp->vcp_ndisks; i++) {
                        for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) {
                                if (close_fd(vm->vm_disks[i][j]) == 0)
@@ -671,6 +697,20 @@ vmm_start_vm(struct imsg *imsg, uint32_t *id, pid_t *p
                if (close_fd(vm->vm_tty) == 0)
                        vm->vm_tty = -1;

+               /* Deferred error handling from sending the vm struct. */
+               if (ret == EIO)
+                       goto err;
+
+               /* Send the fd number for /dev/vmm. */
+               sz = atomicio(vwrite, fds[0], &env->vmd_fd,
+                   sizeof(env->vmd_fd));
+               if (sz != sizeof(env->vmd_fd)) {
+                       log_warnx("%s: failed to send /dev/vmm fd for vm '%s'",
+                           __func__, vcp->vcp_name);
+                       ret = EIO;
+                       goto err;
+               }
+
                /* Read back the kernel-generated vm id from the child */
                sz = atomicio(read, fds[0], &vcp->vcp_id, sizeof(vcp->vcp_id));
                if (sz != sizeof(vcp->vcp_id)) {
@@ -681,30 +721,78 @@ vmm_start_vm(struct imsg *imsg, uint32_t *id, pid_t *p
                        goto err;
                }

+               /* Check for an invalid id. This indicates child failure. */
                if (vcp->vcp_id == 0)
                        goto err;

                *id = vcp->vcp_id;
                *pid = vm->vm_pid;

+               /* Wire up our pipe into the event handling. */
                if (vmm_pipe(vm, fds[0], vmm_dispatch_vm) == -1)
                        fatal("setup vm pipe");

                return (0);
        } else {
-               /* Child */
+               /* Child. Create a new session. */
+               if (setsid() == -1)
+                       fatal("setsid");
+
                close_fd(fds[0]);
                close_fd(PROC_PARENT_SOCK_FILENO);

-               ret = start_vm(vm, fds[1]);
+               /* Detach from terminal. */
+               if (!env->vmd_debug && (fd =
+                       open("/dev/null", O_RDWR, 0)) != -1) {
+                       dup2(fd, STDIN_FILENO);
+                       dup2(fd, STDOUT_FILENO);
+                       dup2(fd, STDERR_FILENO);
+                       if (fd > 2)
+                               close(fd);
+               }

+               /* Toggle all fds to not close on exec. */
+               for (i = 0 ; i < vcp->vcp_ndisks; i++)
+                       for (j = 0; j < VM_MAX_BASE_PER_DISK; j++)
+                               if (vm->vm_disks[i][j] != -1)
+                                       fcntl(vm->vm_disks[i][j], F_SETFD, 0);
+               for (i = 0 ; i < vcp->vcp_nnics; i++)
+                       fcntl(vm->vm_ifs[i].vif_fd, F_SETFD, 0);
+               if (vm->vm_kernel != -1)
+                       fcntl(vm->vm_kernel, F_SETFD, 0);
+               if (vm->vm_cdrom != -1)
+                       fcntl(vm->vm_cdrom, F_SETFD, 0);
+               if (vm->vm_tty != -1)
+                       fcntl(vm->vm_tty, F_SETFD, 0);
+               fcntl(env->vmd_fd, F_SETFD, 0); /* vmm device fd */
+
+               /*
+                * Prepare our new argv for execvp(2) with the fd of our open
+                * pipe to the parent/vmm process as an argument.
+                */
+               memset(num, 0, sizeof(num));
+               snprintf(num, sizeof(num), "%d", fds[1]);
+
+               nargv[0] = env->argv0;
+               nargv[1] = "-V";
+               nargv[2] = num;
+               nargv[3] = "-n";
+               nargv[4] = NULL;
+
+               /* Control resumes in vmd main(). */
+               execvp(nargv[0], nargv);
+
+               ret = errno;
+               log_warn("execvp %s", nargv[0]);
                _exit(ret);
+               /* NOTREACHED */
        }

        return (0);

  err:
-       vm_remove(vm, __func__);
+       if (!vm->vm_from_config)
+               vm_remove(vm, __func__);

        return (ret);
 }

vmd(8): teach it to exec vm's after fork

Reply via email to