The diff below adapts the way vmctl & vmd handle opening kernel images when a vmctl user provides the -b argument and a path. This moves the file opening into vmctl, as the current user, and passes the fd to vmd.
With some tweaks, it checks for this user-provided boot fd and uses it as an override for either the predefined boot file in vm.conf or the default vmm-firmware (seabios). Primary use case is to allow non-root users that "own" a vm (per /etc/vm.conf) to repair or reinstall their vm that they control. The file provided is opened for reading as that user and not root or _vmd. ok? diff 41ea0730f70a83ff1b0d66615596fb26e804761c refs/heads/vmd-temp commit - 41ea0730f70a83ff1b0d66615596fb26e804761c commit + 3f7970b663162524bccc19e29ce4eff592afdf06 blob - c1b04c510ec8006c08a28dfb81a64ce1e205df74 blob + e711ca82d2bbcd5ce0964cfad18a332c908d4d67 --- usr.sbin/vmctl/main.c +++ usr.sbin/vmctl/main.c @@ -81,7 +81,7 @@ struct ctl_command ctl_commands[] = { { "show", CMD_STATUS, ctl_status, "[id]" }, { "start", CMD_START, ctl_start, "[-cL] [-B device] [-b path] [-d disk] [-i count]\n" - "\t\t[-m size] [-n switch] [-r path] [-t name] id | name" }, + "\t\t[-m size] [-n switch] [-r path] [-t name] id | name", 1}, { "status", CMD_STATUS, ctl_status, "[id]" }, { "stop", CMD_STOP, ctl_stop, "[-fw] [id | -a]" }, { "unpause", CMD_UNPAUSE, ctl_unpause, "id" }, @@ -820,6 +820,10 @@ ctl_start(struct parse_result *res, int argc, char *ar char path[PATH_MAX]; const char *s; + /* We may require sendfd */ + if (pledge("stdio rpath exec unix getpw unveil sendfd", NULL) == -1) + err(1, "pledge"); + while ((ch = getopt(argc, argv, "b:B:cd:i:Lm:n:r:t:")) != -1) { switch (ch) { case 'b': blob - c4992985bfcffffa5b0a09b4e7c938d179963d46 blob + 048d34fbdb54b611f6d63a9994f003208cd0488e --- usr.sbin/vmctl/vmctl.c +++ usr.sbin/vmctl/vmctl.c @@ -81,6 +81,15 @@ vm_start(uint32_t start_id, const char *name, size_t m int i; const char *s; + if (kernel) { + if (unveil(kernel, "r") == -1) + err(1, "unveil boot kernel"); + } else { + /* We can drop sendfd promise. */ + if (pledge("stdio rpath exec unix getpw unveil", NULL) == -1) + err(1, "pledge"); + } + if (memsize) flags |= VMOP_CREATE_MEMORY; if (nnics) @@ -98,7 +107,7 @@ vm_start(uint32_t start_id, const char *name, size_t m memsize = VM_DEFAULT_MEMORY; if (ndisks > VM_MAX_DISKS_PER_VM) errx(1, "too many disks"); - else if (ndisks == 0) + else if (kernel == NULL && ndisks == 0) warnx("starting without disks"); if (kernel == NULL && ndisks == 0 && !iso) errx(1, "no kernel or disk/cdrom specified"); @@ -106,13 +115,13 @@ vm_start(uint32_t start_id, const char *name, size_t m nnics = 0; if (nnics > VM_MAX_NICS_PER_VM) errx(1, "too many network interfaces"); - if (nnics == 0) + if (kernel == NULL && nnics == 0) warnx("starting without network interfaces"); } if ((vmc = calloc(1, sizeof(struct vmop_create_params))) == NULL) return (ENOMEM); - + vmc->vmc_kernel = -1; vmc->vmc_flags = flags; /* vcp includes configuration that is shared with the kernel */ @@ -173,10 +182,13 @@ vm_start(uint32_t start_id, const char *name, size_t m sizeof(vcp->vcp_name)) >= sizeof(vcp->vcp_name)) errx(1, "vm name too long"); } - if (kernel != NULL) - if (strlcpy(vmc->vmc_kernel, kernel, - sizeof(vmc->vmc_kernel)) >= sizeof(vmc->vmc_kernel)) + if (kernel != NULL) { + if (strnlen(kernel, PATH_MAX) == PATH_MAX) errx(1, "kernel name too long"); + vmc->vmc_kernel = open(kernel, O_RDONLY); + if (vmc->vmc_kernel == -1) + err(1, "cannot open kernel '%s'", kernel); + } if (iso != NULL) if (strlcpy(vmc->vmc_cdrom, iso, sizeof(vmc->vmc_cdrom)) >= sizeof(vmc->vmc_cdrom)) @@ -187,7 +199,7 @@ vm_start(uint32_t start_id, const char *name, size_t m errx(1, "instance vm name too long"); vmc->vmc_bootdevice = bootdevice; - imsg_compose(ibuf, IMSG_VMDOP_START_VM_REQUEST, 0, 0, -1, + imsg_compose(ibuf, IMSG_VMDOP_START_VM_REQUEST, 0, 0, vmc->vmc_kernel, vmc, sizeof(struct vmop_create_params)); free(vcp); blob - e40cc6540168f18d78576323e9f711aa3b13e48e blob + 8c8afb2ee0cf0019817d3f015aa97039d90df507 --- usr.sbin/vmd/config.c +++ usr.sbin/vmd/config.c @@ -283,14 +283,15 @@ config_setvm(struct privsep *ps, struct vmd_vm *vm, ui /* * From here onward, all failures need cleanup and use goto fail */ - - if (!(vm->vm_state & VM_STATE_RECEIVED)) { - if (strlen(vmc->vmc_kernel)) { + if (!(vm->vm_state & VM_STATE_RECEIVED) && vm->vm_kernel == -1) { + if (vm->vm_kernel_path != NULL) { /* Open external kernel for child */ - if ((kernfd = open(vmc->vmc_kernel, O_RDONLY)) == -1) { + kernfd = open(vm->vm_kernel_path, O_RDONLY); + if (kernfd == -1) { ret = errno; log_warn("%s: can't open kernel or BIOS " - "boot image %s", __func__, vmc->vmc_kernel); + "boot image %s", __func__, + vm->vm_kernel_path); goto fail; } } @@ -301,21 +302,25 @@ config_setvm(struct privsep *ps, struct vmd_vm *vm, ui * typically distributed separately due to an incompatible * license. */ - if (kernfd == -1 && - (kernfd = open(VM_DEFAULT_BIOS, O_RDONLY)) == -1) { - log_warn("can't open %s", VM_DEFAULT_BIOS); - ret = VMD_BIOS_MISSING; - goto fail; + if (kernfd == -1) { + if ((kernfd = open(VM_DEFAULT_BIOS, O_RDONLY)) == -1) { + log_warn("can't open %s", VM_DEFAULT_BIOS); + ret = VMD_BIOS_MISSING; + goto fail; + } } if (vm_checkaccess(kernfd, vmc->vmc_checkaccess & VMOP_CREATE_KERNEL, uid, R_OK) == -1) { - log_warnx("vm \"%s\" no read access to kernel %s", - vcp->vcp_name, vmc->vmc_kernel); + log_warnx("vm \"%s\" no read access to kernel " + "%s", vcp->vcp_name, vm->vm_kernel_path); ret = EPERM; goto fail; } + + vm->vm_kernel = kernfd; + vmc->vmc_kernel = kernfd; } /* Open CDROM image for child */ @@ -467,11 +472,11 @@ config_setvm(struct privsep *ps, struct vmd_vm *vm, ui /* XXX check proc_compose_imsg return values */ if (vm->vm_state & VM_STATE_RECEIVED) proc_compose_imsg(ps, PROC_VMM, -1, - IMSG_VMDOP_RECEIVE_VM_REQUEST, vm->vm_vmid, fd, vmc, + IMSG_VMDOP_RECEIVE_VM_REQUEST, vm->vm_vmid, fd, vmc, sizeof(struct vmop_create_params)); else proc_compose_imsg(ps, PROC_VMM, -1, - IMSG_VMDOP_START_VM_REQUEST, vm->vm_vmid, kernfd, + IMSG_VMDOP_START_VM_REQUEST, vm->vm_vmid, vm->vm_kernel, vmc, sizeof(*vmc)); if (strlen(vmc->vmc_cdrom)) @@ -502,7 +507,7 @@ config_setvm(struct privsep *ps, struct vmd_vm *vm, ui if (!(vm->vm_state & VM_STATE_RECEIVED)) proc_compose_imsg(ps, PROC_VMM, -1, - IMSG_VMDOP_START_VM_END, vm->vm_vmid, fd, NULL, 0); + IMSG_VMDOP_START_VM_END, vm->vm_vmid, fd, NULL, 0); free(tapfds); @@ -520,7 +525,7 @@ config_setvm(struct privsep *ps, struct vmd_vm *vm, ui fail: log_warnx("failed to start vm %s", vcp->vcp_name); - if (kernfd != -1) + if (vm->vm_kernel != -1) close(kernfd); if (cdromfd != -1) close(cdromfd); @@ -551,16 +556,15 @@ config_getvm(struct privsep *ps, struct imsg *imsg) IMSG_SIZE_CHECK(imsg, &vmc); memcpy(&vmc, imsg->data, sizeof(vmc)); + vmc.vmc_kernel = imsg->fd; errno = 0; if (vm_register(ps, &vmc, &vm, imsg->hdr.peerid, 0) == -1) goto fail; - /* If the fd is -1, the kernel will be searched on the disk */ - vm->vm_kernel = imsg->fd; vm->vm_state |= VM_STATE_RUNNING; vm->vm_peerid = (uint32_t)-1; - + vm->vm_kernel = imsg->fd; return (0); fail: blob - 1fe8c1ecba38a86698303e026f2436af9f40793c blob + a9bde55116d794cc0ccd737ca2ac7f03f0c44698 --- usr.sbin/vmd/control.c +++ usr.sbin/vmd/control.c @@ -451,8 +451,10 @@ control_dispatch_imsg(int fd, short event, void *arg) vmc.vmc_owner.uid = c->peercred.uid; vmc.vmc_owner.gid = -1; + /* imsg.fd may contain kernel image fd. */ if (proc_compose_imsg(ps, PROC_PARENT, -1, - imsg.hdr.type, fd, -1, &vmc, sizeof(vmc)) == -1) { + imsg.hdr.type, fd, imsg.fd, &vmc, + sizeof(vmc)) == -1) { control_close(fd, cs); return; } blob - b3e53eb9dec1d090fadbab3146c224c484a2dec7 blob + 475a2613bf411cca7853333fad5c953552d5b48b --- usr.sbin/vmd/parse.y +++ usr.sbin/vmd/parse.y @@ -96,6 +96,7 @@ static char vsw_type[IF_NAMESIZE]; static struct vmop_create_params vmc; static struct vm_create_params *vcp; static struct vmd_switch *vsw; +static char *kernel = NULL; static char vsw_type[IF_NAMESIZE]; static int vmc_disable; static size_t vmc_nnics; @@ -325,6 +326,8 @@ vm : VM string vm_instance { char *name; memset(&vmc, 0, sizeof(vmc)); + vmc.vmc_kernel = -1; + vcp = &vmc.vmc_params; vmc_disable = 0; vmc_nnics = 0; @@ -396,8 +399,11 @@ vm : VM string vm_instance { vmc_disable ? "disabled" : "enabled"); } + vm->vm_kernel_path = kernel; + vm->vm_kernel = -1; vm->vm_from_config = 1; } + kernel = NULL; } ; @@ -458,7 +464,7 @@ vm_opts : disable { | BOOT string { char path[PATH_MAX]; - if (vmc.vmc_kernel[0] != '\0') { + if (kernel != NULL) { yyerror("kernel specified more than once"); free($2); YYERROR; @@ -471,12 +477,7 @@ vm_opts : disable { YYERROR; } free($2); - if (strlcpy(vmc.vmc_kernel, path, - sizeof(vmc.vmc_kernel)) >= - sizeof(vmc.vmc_kernel)) { - yyerror("kernel name too long"); - YYERROR; - } + kernel = path; vmc.vmc_flags |= VMOP_CREATE_KERNEL; } | BOOT DEVICE bootdevice { blob - 1ae60983755be95bd182de1b3df4ff07a2acf779 blob + f2720bb7635834883d81726a9a1898e4e5813d96 --- usr.sbin/vmd/vm.c +++ usr.sbin/vmd/vm.c @@ -271,9 +271,17 @@ vm_main(int fd) * We need, at minimum, a vm_kernel fd to boot a vm. This is either a * kernel or a BIOS image. */ - if (vm.vm_kernel < 0 && !(vm.vm_state & VM_STATE_RECEIVED)) { - log_warnx("%s: failed to receive boot fd", vcp->vcp_name); - _exit(EINVAL); + if (!(vm.vm_state & VM_STATE_RECEIVED)) { + if (vm.vm_kernel == -1) { + log_warnx("%s: failed to receive boot fd", + vcp->vcp_name); + _exit(EINVAL); + } + if (fcntl(vm.vm_kernel, F_SETFL, O_NONBLOCK) == -1) { + ret = errno; + log_warn("failed to set nonblocking mode on boot fd"); + _exit(ret); + } } ret = start_vm(&vm, fd); blob - f8af05f15a619dbb799bde46d36f0ae0ccd324f0 blob + 37dad5144fcf1815d73ee2f11cae73aba0b2598c --- usr.sbin/vmd/vmd.c +++ usr.sbin/vmd/vmd.c @@ -111,8 +111,10 @@ vmd_dispatch_control(int fd, struct privsep_proc *p, s case IMSG_VMDOP_START_VM_REQUEST: IMSG_SIZE_CHECK(imsg, &vmc); memcpy(&vmc, imsg->data, sizeof(vmc)); + vmc.vmc_kernel = imsg->fd; + ret = vm_register(ps, &vmc, &vm, 0, vmc.vmc_owner.uid); - if (vmc.vmc_flags == 0) { + if (vmc.vmc_flags == 0 || vmc.vmc_flags == VMOP_CREATE_KERNEL) { /* start an existing VM with pre-configured options */ if (!(ret == -1 && errno == EALREADY && !(vm->vm_state & VM_STATE_RUNNING))) { @@ -123,6 +125,7 @@ vmd_dispatch_control(int fd, struct privsep_proc *p, s res = errno; cmd = IMSG_VMDOP_START_VM_RESPONSE; } + if (res == 0) { res = config_setvm(ps, vm, imsg->hdr.peerid, vm->vm_params.vmc_owner.uid); @@ -1290,6 +1293,8 @@ vm_remove(struct vmd_vm *vm, const char *caller) TAILQ_REMOVE(env->vmd_vms, vm, vm_entry); vm_stop(vm, 0, caller); + if (vm->vm_kernel_path != NULL && !vm->vm_from_config) + free(vm->vm_kernel_path); free(vm); } @@ -1353,6 +1358,7 @@ vm_register(struct privsep *ps, struct vmop_create_par errno = EPERM; goto fail; } + vm->vm_kernel = vmc->vmc_kernel; *ret_vm = vm; errno = EALREADY; goto fail; @@ -1385,8 +1391,8 @@ vm_register(struct privsep *ps, struct vmop_create_par } else if (vmc->vmc_nnics > VM_MAX_NICS_PER_VM) { log_warnx("invalid number of interfaces"); goto fail; - } else if (strlen(vmc->vmc_kernel) == 0 && - vmc->vmc_ndisks == 0 && strlen(vmc->vmc_cdrom) == 0) { + } else if (vmc->vmc_kernel == -1 && vmc->vmc_ndisks == 0 + && strlen(vmc->vmc_cdrom) == 0) { log_warnx("no kernel or disk/cdrom specified"); goto fail; } else if (strlen(vcp->vcp_name) == 0) { @@ -1415,8 +1421,12 @@ vm_register(struct privsep *ps, struct vmop_create_par vm->vm_pid = -1; vm->vm_tty = -1; vm->vm_receive_fd = -1; + vm->vm_kernel = -1; vm->vm_state &= ~VM_STATE_PAUSED; + if (vmc->vmc_kernel > -1) + vm->vm_kernel = vmc->vmc_kernel; + for (i = 0; i < VM_MAX_DISKS_PER_VM; i++) for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) vm->vm_disks[i][j] = -1; @@ -1445,7 +1455,6 @@ vm_register(struct privsep *ps, struct vmop_create_par vmc->vmc_macs[i][5] = rng >> 8; } } - vm->vm_kernel = -1; vm->vm_cdrom = -1; vm->vm_iev.ibuf.fd = -1; @@ -1593,17 +1602,14 @@ vm_instance(struct privsep *ps, struct vmd_vm **vm_par } /* kernel */ - if (strlen(vmc->vmc_kernel) > 0) { + if (vmc->vmc_kernel > -1 || (vm->vm_kernel_path != NULL && + strnlen(vm->vm_kernel_path, PATH_MAX) < PATH_MAX)) { if (vm_checkinsflag(vmcp, VMOP_CREATE_KERNEL, uid) != 0) { log_warnx("vm \"%s\" no permission to set boot image", name); return (EPERM); } vmc->vmc_checkaccess |= VMOP_CREATE_KERNEL; - } else if (strlcpy(vmc->vmc_kernel, vmcp->vmc_kernel, - sizeof(vmc->vmc_kernel)) >= sizeof(vmc->vmc_kernel)) { - log_warnx("vm \"%s\" kernel name too long", name); - return (EINVAL); } /* cdrom */ blob - aabf53869f3e48477f46f5c207da6589c7fae80c blob + 77b333838e18292775a6ba8210cdb18604147738 --- usr.sbin/vmd/vmd.h +++ usr.sbin/vmd/vmd.h @@ -230,7 +230,7 @@ struct vmop_create_params { #define VMDF_QCOW2 0x02 char vmc_cdrom[PATH_MAX]; - char vmc_kernel[PATH_MAX]; + int vmc_kernel; size_t vmc_nnics; char vmc_ifnames[VM_MAX_NICS_PER_VM][IF_NAMESIZE]; @@ -299,7 +299,10 @@ struct vmd_vm { struct vmop_create_params vm_params; pid_t vm_pid; uint32_t vm_vmid; + int vm_kernel; + char *vm_kernel_path; /* Used by vm.conf. */ + int vm_cdrom; int vm_disks[VM_MAX_DISKS_PER_VM][VM_MAX_BASE_PER_DISK]; struct vmd_if vm_ifs[VM_MAX_NICS_PER_VM];