Hi, it sometimes happens that a VM is stuck in a reboot loop. This isn't very pleasant for vmd, so this diff attempts to introduce a hard rate-limit: if the VM reboots after less than VM_START_RATE_SEC (6) seconds, increment a counter. If this happens VM_START_RATE_LIMIT (3) times in a row, stop the VM.
The idea is that it might be desirable in some cases to reboot quickly (you're either really fast on the boot prompt, or you use something like grub that can automatically reboot into a previous kernel). But if this happens too often (more than 3 times), something is wrong and cannot be intended, not even in the worst Linux/grub/unikernel/... situation. These limits are a guessed default. Test case: I dd'ed random bytes to a kernel after some initial bytes, keeping the original size of the kernel. The boot loader loads the header, the complete kernel, tries to boot it and *boom*, reset ;) Comments? Concerns? Better ideas? OKs? Reyk Index: usr.sbin/vmd/config.c =================================================================== RCS file: /cvs/src/usr.sbin/vmd/config.c,v retrieving revision 1.50 diff -u -p -u -p -r1.50 config.c --- usr.sbin/vmd/config.c 7 Aug 2018 14:49:05 -0000 1.50 +++ usr.sbin/vmd/config.c 5 Oct 2018 21:15:12 -0000 @@ -187,6 +187,7 @@ config_setvm(struct privsep *ps, struct char ifname[IF_NAMESIZE], *s; char path[PATH_MAX]; unsigned int unit; + struct timeval tv, rate, since_last; errno = 0; @@ -204,6 +205,39 @@ config_setvm(struct privsep *ps, struct goto fail; } } + + /* + * Rate-limit the VM so that it cannot restart in a loop: + * if the VM restarts after less than VM_START_RATE_SEC seconds, + * we increment the limit counter. After VM_START_RATE_LIMIT + * of suchs fast reboots the VM is stopped. 
+ */ + getmonotime(&tv); + if (vm->vm_start_tv.tv_sec) { + timersub(&tv, &vm->vm_start_tv, &since_last); + + rate.tv_sec = VM_START_RATE_SEC; + rate.tv_usec = 0; + if (timercmp(&since_last, &rate, <)) + vm->vm_start_limit++; + else { + /* Reset counter */ + vm->vm_start_limit = 0; + } + + log_debug("%s: vm %u restarted after %lld.%ld seconds," + " limit %d/%d", __func__, vcp->vcp_id, since_last.tv_sec, + since_last.tv_usec, vm->vm_start_limit, + VM_START_RATE_LIMIT); + + if (vm->vm_start_limit >= VM_START_RATE_LIMIT) { + log_warnx("%s: vm %u restarted too quickly", + __func__, vcp->vcp_id); + errno = EPERM; + goto fail; + } + } + vm->vm_start_tv = tv; diskfds = reallocarray(NULL, vcp->vcp_ndisks, sizeof(*diskfds)); if (diskfds == NULL) { Index: usr.sbin/vmd/vmd.c =================================================================== RCS file: /cvs/src/usr.sbin/vmd/vmd.c,v retrieving revision 1.102 diff -u -p -u -p -r1.102 vmd.c --- usr.sbin/vmd/vmd.c 29 Sep 2018 22:33:09 -0000 1.102 +++ usr.sbin/vmd/vmd.c 5 Oct 2018 21:15:12 -0000 @@ -1918,3 +1918,14 @@ prefixlen2mask(uint8_t prefixlen) return (htonl(0xffffffff << (32 - prefixlen))); } + +void +getmonotime(struct timeval *tv) +{ + struct timespec ts; + + if (clock_gettime(CLOCK_MONOTONIC, &ts)) + fatal("clock_gettime"); + + TIMESPEC_TO_TIMEVAL(tv, &ts); +} Index: usr.sbin/vmd/vmd.h =================================================================== RCS file: /cvs/src/usr.sbin/vmd/vmd.h,v retrieving revision 1.81 diff -u -p -u -p -r1.81 vmd.h --- usr.sbin/vmd/vmd.h 1 Oct 2018 09:31:15 -0000 1.81 +++ usr.sbin/vmd/vmd.h 5 Oct 2018 21:15:13 -0000 @@ -54,6 +54,10 @@ #define VMD_SWITCH_TYPE "bridge" #define VM_DEFAULT_MEMORY 512 +/* Rate-limit fast reboots */ +#define VM_START_RATE_SEC 6 /* min. seconds since last reboot */ +#define VM_START_RATE_LIMIT 3 /* max. 
number of fast reboots */ + /* default user instance limits */ #define VM_DEFAULT_USER_MAXCPU 4 #define VM_DEFAULT_USER_MAXMEM 2048 @@ -260,6 +264,10 @@ struct vmd_vm { int vm_receive_fd; struct vmd_user *vm_user; + /* For rate-limiting */ + struct timeval vm_ ----- Message truncated -----