vmd: rate-limit to avoid reboot loops

Reyk Floeter Fri, 05 Oct 2018 14:33:36 -0700

Hi,

it sometimes happens that a VM is stuck in a reboot loop.  This isn't
very pleasant for vmd, so this diff attempts to introduce a hard
rate-limit: if the VM rebooted after less than VM_START_RATE_SEC (6)
seconds, increment a counter.  If this happens VM_START_RATE_LIMIT (3)
times in a row, stop the VM.


The idea is that it might be desirable in some cases to reboot quickly
(you're either really fast on the boot prompt, or you use something
like grub that can automatically reboot into a previous kernel).  But
if this happens too often (more than 3 times), something is wrong and
cannot be intended, not even in the worst Linux/grub/unikernel/...
situation.

These limits are a guessed default.

Test case: I dd'ed random bytes to a kernel after some initial bytes,
keeping the original size of the kernel.  The boot loader loads the
header, the complete kernel, tries to boot it and *boom*, reset ;)

Comments?  Concerns?  Better ideas?  OKs?

Reyk

Index: usr.sbin/vmd/config.c
===================================================================
RCS file: /cvs/src/usr.sbin/vmd/config.c,v
retrieving revision 1.50
diff -u -p -u -p -r1.50 config.c
--- usr.sbin/vmd/config.c       7 Aug 2018 14:49:05 -0000       1.50
+++ usr.sbin/vmd/config.c       5 Oct 2018 21:15:12 -0000
@@ -187,6 +187,7 @@ config_setvm(struct privsep *ps, struct 
        char                     ifname[IF_NAMESIZE], *s;
        char                     path[PATH_MAX];
        unsigned int             unit;
+       struct timeval           tv, rate, since_last;
 
        errno = 0;
 
@@ -204,6 +205,39 @@ config_setvm(struct privsep *ps, struct 
                        goto fail;
                }
        }
+
+       /*
+        * Rate-limit the VM so that it cannot restart in a loop:
+        * if the VM restarts after less than VM_START_RATE_SEC seconds,
+        * we increment the limit counter.  After VM_START_RATE_LIMIT
+        * of such fast reboots the VM is stopped.
+        */
+       getmonotime(&tv);
+       if (vm->vm_start_tv.tv_sec) {
+               timersub(&tv, &vm->vm_start_tv, &since_last);
+
+               rate.tv_sec = VM_START_RATE_SEC;
+               rate.tv_usec = 0;
+               if (timercmp(&since_last, &rate, <))
+                       vm->vm_start_limit++;
+               else {
+                       /* Reset counter */
+                       vm->vm_start_limit = 0;
+               }
+
+               log_debug("%s: vm %u restarted after %lld.%ld seconds,"
+                   " limit %d/%d", __func__, vcp->vcp_id, since_last.tv_sec,
+                   since_last.tv_usec, vm->vm_start_limit,
+                   VM_START_RATE_LIMIT);
+
+               if (vm->vm_start_limit >= VM_START_RATE_LIMIT) {
+                       log_warnx("%s: vm %u restarted too quickly",
+                           __func__, vcp->vcp_id);
+                       errno = EPERM;
+                       goto fail;
+               }
+       }
+       vm->vm_start_tv = tv;
 
        diskfds = reallocarray(NULL, vcp->vcp_ndisks, sizeof(*diskfds));
        if (diskfds == NULL) {
Index: usr.sbin/vmd/vmd.c
===================================================================
RCS file: /cvs/src/usr.sbin/vmd/vmd.c,v
retrieving revision 1.102
diff -u -p -u -p -r1.102 vmd.c
--- usr.sbin/vmd/vmd.c  29 Sep 2018 22:33:09 -0000      1.102
+++ usr.sbin/vmd/vmd.c  5 Oct 2018 21:15:12 -0000
@@ -1918,3 +1918,14 @@ prefixlen2mask(uint8_t prefixlen)
 
        return (htonl(0xffffffff << (32 - prefixlen)));
 }
+
+void
+getmonotime(struct timeval *tv)
+{
+       struct timespec  ts;
+
+       if (clock_gettime(CLOCK_MONOTONIC, &ts))
+               fatal("clock_gettime");
+
+       TIMESPEC_TO_TIMEVAL(tv, &ts);
+}
Index: usr.sbin/vmd/vmd.h
===================================================================
RCS file: /cvs/src/usr.sbin/vmd/vmd.h,v
retrieving revision 1.81
diff -u -p -u -p -r1.81 vmd.h
--- usr.sbin/vmd/vmd.h  1 Oct 2018 09:31:15 -0000       1.81
+++ usr.sbin/vmd/vmd.h  5 Oct 2018 21:15:13 -0000
@@ -54,6 +54,10 @@
 #define VMD_SWITCH_TYPE                "bridge"
 #define VM_DEFAULT_MEMORY      512
 
+/* Rate-limit fast reboots */
+#define VM_START_RATE_SEC      6       /* min. seconds since last reboot */
+#define VM_START_RATE_LIMIT    3       /* max. number of fast reboots */
+
 /* default user instance limits */
 #define VM_DEFAULT_USER_MAXCPU 4
 #define VM_DEFAULT_USER_MAXMEM 2048
@@ -260,6 +264,10 @@ struct vmd_vm {
        int                      vm_receive_fd;
        struct vmd_user         *vm_user;
 
+       /* For rate-limiting */
+       struct timeval           vm_
----- Message truncated -----

vmd: rate-limit to avoid reboot loops

Reply via email to