On Mon, Nov 25, 2019 at 8:14 PM Felipe Franciosi <fel...@nutanix.com> wrote: > > This introduces a self-fence mechanism to Qemu, causing it to die if a > heartbeat condition is not met. Currently, a file-based heartbeat is > available and can be configured as follows: > > -object file-fence,id=ff0,file=/foo,qtimeout=20,ktimeout=25,signal=kill > > Qemu will watch 'file' for attribute changes. Touching the file works as > a heartbeat. This parameter is mandatory. > > Fencing happens after 'qtimeout' or 'ktimeout' seconds elapse without a > heartbeat. At least one of these must be specified. Both may be used. > > When using 'qtimeout', an internal Qemu timer is used. Fencing with this > method gives Qemu a chance to write a log message indicating which file > caused the event. If Qemu's main loop is hung for whatever reason, this > method won't successfully kill Qemu. > > When using 'ktimeout', a kernel timer is used. In this case, 'signal' > can be 'kill' (for SIGKILL, default) or 'quit' (for SIGQUIT). Using > SIGQUIT may be preferred for obtaining core dumps. If Qemu is hung > (eg. uninterruptable sleep), this method won't successfully kill Qemu. > > It is worth noting that even successfully killing Qemu may not be > sufficient to completely fence a VM as certain operations like network > packets or block commands may be pending in the kernel. If that is a > concern, systems should consider using further fencing mechanisms like > hardware watchdogs either in addition or in conjunction with this for > additional protection. > > Signed-off-by: Felipe Franciosi <fel...@nutanix.com> > --- > Based-on: <20191125153619.39893-2-fel...@nutanix.com> > > Makefile.objs | 1 + > fence/Makefile.objs | 1 + > fence/file_fence.c | 381 ++++++++++++++++++++++++++++++++++++++++++++
I think it could be under backends/ And a slight preference for - seperated words in filenames over qemu codebase. > qemu-options.hx | 27 +++- > 4 files changed, 409 insertions(+), 1 deletion(-) > create mode 100644 fence/Makefile.objs > create mode 100644 fence/file_fence.c > > diff --git a/Makefile.objs b/Makefile.objs > index 11ba1a36bd..998eed4796 100644 > --- a/Makefile.objs > +++ b/Makefile.objs > @@ -75,6 +75,7 @@ common-obj-$(CONFIG_TPM) += tpm.o > > common-obj-y += backends/ > common-obj-y += chardev/ > +common-obj-y += fence/ > > common-obj-$(CONFIG_SECCOMP) += qemu-seccomp.o > qemu-seccomp.o-cflags := $(SECCOMP_CFLAGS) > diff --git a/fence/Makefile.objs b/fence/Makefile.objs > new file mode 100644 > index 0000000000..2ed2092568 > --- /dev/null > +++ b/fence/Makefile.objs > @@ -0,0 +1 @@ > +common-obj-y += file_fence.o > diff --git a/fence/file_fence.c b/fence/file_fence.c > new file mode 100644 > index 0000000000..5b743e69d2 > --- /dev/null > +++ b/fence/file_fence.c > @@ -0,0 +1,381 @@ > +/* > + * QEMU file-based self-fence mechanism > + * > + * Copyright (c) 2019 Nutanix Inc. All rights reserved. > + * > + * Authors: > + * Felipe Franciosi <fel...@nutanix.com> > + * > + * This library is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2 of the License, or (at your option) any later version. > + * > + * This library is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. > + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with this library; if not, see > <http://www.gnu.org/licenses/>. > + * > + */ > + > +#include "qemu/osdep.h" > +#include "qapi/error.h" > +#include "qom/object_interfaces.h" > +#include "qemu/filemonitor.h" > +#include "qemu/timer.h" > + > +#include <time.h> > + > +#define TYPE_FILE_FENCE "file-fence" > + > +typedef struct FileFence { > + Object parent_obj; > + > + gchar *dir; > + gchar *file; > + uint32_t qtimeout; > + uint32_t ktimeout; > + int signal; > + > + timer_t ktimer; > + QEMUTimer *qtimer; > + > + QFileMonitor *fm; > + uint64_t id; > +} FileFence; > + > +#define FILE_FENCE(obj) \ > + OBJECT_CHECK(FileFence, (obj), TYPE_FILE_FENCE) > + > +static void > +timer_update(FileFence *ff) > +{ > + struct itimerspec its = { > + .it_value.tv_sec = ff->ktimeout, > + }; > + int err; > + > + if (ff->qtimeout) { > + timer_mod(ff->qtimer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + > + ff->qtimeout * 1000); > + } > + > + if (ff->ktimeout) { > + err = timer_settime(ff->ktimer, 0, &its, NULL); > + g_assert(err == 0); > + } > +} > + > +static void > +file_fence_abort_cb(void *opaque) > +{ > + FileFence *ff = opaque; > + printf("Fencing after %u seconds on '%s'\n", > + ff->qtimeout, g_strconcat(ff->dir, "/", ff->file, NULL)); May be error_printf() instead. > + abort(); > +} > + > +static void > +file_fence_watch_cb(int64_t id, QFileMonitorEvent ev, const char *file, > + void *opaque) > +{ > + FileFence *ff = opaque; > + > + if (ev != QFILE_MONITOR_EVENT_ATTRIBUTES) { > + return; > + } > + > + if (g_strcmp0(file, ff->file) != 0) { Does it actually happen? Apparently the code in util/filemonitor-inotify.c already checks for g_str_equal(filename) > + return; > + } > + > + timer_update(ff); > +} > + > +static void > +ktimer_tear(FileFence *ff) > +{ > + int err; > + > + if (ff->ktimer) { > + err = timer_delete(ff->ktimer); > + g_assert(err == 0); > + ff->ktimer = NULL; > + } > +} > + > +static void > +ktimer_setup(FileFence *ff, Error **errp) > +{ > + int err; > + > + struct sigevent sev = { > + .sigev_notify = SIGEV_SIGNAL, > + .sigev_signo = ff->signal ? ff->signal : SIGKILL, > + }; > + > + if (ff->ktimeout == 0) { > + return; > + } > + > + err = timer_create(CLOCK_MONOTONIC, &sev, &ff->ktimer); > + if (err == -1) { > + error_setg(errp, "Error creating kernel timer: %m"); > + return; > + } > +} > + > +static void > +qtimer_tear(FileFence *ff) > +{ > + if (ff->qtimer) { > + timer_del(ff->qtimer); > + timer_free(ff->qtimer); > + } > + ff->qtimer = NULL; > +} > + > +static void > +qtimer_setup(FileFence *ff, Error **errp) > +{ > + QEMUTimer *qtimer; > + > + if (ff->qtimeout == 0) { > + return; > + } > + > + qtimer = timer_new_ms(QEMU_CLOCK_REALTIME, file_fence_abort_cb, ff); > + if (qtimer == NULL) { > + error_setg(errp, "Error creating Qemu timer"); > + return; > + } > + > + ff->qtimer = qtimer; > +} > + > +static void > +watch_tear(FileFence *ff) > +{ > + if (ff->fm) { > + qemu_file_monitor_remove_watch(ff->fm, ff->dir, ff->id); > + qemu_file_monitor_free(ff->fm); > + ff->fm = NULL; > + ff->id = 0; > + } > +} > + > +static void > +watch_setup(FileFence *ff, Error **errp) > +{ > + QFileMonitor *fm; > + int64_t id; > + > + fm = qemu_file_monitor_new(errp); > + if (!fm) { > + return; > + } > + > + id = qemu_file_monitor_add_watch(fm, ff->dir, ff->file, > + file_fence_watch_cb, ff, errp); > + if (id < 0) { > + qemu_file_monitor_free(fm); > + return; > + } > + > + ff->fm = fm; > + ff->id = id; > +} > + > +static void > +file_fence_complete(UserCreatable *obj, Error **errp) > +{ > + FileFence *ff = FILE_FENCE(obj); > + > + if (ff->dir == NULL) { > + error_setg(errp, "A 'file' must be set"); > + return; > + } > + > + if (ff->signal != 0 && ff->ktimeout == 0) { > + error_setg(errp, "Using 'signal' requires 'ktimeout' to be set"); > + return; > + } > + > + if (ff->ktimeout == 0 && ff->qtimeout == 0) { > + error_setg(errp, "One or both of 'ktimeout' or 'qtimeout' must be > set"); > + return; > + } > + > + if (ff->qtimeout >= ff->ktimeout) { > + error_setg(errp, "Using 'qtimeout' >= 'ktimeout' doesn't make > sense"); > + return; > + } > + > + watch_setup(ff, errp); > + if (*errp != NULL) { > + return; > + } > + > + qtimer_setup(ff, errp); > + if (*errp != NULL) { > + goto err_watch; > + } > + > + ktimer_setup(ff, errp); > + if (*errp != NULL) { > + goto err_qtimer; > + } I would rather return success on the setup functions and write: if (!watch_setup(ff, errp) || !qtimer_setup(...) || !...) { return; (no cleanup) } Object creation will fail, and finalize function will be called. > + > + timer_update(ff); > + > + return; > + > +err_qtimer: > + qtimer_tear(ff); > +err_watch: > + watch_tear(ff); > +} > + > +static void > +file_fence_set_signal(Object *obj, const char *value, Error **errp) > +{ > + FileFence *ff = FILE_FENCE(obj); > + gchar *gsig; > + > + if (ff->signal) { > + error_setg(errp, "Signal property already set"); > + return; > + } > + > + gsig = g_ascii_strup(value, -1); > + > + if (g_strcmp0(gsig, "QUIT") == 0) { > + ff->signal = SIGQUIT; > + } else > + if (g_strcmp0(gsig, "KILL") == 0) { > + ff->signal = SIGKILL; > + } else { > + error_setg(errp, "Invalid signal. Must be 'quit' or 'kill'"); > + } Or bail out early for NULL case and use g_ascii_strcasecmp() > + > + g_free(gsig); > +} > + > +static char * > +file_fence_get_signal(Object *obj, Error **errp) > +{ > + FileFence *ff = FILE_FENCE(obj); > + > + switch (ff->signal) { > + case SIGKILL: > + return g_strdup("kill"); > + case SIGQUIT: > + return g_strdup("quit"); > + } > + > + /* Unreachable */ > + abort(); > +} > + > +static void > +file_fence_set_file(Object *obj, const char *value, Error **errp) > +{ > + FileFence *ff = FILE_FENCE(obj); > + gchar *dir, *file; g_autofree > + > + if (ff->dir) { > + error_setg(errp, "File property already set"); > + return; > + } > + > + dir = g_path_get_dirname(value); > + if (g_str_equal(dir, ".")) { > + error_setg(errp, "Path for file-fence must be absolute"); > + return; > + } > + > + file = g_path_get_basename(value); > + if (g_str_equal(file, ".")) { > + error_setg(errp, "Path for file-fence must be a file"); > + g_free(dir); > + return; > + } > + > + ff->dir = dir; > + ff->file = file; g_steal_pointer() > +} > + > +static char * > +file_fence_get_file(Object *obj, Error **errp) > +{ > + FileFence *ff = FILE_FENCE(obj); > + > + if (ff->file) { > + return g_strconcat(ff->dir, "/", ff->file, NULL); Or g_build_filename() > + } > + > + return NULL; > +} > + > +static void > +file_fence_instance_finalize(Object *obj) > +{ > + FileFence *ff = FILE_FENCE(obj); > + > + ktimer_tear(ff); > + qtimer_tear(ff); > + watch_tear(ff); > + > + g_free(ff->file); > + g_free(ff->dir); > +} > + > +static void > +file_fence_instance_init(Object *obj) > +{ > + FileFence *ff = FILE_FENCE(obj); > + > + object_property_add_str(obj, "file", > + file_fence_get_file, > + file_fence_set_file, > + &error_abort); > + object_property_add_str(obj, "signal", > + file_fence_get_signal, > + file_fence_set_signal, > + &error_abort); > + object_property_add_uint32_ptr(obj, "qtimeout", &ff->qtimeout, > + false, &error_abort); > + object_property_add_uint32_ptr(obj, "ktimeout", &ff->ktimeout, > + false, &error_abort); Make them class properties (this will help with -object file-fence,help and such). (fwiw, I have pending patches to support class property description & default values..) > +} > + > +static void > +file_fence_class_init(ObjectClass *klass, void *class_data) > +{ > + UserCreatableClass *ucc = USER_CREATABLE_CLASS(klass); > + ucc->complete = file_fence_complete; > +} > + > +static const TypeInfo file_fence_info = { > + .name = TYPE_FILE_FENCE, > + .parent = TYPE_OBJECT, > + .class_init = file_fence_class_init, > + .instance_size = sizeof(FileFence), > + .instance_init = file_fence_instance_init, > + .instance_finalize = file_fence_instance_finalize, > + .interfaces = (InterfaceInfo[]) { > + { TYPE_USER_CREATABLE }, > + { } > + } > +}; > + > +static void > +register_types(void) > +{ > + type_register_static(&file_fence_info); > +} > + > +type_init(register_types); > diff --git a/qemu-options.hx b/qemu-options.hx > index 65c9473b73..995d3d6abf 100644 > --- a/qemu-options.hx > +++ b/qemu-options.hx > @@ -4929,8 +4929,33 @@ CN=laptop.example.com,O=Example > Home,L=London,ST=London,C=GB > > @end table > > -ETEXI > +@item -object > file-fence,id=@var{id},file=@var{file},qtimeout=@var{qtimeout},ktimeout=@var{ktimeout},signal=@{signal} > + > +Self-fence Qemu if @var{file} is not modified within a given timeout. > + > +Qemu will watch @var{file} for attribute changes. Touching the file works as > a > +heartbeat. This parameter is mandatory. > + > +Fencing happens after @var{qtimeout} or @var{ktimeout} seconds elapse > +without a heartbeat. At least one of these must be specified. Both may be > used. > > +When using @var{qtimeout}, an internal Qemu timer is used. Fencing with > +this method gives Qemu a chance to write a log message indicating which file > +caused the event. If Qemu's main loop is hung for whatever reason, this > method > +won't successfully kill Qemu. > + > +When using @var{ktimeout}, a kernel timer is used. In this case, @var{signal} > +can be 'kill' (for SIGKILL, default) or 'quit' (for SIGQUIT). Using SIGQUIT > may > +be preferred for obtaining core dumps. If Qemu is hung (eg. uninterruptable > +sleep), this method won't successfully kill Qemu. > + > +It is worth noting that even successfully killing Qemu may not be sufficient > to > +completely fence a VM as certain operations like network packets or block > +commands may be pending in the kernel. If that is a concern, systems should > +consider using further fencing mechanisms like hardware watchdogs either in > +addition or in conjunction with this feature for additional protection. > + > +ETEXI > > HXCOMM This is the last statement. Insert new options before this line! > STEXI > -- > 2.20.1 > Code looks fine to me otherwise -- Marc-André Lureau