Module Name: src Committed By: riastradh Date: Fri Jul 7 12:34:50 UTC 2023
Modified Files: src/sys/kern: files.kern init_main.c kern_clock.c kern_cpu.c src/sys/sys: cpu_data.h Added Files: src/share/man/man9: heartbeat.9 src/sys/kern: kern_heartbeat.c src/sys/sys: heartbeat.h Log Message: heartbeat(9): New mechanism to check progress of kernel. This uses hard interrupts to check progress of low-priority soft interrupts, and one CPU to check progress of another CPU. If no progress has been made after a configurable number of seconds (kern.heartbeat.max_period, default 15), then the system panics -- preferably on the CPU that is stuck so we get a stack trace in dmesg of where it was stuck, but if the stuckness was detected by another CPU and the stuck CPU doesn't acknowledge the request to panic within one second, the detecting CPU panics instead. This doesn't supplant hardware watchdog timers. It is possible for hard interrupts to be stuck on all CPUs for some reason too; in that case heartbeat(9) has no opportunity to complete. Downside: heartbeat(9) relies on hardclock to run at a reasonably consistent rate, which might cause trouble for the glorious tickless future. However, it could be adapted to take a parameter for an approximate number of units that have elapsed since the last call on the current CPU, rather than treating that as a constant 1. XXX kernel revbump -- changes struct cpu_info layout To generate a diff of this commit: cvs rdiff -u -r0 -r1.1 src/share/man/man9/heartbeat.9 cvs rdiff -u -r1.57 -r1.58 src/sys/kern/files.kern cvs rdiff -u -r1.541 -r1.542 src/sys/kern/init_main.c cvs rdiff -u -r1.149 -r1.150 src/sys/kern/kern_clock.c cvs rdiff -u -r1.94 -r1.95 src/sys/kern/kern_cpu.c cvs rdiff -u -r0 -r1.1 src/sys/kern/kern_heartbeat.c cvs rdiff -u -r1.52 -r1.53 src/sys/sys/cpu_data.h cvs rdiff -u -r0 -r1.1 src/sys/sys/heartbeat.h Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/kern/files.kern diff -u src/sys/kern/files.kern:1.57 src/sys/kern/files.kern:1.58 --- src/sys/kern/files.kern:1.57 Sun Sep 19 15:51:27 2021 +++ src/sys/kern/files.kern Fri Jul 7 12:34:50 2023 @@ -1,13 +1,15 @@ -# $NetBSD: files.kern,v 1.57 2021/09/19 15:51:27 thorpej Exp $ +# $NetBSD: files.kern,v 1.58 2023/07/07 12:34:50 riastradh Exp $ # # kernel sources # define kern: cprng_fast, machdep, uvm +defflag opt_heartbeat.h HEARTBEAT defflag opt_kern.h KERN defflag opt_script.h SETUIDSCRIPTS FDSCRIPTS defflag KASLR defparam opt_cnmagic.h CNMAGIC +defparam heartbeat.h HEARTBEAT_MAX_PERIOD_DEFAULT file conf/debugsyms.c kern file conf/param.c kern @@ -48,6 +50,7 @@ file kern/kern_exec.c kern file kern/kern_exit.c kern file kern/kern_fork.c kern file kern/kern_idle.c kern +file kern/kern_heartbeat.c kern & heartbeat file kern/kern_hook.c kern file kern/kern_kthread.c kern file kern/kern_ktrace.c ktrace Index: src/sys/kern/init_main.c diff -u src/sys/kern/init_main.c:1.541 src/sys/kern/init_main.c:1.542 --- src/sys/kern/init_main.c:1.541 Wed Oct 26 23:20:47 2022 +++ src/sys/kern/init_main.c Fri Jul 7 12:34:50 2023 @@ -1,4 +1,4 @@ -/* $NetBSD: init_main.c,v 1.541 2022/10/26 23:20:47 riastradh Exp $ */ +/* $NetBSD: init_main.c,v 1.542 2023/07/07 12:34:50 riastradh Exp $ */ /*- * Copyright (c) 2008, 2009, 2019 The NetBSD Foundation, Inc. 
@@ -97,10 +97,11 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: init_main.c,v 1.541 2022/10/26 23:20:47 riastradh Exp $"); +__KERNEL_RCSID(0, "$NetBSD: init_main.c,v 1.542 2023/07/07 12:34:50 riastradh Exp $"); #include "opt_cnmagic.h" #include "opt_ddb.h" +#include "opt_heartbeat.h" #include "opt_inet.h" #include "opt_ipsec.h" #include "opt_modular.h" @@ -199,6 +200,7 @@ extern void *_binary_splash_image_end; #include <sys/cprng.h> #include <sys/psref.h> #include <sys/radixtree.h> +#include <sys/heartbeat.h> #include <sys/syscall.h> #include <sys/syscallargs.h> @@ -557,6 +559,14 @@ main(void) /* Once all CPUs are detected, initialize the per-CPU cprng_fast. */ cprng_fast_init(); +#ifdef HEARTBEAT + /* + * Now that softints can be established, start monitoring + * system heartbeat on all CPUs. + */ + heartbeat_start(); +#endif + ssp_init(); ubc_init(); /* must be after autoconfig */ Index: src/sys/kern/kern_clock.c diff -u src/sys/kern/kern_clock.c:1.149 src/sys/kern/kern_clock.c:1.150 --- src/sys/kern/kern_clock.c:1.149 Fri Jun 30 21:42:05 2023 +++ src/sys/kern/kern_clock.c Fri Jul 7 12:34:50 2023 @@ -1,4 +1,4 @@ -/* $NetBSD: kern_clock.c,v 1.149 2023/06/30 21:42:05 riastradh Exp $ */ +/* $NetBSD: kern_clock.c,v 1.150 2023/07/07 12:34:50 riastradh Exp $ */ /*- * Copyright (c) 2000, 2004, 2006, 2007, 2008 The NetBSD Foundation, Inc. 
@@ -69,11 +69,12 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: kern_clock.c,v 1.149 2023/06/30 21:42:05 riastradh Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_clock.c,v 1.150 2023/07/07 12:34:50 riastradh Exp $"); #ifdef _KERNEL_OPT #include "opt_dtrace.h" #include "opt_gprof.h" +#include "opt_heartbeat.h" #include "opt_multiprocessor.h" #endif @@ -92,6 +93,7 @@ __KERNEL_RCSID(0, "$NetBSD: kern_clock.c #include <sys/cpu.h> #include <sys/atomic.h> #include <sys/rndsource.h> +#include <sys/heartbeat.h> #ifdef GPROF #include <sys/gmon.h> @@ -335,6 +337,13 @@ hardclock(struct clockframe *frame) tc_ticktock(); } +#ifdef HEARTBEAT + /* + * Make sure the CPUs and timecounter are making progress. + */ + heartbeat(); +#endif + /* * Update real-time timeout queue. */ Index: src/sys/kern/kern_cpu.c diff -u src/sys/kern/kern_cpu.c:1.94 src/sys/kern/kern_cpu.c:1.95 --- src/sys/kern/kern_cpu.c:1.94 Sun Feb 26 07:13:55 2023 +++ src/sys/kern/kern_cpu.c Fri Jul 7 12:34:50 2023 @@ -1,4 +1,4 @@ -/* $NetBSD: kern_cpu.c,v 1.94 2023/02/26 07:13:55 skrll Exp $ */ +/* $NetBSD: kern_cpu.c,v 1.95 2023/07/07 12:34:50 riastradh Exp $ */ /*- * Copyright (c) 2007, 2008, 2009, 2010, 2012, 2019 The NetBSD Foundation, Inc. 
@@ -60,10 +60,11 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: kern_cpu.c,v 1.94 2023/02/26 07:13:55 skrll Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_cpu.c,v 1.95 2023/07/07 12:34:50 riastradh Exp $"); #ifdef _KERNEL_OPT #include "opt_cpu_ucode.h" +#include "opt_heartbeat.h" #endif #include <sys/param.h> @@ -85,6 +86,7 @@ __KERNEL_RCSID(0, "$NetBSD: kern_cpu.c,v #include <sys/namei.h> #include <sys/callout.h> #include <sys/pcu.h> +#include <sys/heartbeat.h> #include <uvm/uvm_extern.h> @@ -377,6 +379,10 @@ fail: s = splsched(); spc->spc_flags &= ~SPCF_OFFLINE; splx(s); + +#ifdef HEARTBEAT + heartbeat_suspend(); +#endif } static void @@ -385,6 +391,10 @@ cpu_xc_online(struct cpu_info *ci, void struct schedstate_percpu *spc; int s; +#ifdef HEARTBEAT + heartbeat_resume(); +#endif + spc = &ci->ci_schedstate; s = splsched(); spc->spc_flags &= ~SPCF_OFFLINE; Index: src/sys/sys/cpu_data.h diff -u src/sys/sys/cpu_data.h:1.52 src/sys/sys/cpu_data.h:1.53 --- src/sys/sys/cpu_data.h:1.52 Sun Jun 14 21:41:42 2020 +++ src/sys/sys/cpu_data.h Fri Jul 7 12:34:50 2023 @@ -1,4 +1,4 @@ -/* $NetBSD: cpu_data.h,v 1.52 2020/06/14 21:41:42 ad Exp $ */ +/* $NetBSD: cpu_data.h,v 1.53 2023/07/07 12:34:50 riastradh Exp $ */ /*- * Copyright (c) 2004, 2006, 2007, 2008, 2019, 2020 The NetBSD Foundation, Inc. 
@@ -188,6 +188,11 @@ struct cpu_data { kcpuset_t *cpu_kcpuset; /* kcpuset_t of this cpu only */ struct lwp * volatile cpu_pcu_curlwp[PCU_UNIT_COUNT]; int64_t cpu_counts[CPU_COUNT_MAX];/* per-CPU counts */ + + unsigned cpu_heartbeat_count; /* # of heartbeats */ + unsigned cpu_heartbeat_uptime_cache; /* last time_uptime */ + unsigned cpu_heartbeat_uptime_stamp; /* heartbeats since + * uptime changed */ }; #define ci_schedstate ci_data.cpu_schedstate @@ -216,6 +221,10 @@ struct cpu_data { #define ci_faultrng ci_data.cpu_faultrng #define ci_counts ci_data.cpu_counts +#define ci_heartbeat_count ci_data.cpu_heartbeat_count +#define ci_heartbeat_uptime_cache ci_data.cpu_heartbeat_uptime_cache +#define ci_heartbeat_uptime_stamp ci_data.cpu_heartbeat_uptime_stamp + #define cpu_nsyscall cpu_counts[CPU_COUNT_NSYSCALL] #define cpu_ntrap cpu_counts[CPU_COUNT_NTRAP] #define cpu_nswtch cpu_counts[CPU_COUNT_NSWTCH] Added files: Index: src/share/man/man9/heartbeat.9 diff -u /dev/null src/share/man/man9/heartbeat.9:1.1 --- /dev/null Fri Jul 7 12:34:50 2023 +++ src/share/man/man9/heartbeat.9 Fri Jul 7 12:34:49 2023 @@ -0,0 +1,169 @@ +.\" $NetBSD: heartbeat.9,v 1.1 2023/07/07 12:34:49 riastradh Exp $ +.\" +.\" Copyright (c) 2023 The NetBSD Foundation, Inc. +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. 
AND CONTRIBUTORS +.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +.\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS +.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +.\" POSSIBILITY OF SUCH DAMAGE. +.\" +.Dd July 6, 2023 +.Dt HEARTBEAT 9 +.Os +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh NAME +.Nm heartbeat +.Nd periodic checks to ensure CPUs are making progress +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh SYNOPSIS +.Fd "options HEARTBEAT" +.Fd "options HEARTBEAT_MAX_PERIOD_DEFAULT=15" +.\" +.In sys/heartbeat.h +.\" +.Ft void +.Fn heartbeat_start void +.Ft void +.Fn heartbeat void +.Ft void +.Fn heartbeat_suspend void +.Ft void +.Fn heartbeat_resume void +.Fd "#ifdef DDB" +.Ft void +.Fn heartbeat_dump void +.Fd "#endif" +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh DESCRIPTION +The +.Nm +subsystem verifies that soft interrupts +.Pq Xr softint 9 +and the system +.Xr timecounter 9 +are making progress over time, and panics if they appear stuck. +.Pp +The number of seconds before +.Nm +panics without progress is controlled by the sysctl knob +.Li kern.heartbeat.max_period , +which defaults to 15. +If set to zero, heartbeat checks are disabled. +.Pp +The periodic hardware timer interrupt handler calls +.Fn heartbeat +every tick on each CPU. 
+Once per second +.Po +i.e., every +.Xr hz 9 +ticks +.Pc , +.Fn heartbeat +schedules a soft interrupt at priority +.Dv SOFTINT_CLOCK +to advance the current CPU's view of +.Xr time_uptime 9 . +.Pp +.Fn heartbeat +checks whether +.Xr time_uptime 9 +has changed, to see if either the +.Xr timecounter 9 +or soft interrupts on the current CPU are stuck. +If it hasn't advanced within +.Li kern.heartbeat.max_period +seconds worth of ticks, or if it has updated and the current CPU's view +of it hasn't been updated for more than +.Li kern.heartbeat.max_period +seconds, then +.Fn heartbeat +panics. +.Pp +.Fn heartbeat +also checks whether the next online CPU has advanced its view of +.Xr time_uptime 9 , +to see if soft interrupts +.Pq including Xr callout 9 +on that CPU are stuck. +If it hasn't updated within +.Li kern.heartbeat.max_period +seconds, +.Fn heartbeat +sends an +.Xr ipi 9 +to panic on that CPU. +If that CPU has not acknowledged the +.Xr ipi 9 +within one second, +.Fn heartbeat +panics on the current CPU instead. +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh FUNCTIONS +.Bl -tag -width Fn +.It Fn heartbeat +Check for timecounter and soft interrupt progress on this CPU and on +another CPU, and schedule a soft interrupt to advance this CPU's view +of timecounter progress. +.Pp +Called by +.Xr hardclock 9 +periodically. +.It Fn heartbeat_dump +Print each CPU's heartbeat counter, uptime cache, and uptime cache +timestamp (in units of heartbeats) to the console. +.Pp +Can be invoked from +.Xr ddb 9 +by +.Ql call heartbeat_dump . +.It Fn heartbeat_resume +Resume heartbeat monitoring of the current CPU. +.Pp +Called after a CPU has started running but before it has been +marked online. +.It Fn heartbeat_start +Start monitoring heartbeats systemwide. +.Pp +Called by +.Xr main 9 +as soon as soft interrupts can be established. +.It Fn heartbeat_suspend +Suspend heartbeat monitoring of the current CPU. 
+.Pp +Called after the current CPU has been marked offline but before it has +stopped running. +.El +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh CODE REFERENCES +The +.Nm +subsystem is implemented in +.Pa sys/kern/kern_heartbeat.c . +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh SEE ALSO +.Xr wdogctl 8 , +.Xr swwdog 4 +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh HISTORY +The +.Nm +subsystem first appeared in +.Nx 11.0 . Index: src/sys/kern/kern_heartbeat.c diff -u /dev/null src/sys/kern/kern_heartbeat.c:1.1 --- /dev/null Fri Jul 7 12:34:50 2023 +++ src/sys/kern/kern_heartbeat.c Fri Jul 7 12:34:50 2023 @@ -0,0 +1,656 @@ +/* $NetBSD: kern_heartbeat.c,v 1.1 2023/07/07 12:34:50 riastradh Exp $ */ + +/*- + * Copyright (c) 2023 The NetBSD Foundation, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * heartbeat(9) -- periodic checks to ensure CPUs are making progress + * + * Manual tests to run when changing this file. Magic numbers are for + * evbarm; adjust for other platforms. Tests involving cpuctl + * online/offline assume a 2-CPU system -- for full testing on a >2-CPU + * system, offline all but one CPU. + * + * 1. cpuctl offline 0 + * sleep 20 + * cpuctl online 0 + * + * 2. cpuctl offline 1 + * sleep 20 + * cpuctl online 1 + * + * 3. cpuctl offline 0 + * sysctl -w kern.heartbeat.max_period=5 + * sleep 10 + * sysctl -w kern.heartbeat.max_period=0 + * sleep 10 + * sysctl -w kern.heartbeat.max_period=5 + * sleep 10 + * cpuctl online 0 + * + * 4. sysctl -w debug.crashme_enable=1 + * sysctl -w debug.crashme.spl_spinout=1 # IPL_SOFTCLOCK + * # verify system panics after 15sec + * + * 5. sysctl -w debug.crashme_enable=1 + * sysctl -w debug.crashme.spl_spinout=6 # IPL_SCHED + * # verify system panics after 15sec + * + * 6. cpuctl offline 0 + * sysctl -w debug.crashme_enable=1 + * sysctl -w debug.crashme.spl_spinout=1 # IPL_SOFTCLOCK + * # verify system panics after 15sec + * + * 7. cpuctl offline 0 + * sysctl -w debug.crashme_enable=1 + * sysctl -w debug.crashme.spl_spinout=5 # IPL_VM + * # verify system panics after 15sec + * + * # Not this -- IPL_SCHED and IPL_HIGH spinout on a single CPU + * # require a hardware watchdog timer. 
+ * #cpuctl offline 0 + * #sysctl -w debug.crashme_enable=1 + * #sysctl -w debug.crashme.spl_spinout=6 # IPL_SCHED + * # hope watchdog timer kicks in + */ + +#include <sys/cdefs.h> +__KERNEL_RCSID(0, "$NetBSD: kern_heartbeat.c,v 1.1 2023/07/07 12:34:50 riastradh Exp $"); + +#ifdef _KERNEL_OPT +#include "opt_ddb.h" +#include "opt_heartbeat.h" +#endif + +#include "heartbeat.h" + +#include <sys/param.h> +#include <sys/types.h> + +#include <sys/atomic.h> +#include <sys/cpu.h> +#include <sys/errno.h> +#include <sys/heartbeat.h> +#include <sys/ipi.h> +#include <sys/mutex.h> +#include <sys/sysctl.h> +#include <sys/systm.h> +#include <sys/xcall.h> + +#ifdef DDB +#include <ddb/ddb.h> +#endif + +/* + * Global state. + * + * heartbeat_lock serializes access to heartbeat_max_period_secs + * and heartbeat_max_period_ticks. Two separate variables so we + * can avoid multiplication or division in the heartbeat routine. + * + * heartbeat_sih is stable after initialization in + * heartbeat_start. + */ +kmutex_t heartbeat_lock __cacheline_aligned; +unsigned heartbeat_max_period_secs __read_mostly; +unsigned heartbeat_max_period_ticks __read_mostly; + +void *heartbeat_sih __read_mostly; + +/* + * heartbeat_suspend() + * + * Suspend heartbeat monitoring of the current CPU. + * + * Called after the current CPU has been marked offline but before + * it has stopped running. Caller must have preemption disabled. + */ +void +heartbeat_suspend(void) +{ + + KASSERT(kpreempt_disabled()); + + /* + * Nothing to do -- we just check the SPCF_OFFLINE flag. + */ +} + +/* + * heartbeat_resume() + * + * Resume heartbeat monitoring of the current CPU. + * + * Called after the current CPU has started running but before it + * has been marked online. Also used internally when starting up + * heartbeat monitoring at boot or when the maximum period is set + * from zero to nonzero. Caller must have preemption disabled. 
+ */ +void +heartbeat_resume(void) +{ + struct cpu_info *ci = curcpu(); + int s; + + KASSERT(kpreempt_disabled()); + + /* + * Block heartbeats while we reset the state so we don't + * spuriously think we had a heart attack in the middle of + * resetting the count and the uptime stamp. + */ + s = splsched(); + ci->ci_heartbeat_count = 0; + ci->ci_heartbeat_uptime_cache = atomic_load_relaxed(&time_uptime); + ci->ci_heartbeat_uptime_stamp = 0; + splx(s); +} + +/* + * heartbeat_reset_xc(a, b) + * + * Cross-call handler to reset heartbeat state just prior to + * enabling heartbeat checks. + */ +static void +heartbeat_reset_xc(void *a, void *b) +{ + + heartbeat_resume(); +} + +/* + * set_max_period(max_period) + * + * Set the maximum period, in seconds, for heartbeat checks. + * + * - If max_period is zero, disable them. + * + * - If the max period was zero and max_period is nonzero, ensure + * all CPUs' heartbeat uptime caches are up-to-date before + * re-enabling them. + * + * max_period must be below UINT_MAX/4/hz to avoid arithmetic + * overflow and give room for slop. + * + * Caller must hold heartbeat_lock. + */ +static void +set_max_period(unsigned max_period) +{ + + KASSERTMSG(max_period <= UINT_MAX/4/hz, + "max_period=%u must not exceed UINT_MAX/4/hz=%u (hz=%u)", + max_period, UINT_MAX/4/hz, hz); + KASSERT(mutex_owned(&heartbeat_lock)); + + /* + * If we're enabling heartbeat checks, make sure we have a + * reasonably up-to-date time_uptime cache on all CPUs so we + * don't think we had an instant heart attack. + */ + if (heartbeat_max_period_secs == 0 && max_period != 0) + xc_wait(xc_broadcast(0, &heartbeat_reset_xc, NULL, NULL)); + + /* + * Once the heartbeat state has been updated on all (online) + * CPUs, set the period. At this point, heartbeat checks can + * begin. 
+ */ + atomic_store_relaxed(&heartbeat_max_period_secs, max_period); + atomic_store_relaxed(&heartbeat_max_period_ticks, max_period*hz); +} + +/* + * heartbeat_max_period_sysctl(SYSCTLFN_ARGS) + * + * Sysctl handler for sysctl kern.heartbeat.max_period. Verifies + * it lies within a reasonable interval and sets it. + */ +static int +heartbeat_max_period_sysctl(SYSCTLFN_ARGS) +{ + struct sysctlnode node; + unsigned max_period; + int error; + + mutex_enter(&heartbeat_lock); + + max_period = heartbeat_max_period_secs; + node = *rnode; + node.sysctl_data = &max_period; + error = sysctl_lookup(SYSCTLFN_CALL(&node)); + if (error || newp == NULL) + goto out; + + /* + * Ensure there's plenty of slop between heartbeats. + */ + if (max_period > UINT_MAX/4/hz) { + error = EOVERFLOW; + goto out; + } + + /* + * Success! Set the period. This enables heartbeat checks if + * we went from zero period to nonzero period, or disables them + * if the other way around. + */ + set_max_period(max_period); + error = 0; + +out: mutex_exit(&heartbeat_lock); + return error; +} + +/* + * sysctl_heartbeat_setup() + * + * Set up the kern.heartbeat.* sysctl subtree. 
+ */ +SYSCTL_SETUP(sysctl_heartbeat_setup, "sysctl kern.heartbeat setup") +{ + const struct sysctlnode *rnode; + int error; + + mutex_init(&heartbeat_lock, MUTEX_DEFAULT, IPL_NONE); + + /* kern.heartbeat */ + error = sysctl_createv(NULL, 0, NULL, &rnode, + CTLFLAG_PERMANENT, + CTLTYPE_NODE, "heartbeat", + SYSCTL_DESCR("Kernel heartbeat parameters"), + NULL, 0, NULL, 0, + CTL_KERN, CTL_CREATE, CTL_EOL); + if (error) { + printf("%s: failed to create kern.heartbeat: %d\n", + __func__, error); + return; + } + + /* kern.heartbeat.max_period */ + error = sysctl_createv(NULL, 0, &rnode, NULL, + CTLFLAG_PERMANENT|CTLFLAG_READWRITE, + CTLTYPE_INT, "max_period", + SYSCTL_DESCR("Max seconds between heartbeats before panic"), + &heartbeat_max_period_sysctl, 0, NULL, 0, + CTL_CREATE, CTL_EOL); + if (error) { + printf("%s: failed to create kern.heartbeat.max_period: %d\n", + __func__, error); + return; + } +} + +/* + * heartbeat_intr(cookie) + * + * Soft interrupt handler to update the local CPU's view of the + * system uptime. This runs at the same priority level as + * callouts, so if callouts are stuck on this CPU, it won't run, + * and eventually another CPU will notice that this one is stuck. + * + * Don't do spl* here -- keep it to a minimum so if anything goes + * wrong we don't end up with hard interrupts blocked and unable + * to detect a missed heartbeat. + */ +static void +heartbeat_intr(void *cookie) +{ + unsigned count = atomic_load_relaxed(&curcpu()->ci_heartbeat_count); + unsigned uptime = atomic_load_relaxed(&time_uptime); + + atomic_store_relaxed(&curcpu()->ci_heartbeat_uptime_stamp, count); + atomic_store_relaxed(&curcpu()->ci_heartbeat_uptime_cache, uptime); +} + +/* + * heartbeat_start() + * + * Start system heartbeat monitoring. + */ +void +heartbeat_start(void) +{ + const unsigned max_period = HEARTBEAT_MAX_PERIOD_DEFAULT; + + /* + * Establish a softint so we can schedule it once ready. 
This + * should be at the lowest softint priority level so that we + * ensure all softint priorities are making progress. + */ + heartbeat_sih = softint_establish(SOFTINT_CLOCK|SOFTINT_MPSAFE, + &heartbeat_intr, NULL); + + /* + * Now that the softint is established, kick off heartbeat + * monitoring with the default period. This will initialize + * the per-CPU state to an up-to-date cache of time_uptime. + */ + mutex_enter(&heartbeat_lock); + set_max_period(max_period); + mutex_exit(&heartbeat_lock); +} + +/* + * defibrillator(cookie) + * + * IPI handler for defibrillation. If the CPU's heart has stopped + * beating normally, but the CPU can still execute things, + * acknowledge the IPI to the doctor and then panic so we at least + * get a stack trace from whatever the current CPU is stuck doing, + * if not a core dump. + * + * (This metaphor is a little stretched, since defibrillation is + * usually administered when the heart is beating erratically but + * hasn't stopped, and causes the heart to stop temporarily, and + * one hopes it is not fatal. But we're (software) engineers, so + * we can stretch metaphors like silly putty in a blender.) + */ +static void +defibrillator(void *cookie) +{ + bool *ack = cookie; + + atomic_store_relaxed(ack, true); + panic("%s[%d %s]: heart stopped beating", cpu_name(curcpu()), + curlwp->l_lid, + curlwp->l_name ? curlwp->l_name : curproc->p_comm); +} + +/* + * defibrillate(ci, d) + * + * The patient CPU ci's heart has stopped beating after d seconds. + * Force the patient CPU ci to panic, or panic on this CPU if the + * patient CPU doesn't respond within 1sec. + */ +static void __noinline +defibrillate(struct cpu_info *ci, unsigned d) +{ + bool ack = false; + ipi_msg_t msg = { + .func = &defibrillator, + .arg = &ack, + }; + unsigned countdown = 1000; /* 1sec */ + + KASSERT(kpreempt_disabled()); + + /* + * First notify the console that the patient CPU's heart seems + * to have stopped beating. 
+ */ + printf("%s: found %s heart stopped beating after %u seconds\n", + cpu_name(curcpu()), cpu_name(ci), d); + + /* + * Next, give the patient CPU a chance to panic, so we get a + * stack trace on that CPU even if we don't get a crash dump. + */ + ipi_unicast(&msg, ci); + + /* + * Busy-wait up to 1sec for the patient CPU to print a stack + * trace and panic. If the patient CPU acknowledges the IPI, + * or if we're panicking anyway, just give up and stop here -- + * the system is coming down soon and we should avoid getting + * in the way. + */ + while (countdown --> 0) { + if (atomic_load_relaxed(&ack) || + atomic_load_relaxed(&panicstr) != NULL) + return; + DELAY(1000); /* 1ms */ + } + + /* + * The patient CPU failed to acknowledge the panic request. + * Panic now; with any luck, we'll get a crash dump. + */ + panic("%s: found %s heart stopped beating and unresponsive", + cpu_name(curcpu()), cpu_name(ci)); +} + +/* + * select_patient() + * + * Select another CPU to check the heartbeat of. Returns NULL if + * there are no other online CPUs. Never returns curcpu(). + * Caller must have kpreemption disabled. + */ +static struct cpu_info * +select_patient(void) +{ + CPU_INFO_ITERATOR cii; + struct cpu_info *first = NULL, *patient = NULL, *ci; + bool passedcur = false; + + KASSERT(kpreempt_disabled()); + + /* + * In the iteration order of all CPUs, find the next online CPU + * after curcpu(), or the first online one if curcpu() is last + * in the iteration order. + */ + for (CPU_INFO_FOREACH(cii, ci)) { + if (ci->ci_schedstate.spc_flags & SPCF_OFFLINE) + continue; + if (passedcur) { + /* + * (...|curcpu()|ci|...) + * + * Found the patient right after curcpu(). + */ + KASSERT(patient != ci); + patient = ci; + break; + } + if (ci == curcpu()) { + /* + * (...|prev|ci=curcpu()|next|...) + * + * Note that we want next (or first, if there's + * nothing after curcpu()). + */ + passedcur = true; + continue; + } + if (first == NULL) { + /* + * (ci|...|curcpu()|...) 
+ * + * Record ci as first in case there's nothing + * after curcpu(). + */ + first = ci; + continue; + } + } + + /* + * If we hit the end, wrap around to the beginning. + */ + if (patient == NULL) { + KASSERT(passedcur); + patient = first; + } + + return patient; +} + +/* + * heartbeat() + * + * 1. Count a heartbeat on the local CPU. + * + * 2. Panic if the system uptime doesn't seem to have advanced in + * a while. + * + * 3. Panic if the soft interrupt on this CPU hasn't advanced the + * local view of the system uptime. + * + * 4. Schedule the soft interrupt to advance the local view of the + * system uptime. + * + * 5. Select another CPU to check the heartbeat of. + * + * 6. Panic if the other CPU hasn't advanced its view of the + * system uptime in a while. + */ +void +heartbeat(void) +{ + unsigned period_ticks, period_secs; + unsigned count, uptime, cache, stamp, d; + struct cpu_info *patient; + + KASSERT(kpreempt_disabled()); + + period_ticks = atomic_load_relaxed(&heartbeat_max_period_ticks); + period_secs = atomic_load_relaxed(&heartbeat_max_period_secs); + if (__predict_false(period_ticks == 0) || + __predict_false(period_secs == 0) || + __predict_false(curcpu()->ci_schedstate.spc_flags & SPCF_OFFLINE)) + return; + + /* + * Count a heartbeat on this CPU. + */ + count = curcpu()->ci_heartbeat_count++; + + /* + * If the uptime hasn't changed, make sure that we haven't + * counted too many of our own heartbeats since the uptime last + * changed, and stop here -- we only do the cross-CPU work once + * per second. + */ + uptime = atomic_load_relaxed(&time_uptime); + cache = atomic_load_relaxed(&curcpu()->ci_heartbeat_uptime_cache); + if (__predict_true(cache == uptime)) { + /* + * Timecounter hasn't advanced by more than a second. + * Make sure the timecounter isn't stuck according to + * our heartbeats. 
+ * + * Our own heartbeat count can't roll back, and + * time_uptime should be updated before it wraps + * around, so d should never go negative; hence no + * check for d < UINT_MAX/2. + */ + stamp = + atomic_load_relaxed(&curcpu()->ci_heartbeat_uptime_stamp); + d = count - stamp; + if (__predict_false(d > period_ticks)) { + panic("%s: time has not advanced in %u heartbeats", + cpu_name(curcpu()), d); + } + return; + } + + /* + * If the uptime has changed, make sure that it hasn't changed + * so much that softints must be stuck on this CPU. Since + * time_uptime is monotonic, this can't go negative, hence no + * check for d < UINT_MAX/2. + * + * This uses the hard timer interrupt handler on the current + * CPU to ensure soft interrupts at all priority levels have + * made progress. + */ + d = uptime - cache; + if (__predict_false(d > period_secs)) { + panic("%s: softints stuck for %u seconds", + cpu_name(curcpu()), d); + } + + /* + * Schedule a softint to update our cache of the system uptime + * so the next call to heartbeat, on this or another CPU, can + * detect progress on this one. + */ + softint_schedule(heartbeat_sih); + + /* + * Select a patient to check the heartbeat of. If there's no + * other online CPU, nothing to do. + */ + patient = select_patient(); + if (patient == NULL) + return; + + /* + * Verify that time is advancing on the patient CPU. If the + * delta exceeds UINT_MAX/2, that means it is already ahead by + * a little on the other CPU, and the subtraction went + * negative, which is OK. If the CPU has been + * offlined since we selected it, no worries. + * + * This uses the current CPU to ensure the other CPU has made + * progress, even if the other CPU's hard timer interrupt + * handler is stuck for some reason. + * + * XXX Maybe confirm it hasn't gone negative by more than + * max_period? 
+ */ + d = uptime - atomic_load_relaxed(&patient->ci_heartbeat_uptime_cache); + if (__predict_false(d > period_secs) && + __predict_false(d < UINT_MAX/2) && + ((patient->ci_schedstate.spc_flags & SPCF_OFFLINE) == 0)) + defibrillate(patient, d); +} + +/* + * heartbeat_dump() + * + * Print the heartbeat data of all CPUs. Can be called from ddb. + */ +#ifdef DDB +static unsigned +db_read_unsigned(const unsigned *p) +{ + unsigned x; + + db_read_bytes((db_addr_t)p, sizeof(x), (char *)&x); + + return x; +} + +void +heartbeat_dump(void) +{ + struct cpu_info *ci; + + db_printf("Heartbeats:\n"); + for (ci = db_cpu_first(); ci != NULL; ci = db_cpu_next(ci)) { + db_printf("cpu%u: count %u uptime %u stamp %u\n", + db_read_unsigned(&ci->ci_index), + db_read_unsigned(&ci->ci_heartbeat_count), + db_read_unsigned(&ci->ci_heartbeat_uptime_cache), + db_read_unsigned(&ci->ci_heartbeat_uptime_stamp)); + } +} +#endif Index: src/sys/sys/heartbeat.h diff -u /dev/null src/sys/sys/heartbeat.h:1.1 --- /dev/null Fri Jul 7 12:34:50 2023 +++ src/sys/sys/heartbeat.h Fri Jul 7 12:34:50 2023 @@ -0,0 +1,53 @@ +/* $NetBSD: heartbeat.h,v 1.1 2023/07/07 12:34:50 riastradh Exp $ */ + +/*- + * Copyright (c) 2023 The NetBSD Foundation, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. 
AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _SYS_HEARTBEAT_H +#define _SYS_HEARTBEAT_H + +#ifndef _KERNEL +#error No user-serviceable parts in this heart. +#endif + +#ifdef _KERNEL_OPT +#include "opt_heartbeat.h" +#endif + +#ifdef HEARTBEAT + +void heartbeat_start(void); + +void heartbeat(void); + +void heartbeat_suspend(void); +void heartbeat_resume(void); + +void heartbeat_dump(void); + +#endif + +#endif /* _SYS_HEARTBEAT_H */