On Fri, Jun 23, 2023 at 04:31:59PM -0500, Scott Cheloha wrote: > On Tue, Jun 20, 2023 at 08:35:11AM -0600, Theo de Raadt wrote: > > Claudio Jeker <clau...@openbsd.org> wrote: > > > > > On Mon, Jun 19, 2023 at 06:41:14PM -0500, Scott Cheloha wrote: > > > > > On Jun 19, 2023, at 18:07, Theo de Raadt <dera...@openbsd.org> wrote: > > > > > > > > > > ??? Make sure to STOP all kernel profiling before attempting to > > > > > suspend or hibernate your machine. Otherwise I expect it > > > > > will hang. > > > > > > > > > > It is completely acceptable if it produces wrong results, but it must > > > > > not hang the system. > > > > > > > > The hang is present in -current, with or > > > > without this patch. > > > > > > > > I am working to figure it out. > > > > > > I don't think the suspend or hibernate code has any code to disable > > > kernel profiling. This is bad since these code paths are extremly > > > sensitive and try not to do side-effects. > > > > > > So in the suspend/hibernate code path we should disable profiling early > > > on. It makes no sense to try to run gprof collection in those code paths. > > > > Yes, that's right. > > > > It will be somewhere in kern/subr_suspend.c > > > > Be careful that the "stop profiling" and "restart profiling" are at the > > correct places. The sleep_state() function has a bunch of unrolling > > goto's which are not 100% reflexive, so be careful. > > Judging from the blinking light on my laptop, the crash is in the > resume path. > > This patch appears to fix the problem on amd64: > > - Add a new guard variable, gmon_suspended, > - Toggle gmon_suspended off at the top of sleep_state(), and > - Toggle gmon_suspended back on at the bottom of sleep_state(). > > With this applied, I can suspend/resume and hibernate/unhibernate a an > amd64/GENERIC.MP kernel w/ GPROF without issue, even if kgmon(8) has > enabled kernel profiling and increased the effective statclock > frequency.
Here is a cleaner version. - Add mcount_disable(), mcount_enable() to mcount.c. There's no reason to put the interfaces in subr_prof.c, we're only concerned with _mcount() here. - Only call mcount_disable/mcount_enable on GPROF kernels. DDBPROF does something different, it isn't relevant here. - Sync sys/lib/libkern/mcount.c with lib/libc/gmon/mcount.c. As with the prior patch, this seems to fix suspend/resume and hibernate/unhibernate on amd64 GPROF kernels when kernel profiling is activated with kgmon(8). OK? Index: sys/lib/libkern/mcount.c =================================================================== RCS file: /cvs/src/sys/lib/libkern/mcount.c,v retrieving revision 1.14 diff -u -p -r1.14 mcount.c --- sys/lib/libkern/mcount.c 11 Jan 2022 09:21:34 -0000 1.14 +++ sys/lib/libkern/mcount.c 28 Jun 2023 23:01:13 -0000 @@ -33,6 +33,32 @@ #include <sys/param.h> #include <sys/gmon.h> +#ifdef _KERNEL +#ifdef SUSPEND +#include <sys/atomic.h> + +#include <lib/libkern/libkern.h> /* KASSERT */ + +volatile int mcount_disabled; + +void +mcount_disable(void) +{ + KASSERT(CPU_IS_PRIMARY(curcpu())); + mcount_disabled = 1; + membar_producer(); +} + +void +mcount_enable(void) +{ + KASSERT(CPU_IS_PRIMARY(curcpu())); + mcount_disabled = 0; + membar_producer(); +} +#endif /* SUSPEND */ +#endif /* _KERNEL */ + /* * mcount is called on entry to each function compiled with the profiling * switch set. _mcount(), which is declared in a machine-dependent way @@ -63,7 +89,10 @@ _MCOUNT_DECL(u_long frompc, u_long selfp */ if (gmoninit == 0) return; - +#ifdef SUSPEND + if (mcount_disabled) + return; +#endif if ((p = curcpu()->ci_gmon) == NULL) return; #else Index: sys/kern/subr_suspend.c =================================================================== RCS file: /cvs/src/sys/kern/subr_suspend.c,v retrieving revision 1.14 diff -u -p -r1.14 subr_suspend.c --- sys/kern/subr_suspend.c 10 Nov 2022 10:37:40 -0000 1.14 +++ sys/kern/subr_suspend.c 28 Jun 2023 23:01:13 -0000 @@ -26,6 +26,9 @@ #include <sys/mount.h> #include <sys/syscallargs.h> #include <dev/wscons/wsdisplayvar.h> +#ifdef GPROF +#include <sys/gmon.h> +#endif #ifdef HIBERNATE #include <sys/hibernate.h> #endif @@ -63,6 +66,11 @@ top: if (sleep_showstate(v, sleepmode)) return EOPNOTSUPP; + +#ifdef GPROF + /* Keep everyone out of _mcount() until we have fully resumed. */ + mcount_disable(); +#endif #if NWSDISPLAY > 0 wsdisplay_suspend(); #endif @@ -193,6 +201,9 @@ fail_hiballoc: start_periodic_resettodr(); #if NWSDISPLAY > 0 wsdisplay_resume(); +#endif +#ifdef GPROF + mcount_enable(); #endif sys_sync(curproc, NULL, NULL); if (cpu_setperf != NULL) Index: sys/sys/gmon.h =================================================================== RCS file: /cvs/src/sys/sys/gmon.h,v retrieving revision 1.9 diff -u -p -r1.9 gmon.h --- sys/sys/gmon.h 11 Jan 2022 23:59:55 -0000 1.9 +++ sys/sys/gmon.h 28 Jun 2023 23:01:13 -0000 @@ -158,6 +158,10 @@ struct gmonparam { #ifdef _KERNEL extern int gmoninit; /* Is the kernel ready for being profiled? */ +#ifdef SUSPEND +void mcount_disable(void); +void mcount_enable(void); +#endif #else /* !_KERNEL */ #include <sys/cdefs.h> Index: lib/libc/gmon/mcount.c =================================================================== RCS file: /cvs/src/lib/libc/gmon/mcount.c,v retrieving revision 1.16 diff -u -p -r1.16 mcount.c --- lib/libc/gmon/mcount.c 11 Jan 2022 09:21:34 -0000 1.16 +++ lib/libc/gmon/mcount.c 28 Jun 2023 23:01:13 -0000 @@ -31,6 +31,32 @@ #include <sys/types.h> #include <sys/gmon.h> +#ifdef _KERNEL +#ifdef SUSPEND +#include <sys/atomic.h> + +#include <lib/libkern/libkern.h> /* KASSERT */ + +volatile int mcount_disabled; + +void +mcount_disable(void) +{ + KASSERT(CPU_IS_PRIMARY(curcpu())); + mcount_disabled = 1; + membar_producer(); +} + +void +mcount_enable(void) +{ + KASSERT(CPU_IS_PRIMARY(curcpu())); + mcount_disabled = 0; + membar_producer(); +} +#endif /* SUSPEND */ +#endif /* _KERNEL */ + /* * mcount is called on entry to each function compiled with the profiling * switch set. _mcount(), which is declared in a machine-dependent way @@ -61,7 +87,10 @@ _MCOUNT_DECL(u_long frompc, u_long selfp */ if (gmoninit == 0) return; - +#ifdef SUSPEND + if (mcount_disabled) + return; +#endif if ((p = curcpu()->ci_gmon) == NULL) return; #else