On Tue, Jun 20, 2023 at 08:35:11AM -0600, Theo de Raadt wrote:
> Claudio Jeker <clau...@openbsd.org> wrote:
> 
> > On Mon, Jun 19, 2023 at 06:41:14PM -0500, Scott Cheloha wrote:
> > > > On Jun 19, 2023, at 18:07, Theo de Raadt <dera...@openbsd.org> wrote:
> > > > 
> > > > ???        Make sure to STOP all kernel profiling before attempting to
> > > >        suspend or hibernate your machine.  Otherwise I expect it
> > > >        will hang.
> > > > 
> > > > It is completely acceptable if it produces wrong results, but it must
> > > > not hang the system.
> > > 
> > > The hang is present in -current, with or
> > > without this patch.
> > > 
> > > I am working to figure it out.
> > 
> > I don't think the suspend or hibernate code has any code to disable
> > kernel profiling. This is bad since these code paths are extremly
> > sensitive and try not to do side-effects.
> > 
> > So in the suspend/hibernate code path we should disable profiling early
> > on. It makes no sense to try to run gprof collection in those code paths.
> 
> Yes, that's right.
> 
> It will be somewhere in kern/subr_suspend.c
> 
> Be careful that the "stop profiling" and "restart profiling" are at the
> correct places.  The sleep_state() function has a bunch of unrolling
> goto's which are not 100% reflexive, so be careful.

Judging from the blinking light on my laptop, the crash is in the
resume path.

This patch appears to fix the problem on amd64:

- Add a new guard variable, gmon_suspended,
- Toggle gmon_suspended off at the top of sleep_state(), and
- Toggle gmon_suspended back on at the bottom of sleep_state().

With this applied, I can suspend/resume and hibernate/unhibernate a an
amd64/GENERIC.MP kernel w/ GPROF without issue, even if kgmon(8) has
enabled kernel profiling and increased the effective statclock
frequency.

Index: sys/kern/subr_prof.c
===================================================================
RCS file: /cvs/src/sys/kern/subr_prof.c,v
retrieving revision 1.35
diff -u -p -r1.35 subr_prof.c
--- sys/kern/subr_prof.c        2 Jun 2023 17:44:29 -0000       1.35
+++ sys/kern/subr_prof.c        23 Jun 2023 21:25:51 -0000
@@ -34,6 +34,7 @@
 
 #include <sys/param.h>
 #include <sys/systm.h>
+#include <sys/atomic.h>
 #include <sys/pledge.h>
 #include <sys/proc.h>
 #include <sys/resourcevar.h>
@@ -59,6 +60,31 @@ int gmoninit = 0;
 u_int gmon_cpu_count;          /* [K] number of CPUs with profiling enabled */
 
 extern char etext[];
+
+/*
+ * The suspend and hibernate paths need to be free
+ * of side effects.  Keep all CPUs out of _mcount()
+ * while a suspend/resume is ongoing.
+ */
+#ifdef SUSPEND
+volatile int gmon_suspended;
+
+void
+prof_resume(void)
+{
+       KASSERT(CPU_IS_PRIMARY(curcpu()));
+       gmon_suspended = 0;
+       membar_producer();
+}
+
+void
+prof_suspend(void)
+{
+       KASSERT(CPU_IS_PRIMARY(curcpu()));
+       gmon_suspended = 1;
+       membar_producer();
+}
+#endif /* SUSPEND */
 
 void
 prof_init(void)
Index: sys/kern/subr_suspend.c
===================================================================
RCS file: /cvs/src/sys/kern/subr_suspend.c,v
retrieving revision 1.14
diff -u -p -r1.14 subr_suspend.c
--- sys/kern/subr_suspend.c     10 Nov 2022 10:37:40 -0000      1.14
+++ sys/kern/subr_suspend.c     23 Jun 2023 21:25:51 -0000
@@ -20,6 +20,7 @@
 #include <sys/systm.h>
 #include <sys/buf.h>
 #include <sys/clockintr.h>
+#include <sys/gmon.h>
 #include <sys/reboot.h>
 #include <sys/sensors.h>
 #include <sys/sysctl.h>
@@ -63,6 +64,11 @@ top:
 
        if (sleep_showstate(v, sleepmode))
                return EOPNOTSUPP;
+
+#if defined(GPROF) || defined(DDBPROF)
+       prof_suspend();
+#endif
+
 #if NWSDISPLAY > 0
        wsdisplay_suspend();
 #endif
@@ -193,6 +199,9 @@ fail_hiballoc:
        start_periodic_resettodr();
 #if NWSDISPLAY > 0
        wsdisplay_resume();
+#endif
+#if defined(GPROF) || defined(DDBPROF)
+       prof_resume();
 #endif
        sys_sync(curproc, NULL, NULL);
        if (cpu_setperf != NULL)
Index: sys/sys/gmon.h
===================================================================
RCS file: /cvs/src/sys/sys/gmon.h,v
retrieving revision 1.9
diff -u -p -r1.9 gmon.h
--- sys/sys/gmon.h      11 Jan 2022 23:59:55 -0000      1.9
+++ sys/sys/gmon.h      23 Jun 2023 21:25:51 -0000
@@ -158,6 +158,13 @@ struct gmonparam {
 #ifdef _KERNEL
 extern int gmoninit;           /* Is the kernel ready for being profiled? */
 
+#ifdef SUSPEND
+extern volatile int gmon_suspended;    /* Ongoing suspend/resume? */
+
+void prof_resume(void);
+void prof_suspend(void);
+#endif
+
 #else /* !_KERNEL */
 
 #include <sys/cdefs.h>
Index: sys/lib/libkern/mcount.c
===================================================================
RCS file: /cvs/src/sys/lib/libkern/mcount.c,v
retrieving revision 1.14
diff -u -p -r1.14 mcount.c
--- sys/lib/libkern/mcount.c    11 Jan 2022 09:21:34 -0000      1.14
+++ sys/lib/libkern/mcount.c    23 Jun 2023 21:25:51 -0000
@@ -64,6 +64,10 @@ _MCOUNT_DECL(u_long frompc, u_long selfp
        if (gmoninit == 0)
                return;
 
+       /* Don't profile execution during suspend/resume. */
+       if (gmon_suspended)
+               return;
+
        if ((p = curcpu()->ci_gmon) == NULL)
                return;
 #else
Index: lib/libc/gmon/mcount.c
===================================================================
RCS file: /cvs/src/lib/libc/gmon/mcount.c,v
retrieving revision 1.16
diff -u -p -r1.16 mcount.c
--- lib/libc/gmon/mcount.c      11 Jan 2022 09:21:34 -0000      1.16
+++ lib/libc/gmon/mcount.c      23 Jun 2023 21:25:51 -0000
@@ -62,6 +62,10 @@ _MCOUNT_DECL(u_long frompc, u_long selfp
        if (gmoninit == 0)
                return;
 
+       /* Don't profile execution during suspend/resume. */
+       if (prof_is_suspended())
+               return;
+
        if ((p = curcpu()->ci_gmon) == NULL)
                return;
 #else

Reply via email to