On Fri, Jun 23, 2023 at 04:31:59PM -0500, Scott Cheloha wrote:
> On Tue, Jun 20, 2023 at 08:35:11AM -0600, Theo de Raadt wrote:
> > Claudio Jeker <clau...@openbsd.org> wrote:
> > 
> > > On Mon, Jun 19, 2023 at 06:41:14PM -0500, Scott Cheloha wrote:
> > > > > On Jun 19, 2023, at 18:07, Theo de Raadt <dera...@openbsd.org> wrote:
> > > > > 
> > > > > ???        Make sure to STOP all kernel profiling before attempting to
> > > > >        suspend or hibernate your machine.  Otherwise I expect it
> > > > >        will hang.
> > > > > 
> > > > > It is completely acceptable if it produces wrong results, but it must
> > > > > not hang the system.
> > > > 
> > > > The hang is present in -current, with or
> > > > without this patch.
> > > > 
> > > > I am working to figure it out.
> > > 
> > > I don't think the suspend or hibernate code has any code to disable
> > > kernel profiling. This is bad since these code paths are extremly
> > > sensitive and try not to do side-effects.
> > > 
> > > So in the suspend/hibernate code path we should disable profiling early
> > > on. It makes no sense to try to run gprof collection in those code paths.
> > 
> > Yes, that's right.
> > 
> > It will be somewhere in kern/subr_suspend.c
> > 
> > Be careful that the "stop profiling" and "restart profiling" are at the
> > correct places.  The sleep_state() function has a bunch of unrolling
> > goto's which are not 100% reflexive, so be careful.
> 
> Judging from the blinking light on my laptop, the crash is in the
> resume path.
> 
> This patch appears to fix the problem on amd64:
> 
> - Add a new guard variable, gmon_suspended,
> - Toggle gmon_suspended off at the top of sleep_state(), and
> - Toggle gmon_suspended back on at the bottom of sleep_state().
> 
> With this applied, I can suspend/resume and hibernate/unhibernate a an
> amd64/GENERIC.MP kernel w/ GPROF without issue, even if kgmon(8) has
> enabled kernel profiling and increased the effective statclock
> frequency.

Here is a cleaner version.

- Add mcount_disable(), mcount_enable() to mcount.c.  There's no
  reason to put the interfaces in subr_prof.c, we're only
  concerned with _mcount() here.

- Only call mcount_disable/mcount_enable on GPROF kernels.  DDBPROF
  does something different, it isn't relevant here.

- Sync sys/lib/libkern/mcount.c with lib/libc/gmon/mcount.c.

As with the prior patch, this seems to fix suspend/resume and
hibernate/unhibernate on amd64 GPROF kernels when kernel profiling is
activated with kgmon(8).

OK?

Index: sys/lib/libkern/mcount.c
===================================================================
RCS file: /cvs/src/sys/lib/libkern/mcount.c,v
retrieving revision 1.14
diff -u -p -r1.14 mcount.c
--- sys/lib/libkern/mcount.c    11 Jan 2022 09:21:34 -0000      1.14
+++ sys/lib/libkern/mcount.c    28 Jun 2023 23:01:13 -0000
@@ -33,6 +33,32 @@
 #include <sys/param.h>
 #include <sys/gmon.h>
 
+#ifdef _KERNEL
+#ifdef SUSPEND
+#include <sys/atomic.h>
+
+#include <lib/libkern/libkern.h>       /* KASSERT */
+
+volatile int mcount_disabled;
+
+void
+mcount_disable(void)
+{
+       KASSERT(CPU_IS_PRIMARY(curcpu()));
+       mcount_disabled = 1;
+       membar_producer();
+}
+
+void
+mcount_enable(void)
+{
+       KASSERT(CPU_IS_PRIMARY(curcpu()));
+       mcount_disabled = 0;
+       membar_producer();
+}
+#endif /* SUSPEND */
+#endif /* _KERNEL */
+
 /*
  * mcount is called on entry to each function compiled with the profiling
  * switch set.  _mcount(), which is declared in a machine-dependent way
@@ -63,7 +89,10 @@ _MCOUNT_DECL(u_long frompc, u_long selfp
         */
        if (gmoninit == 0)
                return;
-
+#ifdef SUSPEND
+       if (mcount_disabled)
+               return;
+#endif
        if ((p = curcpu()->ci_gmon) == NULL)
                return;
 #else
Index: sys/kern/subr_suspend.c
===================================================================
RCS file: /cvs/src/sys/kern/subr_suspend.c,v
retrieving revision 1.14
diff -u -p -r1.14 subr_suspend.c
--- sys/kern/subr_suspend.c     10 Nov 2022 10:37:40 -0000      1.14
+++ sys/kern/subr_suspend.c     28 Jun 2023 23:01:13 -0000
@@ -26,6 +26,9 @@
 #include <sys/mount.h>
 #include <sys/syscallargs.h>
 #include <dev/wscons/wsdisplayvar.h>
+#ifdef GPROF
+#include <sys/gmon.h>
+#endif
 #ifdef HIBERNATE
 #include <sys/hibernate.h>
 #endif
@@ -63,6 +66,11 @@ top:
 
        if (sleep_showstate(v, sleepmode))
                return EOPNOTSUPP;
+
+#ifdef GPROF
+       /* Keep everyone out of _mcount() until we have fully resumed. */
+       mcount_disable();
+#endif
 #if NWSDISPLAY > 0
        wsdisplay_suspend();
 #endif
@@ -193,6 +201,9 @@ fail_hiballoc:
        start_periodic_resettodr();
 #if NWSDISPLAY > 0
        wsdisplay_resume();
+#endif
+#ifdef GPROF
+       mcount_enable();
 #endif
        sys_sync(curproc, NULL, NULL);
        if (cpu_setperf != NULL)
Index: sys/sys/gmon.h
===================================================================
RCS file: /cvs/src/sys/sys/gmon.h,v
retrieving revision 1.9
diff -u -p -r1.9 gmon.h
--- sys/sys/gmon.h      11 Jan 2022 23:59:55 -0000      1.9
+++ sys/sys/gmon.h      28 Jun 2023 23:01:13 -0000
@@ -158,6 +158,10 @@ struct gmonparam {
 #ifdef _KERNEL
 extern int gmoninit;           /* Is the kernel ready for being profiled? */
 
+#ifdef SUSPEND
+void mcount_disable(void);
+void mcount_enable(void);
+#endif
 #else /* !_KERNEL */
 
 #include <sys/cdefs.h>
Index: lib/libc/gmon/mcount.c
===================================================================
RCS file: /cvs/src/lib/libc/gmon/mcount.c,v
retrieving revision 1.16
diff -u -p -r1.16 mcount.c
--- lib/libc/gmon/mcount.c      11 Jan 2022 09:21:34 -0000      1.16
+++ lib/libc/gmon/mcount.c      28 Jun 2023 23:01:13 -0000
@@ -31,6 +31,32 @@
 #include <sys/types.h>
 #include <sys/gmon.h>
 
+#ifdef _KERNEL
+#ifdef SUSPEND
+#include <sys/atomic.h>
+
+#include <lib/libkern/libkern.h>       /* KASSERT */
+
+volatile int mcount_disabled;
+
+void
+mcount_disable(void)
+{
+       KASSERT(CPU_IS_PRIMARY(curcpu()));
+       mcount_disabled = 1;
+       membar_producer();
+}
+
+void
+mcount_enable(void)
+{
+       KASSERT(CPU_IS_PRIMARY(curcpu()));
+       mcount_disabled = 0;
+       membar_producer();
+}
+#endif /* SUSPEND */
+#endif /* _KERNEL */
+
 /*
  * mcount is called on entry to each function compiled with the profiling
  * switch set.  _mcount(), which is declared in a machine-dependent way
@@ -61,7 +87,10 @@ _MCOUNT_DECL(u_long frompc, u_long selfp
         */
        if (gmoninit == 0)
                return;
-
+#ifdef SUSPEND
+       if (mcount_disabled)
+               return;
+#endif
        if ((p = curcpu()->ci_gmon) == NULL)
                return;
 #else

Reply via email to