On Fri, Oct 16, 2020 at 11:51:09AM +0800, Joseph Jang wrote: > From: josephjang <josephj...@google.com>
Please use your name as spelled out like you did above in the email header. > > Add suspend timeout handler to prevent device stuck during suspend/ > resume process. Suspend timeout handler will dump disk sleep task > at first round timeout and trigger kernel panic at second round timeout. > The default timer for each round is 30 seconds. > > Note: Can use following command to simulate suspend hang for testing. > adb shell echo 1 > /sys/power/pm_hang > adb shell echo mem > /sys/power/state > Signed-off-by: josephjang <josephj...@google.com> Need a blank line before the signed-off-by: and again, spell your name the same way. > --- > include/linux/console.h | 1 + > kernel/power/Kconfig | 9 +++ > kernel/power/main.c | 66 ++++++++++++++++ > kernel/power/suspend.c | 162 ++++++++++++++++++++++++++++++++++++++++ > kernel/printk/printk.c | 5 ++ > 5 files changed, 243 insertions(+) > > diff --git a/include/linux/console.h b/include/linux/console.h > index 0670d3491e0e..ac468c602c0b 100644 > --- a/include/linux/console.h > +++ b/include/linux/console.h > @@ -192,6 +192,7 @@ static inline void console_sysfs_notify(void) > { } > #endif > extern bool console_suspend_enabled; > +extern int is_console_suspended(void); For global functions, how about: bool console_is_suspended(void); ? > > /* Suspend and resume console messages over PM events */ > extern void suspend_console(void); > diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig > index a7320f07689d..52b7a181b6d8 100644 > --- a/kernel/power/Kconfig > +++ b/kernel/power/Kconfig > @@ -207,6 +207,15 @@ config PM_SLEEP_DEBUG > def_bool y > depends on PM_DEBUG && PM_SLEEP > > +config PM_SLEEP_MONITOR > + bool "Linux kernel suspend/resume process monitor" > + depends on PM_SLEEP > + help > + This option will enable suspend/resume monitor to prevent device > + stuck during suspend/resume process. Suspend timeout handler will > + dump disk sleep task at first round timeout and trigger kernel panic > + at second round timeout. The default timer for each round is 30 seconds. Ouch, are you sure you want to panic? > + > config DPM_WATCHDOG > bool "Device suspend/resume watchdog" > depends on PM_DEBUG && PSTORE && EXPERT > diff --git a/kernel/power/main.c b/kernel/power/main.c > index 40f86ec4ab30..f25b8a47583e 100644 > --- a/kernel/power/main.c > +++ b/kernel/power/main.c > @@ -575,6 +575,69 @@ void __pm_pr_dbg(bool defer, const char *fmt, ...) > static inline void pm_print_times_init(void) {} > #endif /* CONFIG_PM_SLEEP_DEBUG */ > > +#ifdef CONFIG_PM_SLEEP_MONITOR > +/* If set, devices will stuck at suspend for verification */ > +static bool pm_hang_enabled; > + > +static int pm_notify_test(struct notifier_block *nb, > + unsigned long mode, void *_unused) > +{ > + pr_info("Jump into infinite loop now\n"); Why do you have debugging code still enabled? > + > + /* Suspend thread stuck at a loop forever */ > + for (;;) > + ; > + Don't busy spin, that will burn power. > + pr_info("Fail to stuck at loop\n"); And how can this happen? > + > + return 0; > +} > + > +static struct notifier_block pm_notify_nb = { > + .notifier_call = pm_notify_test, > +}; > + > +static ssize_t pm_hang_show(struct kobject *kobj, struct kobj_attribute > *attr, > + char *buf) > +{ > + return snprintf(buf, 10, "%d\n", pm_hang_enabled); > +} > + > +static ssize_t pm_hang_store(struct kobject *kobj, struct kobj_attribute > *attr, > + const char *buf, size_t n) > +{ > + unsigned long val; > + int result; > + > + if (kstrtoul(buf, 10, &val)) > + return -EINVAL; > + > + if (val > 1) > + return -EINVAL; > + > + pm_hang_enabled = !!val; > + > + if (pm_hang_enabled == true) { > + > + result = register_pm_notifier(&pm_notify_nb); > + if (result) > + pr_warn("Can not register suspend notifier, return > %d\n", > + result); > + > + } else { > + > + result = unregister_pm_notifier(&pm_notify_nb); > + if (result) > + pr_warn("Can not unregister suspend notifier, return > %d\n", > + result); > + } > + > + return n; > +} > + > +power_attr(pm_hang); > +#endif > + > struct kobject *power_kobj; > > /** > @@ -909,6 +972,9 @@ static struct attribute * g[] = { > &pm_wakeup_irq_attr.attr, > &pm_debug_messages_attr.attr, > #endif > +#ifdef CONFIG_PM_SLEEP_MONITOR > + &pm_hang_attr.attr, You added a sysfs file, but no Documentation/ABI/ update? That's not ok. > +#endif > #endif > #ifdef CONFIG_FREEZER > &pm_freeze_timeout_attr.attr, > diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c > index 8b1bb5ee7e5d..6f2679cfd9d1 100644 > --- a/kernel/power/suspend.c > +++ b/kernel/power/suspend.c > @@ -30,6 +30,12 @@ > #include <trace/events/power.h> > #include <linux/compiler.h> > #include <linux/moduleparam.h> > +#ifdef CONFIG_PM_SLEEP_MONITOR > +#include <linux/sched/debug.h> > +#include <linux/kthread.h> > +#include <linux/sched.h> > +#include <uapi/linux/sched/types.h> > +#endif Don't #ifdef include files. And why the uapi file? > > #include "power.h" > > @@ -61,6 +67,133 @@ static DECLARE_SWAIT_QUEUE_HEAD(s2idle_wait_head); > enum s2idle_states __read_mostly s2idle_state; > static DEFINE_RAW_SPINLOCK(s2idle_lock); > > +#ifdef CONFIG_PM_SLEEP_MONITOR > +/* Suspend monitor thread toggle reason */ > +enum toggle_reason { > + TOGGLE_NONE, > + TOGGLE_START, > + TOGGLE_STOP, > +}; > + > +#define SUSPEND_TIMER_TIMEOUT_MS 30000 > +static struct task_struct *ksuspend_mon_tsk; > +static DECLARE_WAIT_QUEUE_HEAD(power_suspend_waitqueue); > +static enum toggle_reason suspend_mon_toggle; > +static DEFINE_MUTEX(suspend_mon_lock); > + > +static void start_suspend_mon(void) > +{ > + mutex_lock(&suspend_mon_lock); > + suspend_mon_toggle = TOGGLE_START; > + mutex_unlock(&suspend_mon_lock); Why do you need a lock for a single integer? > + wake_up(&power_suspend_waitqueue); > +} > + > +static void stop_suspend_mon(void) > +{ > + mutex_lock(&suspend_mon_lock); > + suspend_mon_toggle = TOGGLE_STOP; > + mutex_unlock(&suspend_mon_lock); > + wake_up(&power_suspend_waitqueue); > +} > + > +static void suspend_timeout(int timeout_count) > +{ > + char *null_pointer = NULL; > + > + pr_info("Suspend monitor timeout (timer is %d seconds)\n", > + (SUSPEND_TIMER_TIMEOUT_MS/1000)); > + > + show_state_filter(TASK_UNINTERRUPTIBLE); > + > + if (timeout_count < 2) > + return; > + > + if (is_console_suspended()) > + resume_console(); > + > + pr_info("Trigger a panic\n"); Again, debugging code? > + > + /* Trigger a NULL pointer dereference */ > + *null_pointer = 'a'; Are you sure this will work on all platforms? We do have a panic function if you really want to do that. > + > + /* Should not reach here */ > + pr_err("Trigger panic failed!\n"); > +} > + > +static int suspend_monitor_kthread(void *arg) > +{ > + long err; > + struct sched_param param = {.sched_priority > + = MAX_RT_PRIO-1}; Ick, no, call the scheduler functions properly, don't do this "by hand" ever. > + static int timeout_count; > + static long timeout; > + > + pr_info("Init ksuspend_mon thread\n"); Again, debugging code :( > + > + sched_setscheduler(current, SCHED_FIFO, ¶m); > + > + timeout_count = 0; > + timeout = MAX_SCHEDULE_TIMEOUT; > + > + do { > + /* Wait suspend timer timeout */ > + err = wait_event_interruptible_timeout( > + power_suspend_waitqueue, > + (suspend_mon_toggle != TOGGLE_NONE), > + timeout); > + > + mutex_lock(&suspend_mon_lock); > + /* suspend monitor state change */ > + if (suspend_mon_toggle != TOGGLE_NONE) { > + if (suspend_mon_toggle == TOGGLE_START) { > + timeout = msecs_to_jiffies( > + SUSPEND_TIMER_TIMEOUT_MS); > + pr_info("Start suspend monitor\n"); > + } else if (suspend_mon_toggle == TOGGLE_STOP) { > + timeout = MAX_SCHEDULE_TIMEOUT; > + timeout_count = 0; > + pr_info("Stop suspend monitor\n"); > + } > + suspend_mon_toggle = TOGGLE_NONE; > + mutex_unlock(&suspend_mon_lock); > + continue; > + } > + mutex_unlock(&suspend_mon_lock); > + > + /* suspend monitor event handler */ > + if (err == 0) { > + timeout_count++; > + suspend_timeout(timeout_count); > + } else if (err == -ERESTARTSYS) { > + pr_info("Exit ksuspend_mon!"); > + break; > + } > + } while (1); > + > + return 0; > +} > + > +static void init_suspend_monitor_thread(void) > +{ > + int ret; > + > + ksuspend_mon_tsk = kthread_create(suspend_monitor_kthread, > + NULL, "ksuspend_mon"); > + if (IS_ERR(ksuspend_mon_tsk)) { > + ret = PTR_ERR(ksuspend_mon_tsk); > + ksuspend_mon_tsk = NULL; > + pr_err("Create suspend_monitor_kthread failed! ret = %d\n", > + ret); > + return; > + } > + > + suspend_mon_toggle = TOGGLE_NONE; > + wake_up_process(ksuspend_mon_tsk); > + > +} > +#endif > + > /** > * pm_suspend_default_s2idle - Check if suspend-to-idle is the default > suspend. > * > @@ -89,6 +222,10 @@ static void s2idle_enter(void) > { > trace_suspend_resume(TPS("machine_suspend"), PM_SUSPEND_TO_IDLE, true); > > +#ifdef CONFIG_PM_SLEEP_MONITOR > + stop_suspend_mon(); > +#endif Do not put #ifdef in .c files, that's not the proper kernel coding style. Especially for single function calls. I've stopped here... greg k-h