On Fri, 2026-06-19 at 09:21 +0200, Nam Cao wrote:
> The rtapp/sleep monitor's primary purpose is detecting common mistakes
> with user-space real-time design. Monitoring real-time issues with
> kernel threads is a bonus.
> 
> However, accommodating kernel threads complicates the monitor due to
> edge cases which are seen by the monitor as a lower-priority task
> waking a higher-priority task:
> 
>   - kthread_stop() wakes up the task in order to stop it.
> 
>   - The rcu thread and migration thread can be woken by any task.
> 
>   - The ktimerd thread is woken near the end of irq_exit_rcu(), where
>     the preempt counter is "broken" and falsely says this is task
>     context. This requires the monitor to use the hardirq_context flag
>     instead of the preempt counter.
> 
> Besides complicating the monitor, the final case also requires enabling
> CONFIG_TRACE_IRQFLAGS (so that "hardirq_context" can be used). This
> adds overhead to the kernel even when the monitor is not active. This
> may be an obstacle to enabling this monitor in distros' kernels.

Very good, especially for this!

> Furthermore, kernel threads usually are started before the monitor is
> enabled. Consequently, the threads' states (i.o.w. the monitor's
> atomic propositions for the threads) are not fully known to the
> monitor. As a result, the kernel threads mostly cannot be monitored.
> 
> Overall, the downsides of accommodating kernel threads outweigh the
> benefits. Thus, exclude kernel threads to simplify the monitor.
> 
> Signed-off-by: Nam Cao <[email protected]>

Reviewed-by: Gabriele Monaco <[email protected]>

Thanks,
Gabriele

> ---
> Cc: Sebastian Andrzej Siewior <[email protected]>
> ---
>  Documentation/trace/rv/monitor_rtapp.rst  |  22 ++---
>  kernel/trace/rv/monitors/sleep/Kconfig    |   1 -
>  kernel/trace/rv/monitors/sleep/sleep.c    |  39 +-------
>  kernel/trace/rv/monitors/sleep/sleep.h    | 104 +++++++++-------------
>  tools/verification/models/rtapp/sleep.ltl |   7 +-
>  5 files changed, 54 insertions(+), 119 deletions(-)
> 
> diff --git a/Documentation/trace/rv/monitor_rtapp.rst
> b/Documentation/trace/rv/monitor_rtapp.rst
> index 570be67a8f3b..502d3ea412eb 100644
> --- a/Documentation/trace/rv/monitor_rtapp.rst
> +++ b/Documentation/trace/rv/monitor_rtapp.rst
> @@ -93,9 +93,9 @@ assessment.
>  
>  The monitor's specification is::
>  
> -  RULE = always ((RT and SLEEP) imply (RT_FRIENDLY_SLEEP or ALLOWLIST))
> +  RULE = always ((RT and SLEEP and USER_THREAD) imply (RT_FRIENDLY_SLEEP or
> ALLOWLIST))
>  
> -  RT_FRIENDLY_SLEEP = (RT_VALID_SLEEP_REASON or KERNEL_THREAD)
> +  RT_FRIENDLY_SLEEP = RT_VALID_SLEEP_REASON
>                    and ((not SCHEDULE_IN) until RT_FRIENDLY_WAKE)
>  
>    RT_VALID_SLEEP_REASON = FUTEX_WAIT
> @@ -110,23 +110,13 @@ The monitor's specification is::
>                    or WOKEN_BY_HARDIRQ
>                    or WOKEN_BY_NMI
>                    or ABORT_SLEEP
> -                  or KTHREAD_SHOULD_STOP
>  
>    ALLOWLIST = BLOCK_ON_RT_MUTEX
>             or FUTEX_LOCK_PI
> -           or TASK_IS_RCU
> -           or TASK_IS_MIGRATION
> -
> -Beside the scenarios described above, this specification also handle some
> -special cases:
> -
> -  - `KERNEL_THREAD`: kernel tasks do not have any pattern that can be
> recognized
> -    as valid real-time sleeping reasons. Therefore sleeping reason is not
> -    checked for kernel tasks.
> -  - `KTHREAD_SHOULD_STOP`: a non-real-time thread may stop a real-time kernel
> -    thread by waking it and waiting for it to exit (`kthread_stop()`). This
> -    wakeup is safe for real-time.
> -  - `ALLOWLIST`: to handle known false positives with the kernel.
> +
> +Besides the scenarios described above, this specification also defines an
> allowlist
> +to handle some special cases:
> +
>    - `BLOCK_ON_RT_MUTEX` is included in the allowlist due to its
> implementation.
>      In the release path of rt_mutex, a boosted task is de-boosted before
> waking
>      the rt_mutex's waiter. Consequently, the monitor may see a real-time-
> unsafe
> diff --git a/kernel/trace/rv/monitors/sleep/Kconfig
> b/kernel/trace/rv/monitors/sleep/Kconfig
> index 6b7a122e7b47..d6ec3e9a91b6 100644
> --- a/kernel/trace/rv/monitors/sleep/Kconfig
> +++ b/kernel/trace/rv/monitors/sleep/Kconfig
> @@ -5,7 +5,6 @@ config RV_MON_SLEEP
>       select RV_LTL_MONITOR
>       depends on HAVE_SYSCALL_TRACEPOINTS
>       depends on RV_MON_RTAPP
> -     select TRACE_IRQFLAGS
>       default y
>       select LTL_MON_EVENTS_ID
>       bool "sleep monitor"
> diff --git a/kernel/trace/rv/monitors/sleep/sleep.c
> b/kernel/trace/rv/monitors/sleep/sleep.c
> index 638be7d8747f..aa5a984853b5 100644
> --- a/kernel/trace/rv/monitors/sleep/sleep.c
> +++ b/kernel/trace/rv/monitors/sleep/sleep.c
> @@ -43,7 +43,6 @@ static void ltl_atoms_init(struct task_struct *task, struct
> ltl_monitor *mon, bo
>       ltl_atom_set(mon, LTL_WOKEN_BY_EQUAL_OR_HIGHER_PRIO, false);
>  
>       if (task_creation) {
> -             ltl_atom_set(mon, LTL_KTHREAD_SHOULD_STOP, false);
>               ltl_atom_set(mon, LTL_NANOSLEEP_CLOCK_REALTIME, false);
>               ltl_atom_set(mon, LTL_NANOSLEEP_TIMER_ABSTIME, false);
>               ltl_atom_set(mon, LTL_CLOCK_NANOSLEEP, false);
> @@ -53,33 +52,7 @@ static void ltl_atoms_init(struct task_struct *task, struct
> ltl_monitor *mon, bo
>               ltl_atom_set(mon, LTL_BLOCK_ON_RT_MUTEX, false);
>       }
>  
> -     if (task->flags & PF_KTHREAD) {
> -             ltl_atom_set(mon, LTL_KERNEL_THREAD, true);
> -
> -             /* kernel tasks do not do syscall */
> -             ltl_atom_set(mon, LTL_FUTEX_WAIT, false);
> -             ltl_atom_set(mon, LTL_FUTEX_LOCK_PI, false);
> -             ltl_atom_set(mon, LTL_NANOSLEEP_CLOCK_REALTIME, false);
> -             ltl_atom_set(mon, LTL_NANOSLEEP_TIMER_ABSTIME, false);
> -             ltl_atom_set(mon, LTL_CLOCK_NANOSLEEP, false);
> -             ltl_atom_set(mon, LTL_EPOLL_WAIT, false);
> -
> -             if (strstarts(task->comm, "migration/"))
> -                     ltl_atom_set(mon, LTL_TASK_IS_MIGRATION, true);
> -             else
> -                     ltl_atom_set(mon, LTL_TASK_IS_MIGRATION, false);
> -
> -             if (strstarts(task->comm, "rcu"))
> -                     ltl_atom_set(mon, LTL_TASK_IS_RCU, true);
> -             else
> -                     ltl_atom_set(mon, LTL_TASK_IS_RCU, false);
> -     } else {
> -             ltl_atom_set(mon, LTL_KTHREAD_SHOULD_STOP, false);
> -             ltl_atom_set(mon, LTL_KERNEL_THREAD, false);
> -             ltl_atom_set(mon, LTL_TASK_IS_RCU, false);
> -             ltl_atom_set(mon, LTL_TASK_IS_MIGRATION, false);
> -     }
> -
> +     ltl_atom_set(mon, LTL_USER_THREAD, !(task->flags & PF_KTHREAD));
>  }
>  
>  static void handle_sched_set_state(void *data, struct task_struct *task, int
> state)
> @@ -97,7 +70,7 @@ static void handle_sched_exit(void *data, bool is_switch)
>  
>  static void handle_sched_waking(void *data, struct task_struct *task)
>  {
> -     if (this_cpu_read(hardirq_context)) {
> +     if (in_hardirq()) {
>               ltl_atom_pulse(task, LTL_WOKEN_BY_HARDIRQ, true);
>       } else if (in_task()) {
>               if (current->prio <= task->prio)
> @@ -181,12 +154,6 @@ static void handle_sys_exit(void *data, struct pt_regs
> *regs, long ret)
>       ltl_atom_update(current, LTL_CLOCK_NANOSLEEP, false);
>  }
>  
> -static void handle_kthread_stop(void *data, struct task_struct *task)
> -{
> -     /* FIXME: this could race with other tracepoint handlers */
> -     ltl_atom_update(task, LTL_KTHREAD_SHOULD_STOP, true);
> -}
> -
>  static int enable_sleep(void)
>  {
>       int retval;
> @@ -200,7 +167,6 @@ static int enable_sleep(void)
>       rv_attach_trace_probe("rtapp_sleep", sched_set_state_tp,
> handle_sched_set_state);
>       rv_attach_trace_probe("rtapp_sleep", contention_begin,
> handle_contention_begin);
>       rv_attach_trace_probe("rtapp_sleep", contention_end,
> handle_contention_end);
> -     rv_attach_trace_probe("rtapp_sleep", sched_kthread_stop,
> handle_kthread_stop);
>       rv_attach_trace_probe("rtapp_sleep", sys_enter, handle_sys_enter);
>       rv_attach_trace_probe("rtapp_sleep", sys_exit, handle_sys_exit);
>       return 0;
> @@ -213,7 +179,6 @@ static void disable_sleep(void)
>       rv_detach_trace_probe("rtapp_sleep", sched_set_state_tp,
> handle_sched_set_state);
>       rv_detach_trace_probe("rtapp_sleep", contention_begin,
> handle_contention_begin);
>       rv_detach_trace_probe("rtapp_sleep", contention_end,
> handle_contention_end);
> -     rv_detach_trace_probe("rtapp_sleep", sched_kthread_stop,
> handle_kthread_stop);
>       rv_detach_trace_probe("rtapp_sleep", sys_enter, handle_sys_enter);
>       rv_detach_trace_probe("rtapp_sleep", sys_exit, handle_sys_exit);
>  
> diff --git a/kernel/trace/rv/monitors/sleep/sleep.h
> b/kernel/trace/rv/monitors/sleep/sleep.h
> index 2fe2ec7edae8..44e593f41e6a 100644
> --- a/kernel/trace/rv/monitors/sleep/sleep.h
> +++ b/kernel/trace/rv/monitors/sleep/sleep.h
> @@ -18,15 +18,12 @@ enum ltl_atom {
>       LTL_EPOLL_WAIT,
>       LTL_FUTEX_LOCK_PI,
>       LTL_FUTEX_WAIT,
> -     LTL_KERNEL_THREAD,
> -     LTL_KTHREAD_SHOULD_STOP,
>       LTL_NANOSLEEP_CLOCK_REALTIME,
>       LTL_NANOSLEEP_TIMER_ABSTIME,
>       LTL_RT,
>       LTL_SCHEDULE_IN,
>       LTL_SLEEP,
> -     LTL_TASK_IS_MIGRATION,
> -     LTL_TASK_IS_RCU,
> +     LTL_USER_THREAD,
>       LTL_WOKEN_BY_EQUAL_OR_HIGHER_PRIO,
>       LTL_WOKEN_BY_HARDIRQ,
>       LTL_WOKEN_BY_NMI,
> @@ -43,15 +40,12 @@ static const char *ltl_atom_str(enum ltl_atom atom)
>               "ep_wa",
>               "fu_lo_pi",
>               "fu_wa",
> -             "ker_th",
> -             "kth_sh_st",
>               "na_cl_re",
>               "na_ti_ab",
>               "rt",
>               "sch_in",
>               "sle",
> -             "ta_mi",
> -             "ta_rc",
> +             "us_th",
>               "wo_eq_hi_pr",
>               "wo_ha",
>               "wo_nm",
> @@ -79,46 +73,41 @@ static void ltl_start(struct task_struct *task, struct
> ltl_monitor *mon)
>       bool woken_by_hardirq = test_bit(LTL_WOKEN_BY_HARDIRQ, mon->atoms);
>       bool woken_by_equal_or_higher_prio =
> test_bit(LTL_WOKEN_BY_EQUAL_OR_HIGHER_PRIO,
>            mon->atoms);
> -     bool task_is_rcu = test_bit(LTL_TASK_IS_RCU, mon->atoms);
> -     bool task_is_migration = test_bit(LTL_TASK_IS_MIGRATION, mon->atoms);
> +     bool user_thread = test_bit(LTL_USER_THREAD, mon->atoms);
>       bool sleep = test_bit(LTL_SLEEP, mon->atoms);
>       bool schedule_in = test_bit(LTL_SCHEDULE_IN, mon->atoms);
>       bool rt = test_bit(LTL_RT, mon->atoms);
>       bool nanosleep_timer_abstime = test_bit(LTL_NANOSLEEP_TIMER_ABSTIME,
> mon->atoms);
>       bool nanosleep_clock_realtime =
> test_bit(LTL_NANOSLEEP_CLOCK_REALTIME, mon->atoms);
> -     bool kthread_should_stop = test_bit(LTL_KTHREAD_SHOULD_STOP, mon-
> >atoms);
> -     bool kernel_thread = test_bit(LTL_KERNEL_THREAD, mon->atoms);
>       bool futex_wait = test_bit(LTL_FUTEX_WAIT, mon->atoms);
>       bool futex_lock_pi = test_bit(LTL_FUTEX_LOCK_PI, mon->atoms);
>       bool epoll_wait = test_bit(LTL_EPOLL_WAIT, mon->atoms);
>       bool clock_nanosleep = test_bit(LTL_CLOCK_NANOSLEEP, mon->atoms);
>       bool block_on_rt_mutex = test_bit(LTL_BLOCK_ON_RT_MUTEX, mon->atoms);
>       bool abort_sleep = test_bit(LTL_ABORT_SLEEP, mon->atoms);
> -     bool val41 = task_is_rcu || task_is_migration;
> -     bool val42 = futex_lock_pi || val41;
> -     bool val5 = block_on_rt_mutex || val42;
> -     bool val33 = abort_sleep || kthread_should_stop;
> -     bool val34 = woken_by_nmi || val33;
> -     bool val35 = woken_by_hardirq || val34;
> -     bool val14 = woken_by_equal_or_higher_prio || val35;
> +     bool val7 = block_on_rt_mutex || futex_lock_pi;
> +     bool val32 = woken_by_nmi || abort_sleep;
> +     bool val33 = woken_by_hardirq || val32;
> +     bool val14 = woken_by_equal_or_higher_prio || val33;
>       bool val13 = !schedule_in;
>       bool val25 = !nanosleep_clock_realtime;
>       bool val26 = nanosleep_timer_abstime && val25;
>       bool val18 = clock_nanosleep && val26;
>       bool val20 = val18 || epoll_wait;
> -     bool val9 = futex_wait || val20;
> -     bool val11 = val9 || kernel_thread;
> +     bool val11 = futex_wait || val20;
> +     bool val3 = !user_thread;
>       bool val2 = !sleep;
> +     bool val4 = val2 || val3;
>       bool val1 = !rt;
> -     bool val3 = val1 || val2;
> +     bool val5 = val1 || val4;
>  
> -     if (val3)
> +     if (val5)
>               __set_bit(S0, mon->states);
>       if (val11 && val13)
>               __set_bit(S1, mon->states);
>       if (val11 && val14)
>               __set_bit(S4, mon->states);
> -     if (val5)
> +     if (val7)
>               __set_bit(S5, mon->states);
>  }
>  
> @@ -129,130 +118,125 @@ ltl_possible_next_states(struct ltl_monitor *mon,
> unsigned int state, unsigned l
>       bool woken_by_hardirq = test_bit(LTL_WOKEN_BY_HARDIRQ, mon->atoms);
>       bool woken_by_equal_or_higher_prio =
> test_bit(LTL_WOKEN_BY_EQUAL_OR_HIGHER_PRIO,
>            mon->atoms);
> -     bool task_is_rcu = test_bit(LTL_TASK_IS_RCU, mon->atoms);
> -     bool task_is_migration = test_bit(LTL_TASK_IS_MIGRATION, mon->atoms);
> +     bool user_thread = test_bit(LTL_USER_THREAD, mon->atoms);
>       bool sleep = test_bit(LTL_SLEEP, mon->atoms);
>       bool schedule_in = test_bit(LTL_SCHEDULE_IN, mon->atoms);
>       bool rt = test_bit(LTL_RT, mon->atoms);
>       bool nanosleep_timer_abstime = test_bit(LTL_NANOSLEEP_TIMER_ABSTIME,
> mon->atoms);
>       bool nanosleep_clock_realtime =
> test_bit(LTL_NANOSLEEP_CLOCK_REALTIME, mon->atoms);
> -     bool kthread_should_stop = test_bit(LTL_KTHREAD_SHOULD_STOP, mon-
> >atoms);
> -     bool kernel_thread = test_bit(LTL_KERNEL_THREAD, mon->atoms);
>       bool futex_wait = test_bit(LTL_FUTEX_WAIT, mon->atoms);
>       bool futex_lock_pi = test_bit(LTL_FUTEX_LOCK_PI, mon->atoms);
>       bool epoll_wait = test_bit(LTL_EPOLL_WAIT, mon->atoms);
>       bool clock_nanosleep = test_bit(LTL_CLOCK_NANOSLEEP, mon->atoms);
>       bool block_on_rt_mutex = test_bit(LTL_BLOCK_ON_RT_MUTEX, mon->atoms);
>       bool abort_sleep = test_bit(LTL_ABORT_SLEEP, mon->atoms);
> -     bool val41 = task_is_rcu || task_is_migration;
> -     bool val42 = futex_lock_pi || val41;
> -     bool val5 = block_on_rt_mutex || val42;
> -     bool val33 = abort_sleep || kthread_should_stop;
> -     bool val34 = woken_by_nmi || val33;
> -     bool val35 = woken_by_hardirq || val34;
> -     bool val14 = woken_by_equal_or_higher_prio || val35;
> +     bool val7 = block_on_rt_mutex || futex_lock_pi;
> +     bool val32 = woken_by_nmi || abort_sleep;
> +     bool val33 = woken_by_hardirq || val32;
> +     bool val14 = woken_by_equal_or_higher_prio || val33;
>       bool val13 = !schedule_in;
>       bool val25 = !nanosleep_clock_realtime;
>       bool val26 = nanosleep_timer_abstime && val25;
>       bool val18 = clock_nanosleep && val26;
>       bool val20 = val18 || epoll_wait;
> -     bool val9 = futex_wait || val20;
> -     bool val11 = val9 || kernel_thread;
> +     bool val11 = futex_wait || val20;
> +     bool val3 = !user_thread;
>       bool val2 = !sleep;
> +     bool val4 = val2 || val3;
>       bool val1 = !rt;
> -     bool val3 = val1 || val2;
> +     bool val5 = val1 || val4;
>  
>       switch (state) {
>       case S0:
> -             if (val3)
> +             if (val5)
>                       __set_bit(S0, next);
>               if (val11 && val13)
>                       __set_bit(S1, next);
>               if (val11 && val14)
>                       __set_bit(S4, next);
> -             if (val5)
> +             if (val7)
>                       __set_bit(S5, next);
>               break;
>       case S1:
>               if (val11 && val13)
>                       __set_bit(S1, next);
> -             if (val13 && val3)
> +             if (val13 && val5)
>                       __set_bit(S2, next);
> -             if (val14 && val3)
> +             if (val14 && val5)
>                       __set_bit(S3, next);
>               if (val11 && val14)
>                       __set_bit(S4, next);
> -             if (val13 && val5)
> +             if (val13 && val7)
>                       __set_bit(S6, next);
> -             if (val14 && val5)
> +             if (val14 && val7)
>                       __set_bit(S7, next);
>               break;
>       case S2:
>               if (val11 && val13)
>                       __set_bit(S1, next);
> -             if (val13 && val3)
> +             if (val13 && val5)
>                       __set_bit(S2, next);
> -             if (val14 && val3)
> +             if (val14 && val5)
>                       __set_bit(S3, next);
>               if (val11 && val14)
>                       __set_bit(S4, next);
> -             if (val13 && val5)
> +             if (val13 && val7)
>                       __set_bit(S6, next);
> -             if (val14 && val5)
> +             if (val14 && val7)
>                       __set_bit(S7, next);
>               break;
>       case S3:
> -             if (val3)
> +             if (val5)
>                       __set_bit(S0, next);
>               if (val11 && val13)
>                       __set_bit(S1, next);
>               if (val11 && val14)
>                       __set_bit(S4, next);
> -             if (val5)
> +             if (val7)
>                       __set_bit(S5, next);
>               break;
>       case S4:
> -             if (val3)
> +             if (val5)
>                       __set_bit(S0, next);
>               if (val11 && val13)
>                       __set_bit(S1, next);
>               if (val11 && val14)
>                       __set_bit(S4, next);
> -             if (val5)
> +             if (val7)
>                       __set_bit(S5, next);
>               break;
>       case S5:
> -             if (val3)
> +             if (val5)
>                       __set_bit(S0, next);
>               if (val11 && val13)
>                       __set_bit(S1, next);
>               if (val11 && val14)
>                       __set_bit(S4, next);
> -             if (val5)
> +             if (val7)
>                       __set_bit(S5, next);
>               break;
>       case S6:
>               if (val11 && val13)
>                       __set_bit(S1, next);
> -             if (val13 && val3)
> +             if (val13 && val5)
>                       __set_bit(S2, next);
> -             if (val14 && val3)
> +             if (val14 && val5)
>                       __set_bit(S3, next);
>               if (val11 && val14)
>                       __set_bit(S4, next);
> -             if (val13 && val5)
> +             if (val13 && val7)
>                       __set_bit(S6, next);
> -             if (val14 && val5)
> +             if (val14 && val7)
>                       __set_bit(S7, next);
>               break;
>       case S7:
> -             if (val3)
> +             if (val5)
>                       __set_bit(S0, next);
>               if (val11 && val13)
>                       __set_bit(S1, next);
>               if (val11 && val14)
>                       __set_bit(S4, next);
> -             if (val5)
> +             if (val7)
>                       __set_bit(S5, next);
>               break;
>       }
> diff --git a/tools/verification/models/rtapp/sleep.ltl
> b/tools/verification/models/rtapp/sleep.ltl
> index 5923e58d7810..4d78fdd204c0 100644
> --- a/tools/verification/models/rtapp/sleep.ltl
> +++ b/tools/verification/models/rtapp/sleep.ltl
> @@ -1,6 +1,6 @@
> -RULE = always ((RT and SLEEP) imply (RT_FRIENDLY_SLEEP or ALLOWLIST))
> +RULE = always ((RT and SLEEP and USER_THREAD) imply (RT_FRIENDLY_SLEEP or
> ALLOWLIST))
>  
> -RT_FRIENDLY_SLEEP = (RT_VALID_SLEEP_REASON or KERNEL_THREAD)
> +RT_FRIENDLY_SLEEP = RT_VALID_SLEEP_REASON
>                  and ((not SCHEDULE_IN) until RT_FRIENDLY_WAKE)
>  
>  RT_VALID_SLEEP_REASON = FUTEX_WAIT
> @@ -15,9 +15,6 @@ RT_FRIENDLY_WAKE = WOKEN_BY_EQUAL_OR_HIGHER_PRIO
>                  or WOKEN_BY_HARDIRQ
>                  or WOKEN_BY_NMI
>                  or ABORT_SLEEP
> -                or KTHREAD_SHOULD_STOP
>  
>  ALLOWLIST = BLOCK_ON_RT_MUTEX
>           or FUTEX_LOCK_PI
> -         or TASK_IS_RCU
> -         or TASK_IS_MIGRATION


Reply via email to