On Fri, Oct 04, 2024 at 12:17:04PM +0200, Sebastian Andrzej Siewior wrote:
> A timer/ hrtimer softirq is raised in-IRQ context. With threaded
> interrupts enabled or on PREEMPT_RT this leads to waking the ksoftirqd
> for the processing of the softirq.
> Once the ksoftirqd is marked as pending (or is running) it will collect
> all raised softirqs. This in turn means that a softirq which would have
> been processed at the end of the threaded interrupt, which runs at an
> elevated priority, is now moved to ksoftirqd which runs at SCHED_OTHER
> priority and competes with every regular task for CPU resources.
> This introduces long delays on heavy loaded systems and is not desired
> especially if the system is not overloaded by the softirqs.
> 
> Split the TIMER_SOFTIRQ and HRTIMER_SOFTIRQ processing into a dedicated
> timers thread and let it run at the lowest SCHED_FIFO priority.
> Wake-ups for RT tasks happen from hardirq context so only timer_list timers
> and hrtimers for "regular" tasks are processed here. The higher priority
> ensures that wakeups are performed before scheduling SCHED_OTHER tasks.
> 
> Using a dedicated variable to store the pending softirq bits values
> ensure that the timer are not accidentally picked up by ksoftirqd and
> other threaded interrupts.
> It shouldn't be picked up by ksoftirqd since it runs at lower priority.
> However if ksoftirqd is already running while a timer fires, then
> ksoftird will be PI-boosted due to the BH-lock to ktimer's priority.
> Ideally we try to avoid having ksoftirqd running.
> 
> The timer thread can pick up pending softirqs from ksoftirqd but only
> if the softirq load is high. It is not be desired that the picked up
> softirqs are processed at SCHED_FIFO priority under high softirq load
> but this can already happen by a PI-boost by a force-threaded interrupt.
> 
> [ frede...@kernel.org: rcutorture.c fixes, storm fix by introduction of
>   local_pending_timers() for tick_nohz_next_event() ]
> 
> [ junxiao.ch...@intel.com: Ensure ktimersd gets woken up even if a
>   softirq is currently served. ]
> 
> Signed-off-by: Sebastian Andrzej Siewior <bige...@linutronix.de>

For the rcutorture pieces:

Reviewed-by: Paul E. McKenney <paul...@kernel.org>

> ---
>  include/linux/interrupt.h | 29 ++++++++++++++
>  kernel/rcu/rcutorture.c   |  6 +++
>  kernel/softirq.c          | 82 ++++++++++++++++++++++++++++++++++++++-
>  kernel/time/hrtimer.c     |  4 +-
>  kernel/time/tick-sched.c  |  2 +-
>  kernel/time/timer.c       |  2 +-
>  6 files changed, 120 insertions(+), 5 deletions(-)
> 
> diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
> index 457151f9f263d..4a4f367cd6864 100644
> --- a/include/linux/interrupt.h
> +++ b/include/linux/interrupt.h
> @@ -616,6 +616,35 @@ extern void __raise_softirq_irqoff(unsigned int nr);
>  extern void raise_softirq_irqoff(unsigned int nr);
>  extern void raise_softirq(unsigned int nr);
>  
> +#ifdef CONFIG_PREEMPT_RT
> +DECLARE_PER_CPU(struct task_struct *, timersd);
> +DECLARE_PER_CPU(unsigned long, pending_timer_softirq);
> +
> +extern void raise_timer_softirq(void);
> +extern void raise_hrtimer_softirq(void);
> +
> +static inline unsigned int local_pending_timers(void)
> +{
> +     return __this_cpu_read(pending_timer_softirq);
> +}
> +
> +#else
> +static inline void raise_timer_softirq(void)
> +{
> +     raise_softirq(TIMER_SOFTIRQ);
> +}
> +
> +static inline void raise_hrtimer_softirq(void)
> +{
> +     raise_softirq_irqoff(HRTIMER_SOFTIRQ);
> +}
> +
> +static inline unsigned int local_pending_timers(void)
> +{
> +     return local_softirq_pending();
> +}
> +#endif
> +
>  DECLARE_PER_CPU(struct task_struct *, ksoftirqd);
>  
>  static inline struct task_struct *this_cpu_ksoftirqd(void)
> diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
> index bb75dbf5c800c..609687fd742d5 100644
> --- a/kernel/rcu/rcutorture.c
> +++ b/kernel/rcu/rcutorture.c
> @@ -2440,6 +2440,12 @@ static int rcutorture_booster_init(unsigned int cpu)
>               WARN_ON_ONCE(!t);
>               sp.sched_priority = 2;
>               sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
> +#ifdef CONFIG_PREEMPT_RT
> +             t = per_cpu(timersd, cpu);
> +             WARN_ON_ONCE(!t);
> +             sp.sched_priority = 2;
> +             sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
> +#endif
>       }
>  
>       /* Don't allow time recalculation while creating a new task. */
> diff --git a/kernel/softirq.c b/kernel/softirq.c
> index d082e7840f880..2d847405e5a7f 100644
> --- a/kernel/softirq.c
> +++ b/kernel/softirq.c
> @@ -624,6 +624,24 @@ static inline void tick_irq_exit(void)
>  #endif
>  }
>  
> +#ifdef CONFIG_PREEMPT_RT
> +DEFINE_PER_CPU(struct task_struct *, timersd);
> +DEFINE_PER_CPU(unsigned long, pending_timer_softirq);
> +
> +static void wake_timersd(void)
> +{
> +     struct task_struct *tsk = __this_cpu_read(timersd);
> +
> +     if (tsk)
> +             wake_up_process(tsk);
> +}
> +
> +#else
> +
> +static inline void wake_timersd(void) { }
> +
> +#endif
> +
>  static inline void __irq_exit_rcu(void)
>  {
>  #ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED
> @@ -636,6 +654,10 @@ static inline void __irq_exit_rcu(void)
>       if (!in_interrupt() && local_softirq_pending())
>               invoke_softirq();
>  
> +     if (IS_ENABLED(CONFIG_PREEMPT_RT) && local_pending_timers() &&
> +         !(in_nmi() | in_hardirq()))
> +             wake_timersd();
> +
>       tick_irq_exit();
>  }
>  
> @@ -971,12 +993,70 @@ static struct smp_hotplug_thread softirq_threads = {
>       .thread_comm            = "ksoftirqd/%u",
>  };
>  
> +#ifdef CONFIG_PREEMPT_RT
> +static void timersd_setup(unsigned int cpu)
> +{
> +     sched_set_fifo_low(current);
> +}
> +
> +static int timersd_should_run(unsigned int cpu)
> +{
> +     return local_pending_timers();
> +}
> +
> +static void run_timersd(unsigned int cpu)
> +{
> +     unsigned int timer_si;
> +
> +     ksoftirqd_run_begin();
> +
> +     timer_si = local_pending_timers();
> +     __this_cpu_write(pending_timer_softirq, 0);
> +     or_softirq_pending(timer_si);
> +
> +     __do_softirq();
> +
> +     ksoftirqd_run_end();
> +}
> +
> +static void raise_ktimers_thread(unsigned int nr)
> +{
> +     trace_softirq_raise(nr);
> +     __this_cpu_or(pending_timer_softirq, 1 << nr);
> +}
> +
> +void raise_hrtimer_softirq(void)
> +{
> +     raise_ktimers_thread(HRTIMER_SOFTIRQ);
> +}
> +
> +void raise_timer_softirq(void)
> +{
> +     unsigned long flags;
> +
> +     local_irq_save(flags);
> +     raise_ktimers_thread(TIMER_SOFTIRQ);
> +     wake_timersd();
> +     local_irq_restore(flags);
> +}
> +
> +static struct smp_hotplug_thread timer_threads = {
> +     .store                  = &timersd,
> +     .setup                  = timersd_setup,
> +     .thread_should_run      = timersd_should_run,
> +     .thread_fn              = run_timersd,
> +     .thread_comm            = "ktimers/%u",
> +};
> +#endif
> +
>  static __init int spawn_ksoftirqd(void)
>  {
>       cpuhp_setup_state_nocalls(CPUHP_SOFTIRQ_DEAD, "softirq:dead", NULL,
>                                 takeover_tasklets);
>       BUG_ON(smpboot_register_percpu_thread(&softirq_threads));
> -
> +#ifdef CONFIG_PREEMPT_RT
> +     BUG_ON(smpboot_register_percpu_thread(&timer_threads));
> +#endif
>       return 0;
>  }
>  early_initcall(spawn_ksoftirqd);
> diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
> index cddcd08ea827f..133d49f703d93 100644
> --- a/kernel/time/hrtimer.c
> +++ b/kernel/time/hrtimer.c
> @@ -1811,7 +1811,7 @@ void hrtimer_interrupt(struct clock_event_device *dev)
>       if (!ktime_before(now, cpu_base->softirq_expires_next)) {
>               cpu_base->softirq_expires_next = KTIME_MAX;
>               cpu_base->softirq_activated = 1;
> -             raise_softirq_irqoff(HRTIMER_SOFTIRQ);
> +             raise_hrtimer_softirq();
>       }
>  
>       __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD);
> @@ -1906,7 +1906,7 @@ void hrtimer_run_queues(void)
>       if (!ktime_before(now, cpu_base->softirq_expires_next)) {
>               cpu_base->softirq_expires_next = KTIME_MAX;
>               cpu_base->softirq_activated = 1;
> -             raise_softirq_irqoff(HRTIMER_SOFTIRQ);
> +             raise_hrtimer_softirq();
>       }
>  
>       __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD);
> diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
> index 753a184c70907..efa3181607a2b 100644
> --- a/kernel/time/tick-sched.c
> +++ b/kernel/time/tick-sched.c
> @@ -859,7 +859,7 @@ static void tick_nohz_restart(struct tick_sched *ts, 
> ktime_t now)
>  
>  static inline bool local_timer_softirq_pending(void)
>  {
> -     return local_softirq_pending() & BIT(TIMER_SOFTIRQ);
> +     return local_pending_timers() & BIT(TIMER_SOFTIRQ);
>  }
>  
>  /*
> diff --git a/kernel/time/timer.c b/kernel/time/timer.c
> index 0fc9d066a7be4..79f0dc73ac436 100644
> --- a/kernel/time/timer.c
> +++ b/kernel/time/timer.c
> @@ -2499,7 +2499,7 @@ static void run_local_timers(void)
>                */
>               if (time_after_eq(jiffies, READ_ONCE(base->next_expiry)) ||
>                   (i == BASE_DEF && tmigr_requires_handle_remote())) {
> -                     raise_softirq(TIMER_SOFTIRQ);
> +                     raise_timer_softirq();
>                       return;
>               }
>       }
> -- 
> 2.45.2
> 

Reply via email to