When enabling per-CPU POSIX timers, an IPI might be sent to nohz_full CPUs (so that they re-read the tick dependencies and possibly do not re-enter nohz_full on a given CPU).
A common case is for applications that run on nohz_full= CPUs not to use POSIX timers (e.g. DPDK). When the dependency is set through tick_nohz_dep_set_signal(), this patch skips the IPI if the task's allowed CPU mask does not intersect the nohz_full= CPU mask. This reduces interruptions to nohz_full= CPUs. Signed-off-by: Marcelo Tosatti <mtosa...@redhat.com> --- include/linux/tick.h | 11 +++++++---- kernel/time/posix-cpu-timers.c | 4 ++-- kernel/time/tick-sched.c | 27 +++++++++++++++++++++++++-- 3 files changed, 34 insertions(+), 8 deletions(-) Index: linux-2.6/include/linux/tick.h =================================================================== --- linux-2.6.orig/include/linux/tick.h +++ linux-2.6/include/linux/tick.h @@ -207,7 +207,8 @@ extern void tick_nohz_dep_set_task(struc enum tick_dep_bits bit); extern void tick_nohz_dep_clear_task(struct task_struct *tsk, enum tick_dep_bits bit); -extern void tick_nohz_dep_set_signal(struct signal_struct *signal, +extern void tick_nohz_dep_set_signal(struct task_struct *tsk, + struct signal_struct *signal, enum tick_dep_bits bit); extern void tick_nohz_dep_clear_signal(struct signal_struct *signal, enum tick_dep_bits bit); @@ -252,11 +253,12 @@ static inline void tick_dep_clear_task(s if (tick_nohz_full_enabled()) tick_nohz_dep_clear_task(tsk, bit); } -static inline void tick_dep_set_signal(struct signal_struct *signal, +static inline void tick_dep_set_signal(struct task_struct *tsk, + struct signal_struct *signal, enum tick_dep_bits bit) { if (tick_nohz_full_enabled()) - tick_nohz_dep_set_signal(signal, bit); + tick_nohz_dep_set_signal(tsk, signal, bit); } static inline void tick_dep_clear_signal(struct signal_struct *signal, enum tick_dep_bits bit) @@ -284,7 +286,8 @@ static inline void tick_dep_set_task(str enum tick_dep_bits bit) { } static inline void tick_dep_clear_task(struct task_struct *tsk, enum tick_dep_bits bit) { } -static inline void tick_dep_set_signal(struct signal_struct *signal, +static inline void 
tick_dep_set_signal(struct task_struct *tsk, + struct signal_struct *signal, enum tick_dep_bits bit) { } static inline void tick_dep_clear_signal(struct signal_struct *signal, enum tick_dep_bits bit) { } Index: linux-2.6/kernel/time/posix-cpu-timers.c =================================================================== --- linux-2.6.orig/kernel/time/posix-cpu-timers.c +++ linux-2.6/kernel/time/posix-cpu-timers.c @@ -523,7 +523,7 @@ static void arm_timer(struct k_itimer *t if (CPUCLOCK_PERTHREAD(timer->it_clock)) tick_dep_set_task(p, TICK_DEP_BIT_POSIX_TIMER); else - tick_dep_set_signal(p->signal, TICK_DEP_BIT_POSIX_TIMER); + tick_dep_set_signal(p, p->signal, TICK_DEP_BIT_POSIX_TIMER); } /* @@ -1358,7 +1358,7 @@ void set_process_cpu_timer(struct task_s if (*newval < *nextevt) *nextevt = *newval; - tick_dep_set_signal(tsk->signal, TICK_DEP_BIT_POSIX_TIMER); + tick_dep_set_signal(tsk, tsk->signal, TICK_DEP_BIT_POSIX_TIMER); } static int do_cpu_nanosleep(const clockid_t which_clock, int flags, Index: linux-2.6/kernel/time/tick-sched.c =================================================================== --- linux-2.6.orig/kernel/time/tick-sched.c +++ linux-2.6/kernel/time/tick-sched.c @@ -302,6 +302,27 @@ static void tick_nohz_dep_set_all(atomic } /* + * Set bit on nohz full dependency, kicking all cpus + * only if task can run on nohz full CPUs. + */ +static void tick_nohz_dep_set_all_cond(struct task_struct *tsk, + atomic_t *dep, + enum tick_dep_bits bit) +{ + int prev; + unsigned long flags; + + prev = atomic_fetch_or(BIT(bit), dep); + if (prev) + return; + + raw_spin_lock_irqsave(&tsk->pi_lock, flags); + if (cpumask_intersects(&tsk->cpus_mask, tick_nohz_full_mask)) + tick_nohz_full_kick_all(); + raw_spin_unlock_irqrestore(&tsk->pi_lock, flags); +} + +/* * Set a global tick dependency. Used by perf events that rely on freq and * by unstable clock. */ @@ -382,9 +403,11 @@ EXPORT_SYMBOL_GPL(tick_nohz_dep_clear_ta * Set a per-taskgroup tick dependency. 
Posix CPU timers need this in order to elapse * per process timers. */ -void tick_nohz_dep_set_signal(struct signal_struct *sig, enum tick_dep_bits bit) +void tick_nohz_dep_set_signal(struct task_struct *tsk, + struct signal_struct *sig, + enum tick_dep_bits bit) { - tick_nohz_dep_set_all(&sig->tick_dep_mask, bit); + tick_nohz_dep_set_all_cond(tsk, &sig->tick_dep_mask, bit); } void tick_nohz_dep_clear_signal(struct signal_struct *sig, enum tick_dep_bits bit)