From: Frederic Weisbecker <fweis...@gmail.com> Prepare the interface to implement the nohz cpuset flag. This flag, once set, will tell the system to try to shut down the periodic timer tick when possible.
We use a per-CPU refcounter here. As long as a CPU is contained in at least one cpuset that has the nohz flag set, it is part of the set of CPUs that run in adaptive nohz mode. [ include build fix from Zen Lin ] Signed-off-by: Frederic Weisbecker <fweis...@gmail.com> Cc: Alessio Igor Bogani <abog...@kernel.org> Cc: Andrew Morton <a...@linux-foundation.org> Cc: Avi Kivity <a...@redhat.com> Cc: Chris Metcalf <cmetc...@tilera.com> Cc: Christoph Lameter <c...@linux.com> Cc: Daniel Lezcano <daniel.lezc...@linaro.org> Cc: Geoff Levand <ge...@infradead.org> Cc: Gilad Ben Yossef <gi...@benyossef.com> Cc: Hakan Akkan <hakanak...@gmail.com> Cc: Ingo Molnar <mi...@kernel.org> Cc: Kevin Hilman <khil...@ti.com> Cc: Max Krasnyansky <m...@qualcomm.com> Cc: Paul E. McKenney <paul...@linux.vnet.ibm.com> Cc: Peter Zijlstra <pet...@infradead.org> Cc: Stephen Hemminger <shemmin...@vyatta.com> Cc: Steven Rostedt <rost...@goodmis.org> Cc: Sven-Thorsten Dietrich <thebigcorporat...@gmail.com> Cc: Thomas Gleixner <t...@linutronix.de> --- arch/Kconfig | 3 +++ include/linux/cpuset.h | 31 ++++++++++++++++++++++++++++ init/Kconfig | 8 ++++++++ kernel/cpuset.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 94 insertions(+), 1 deletion(-) diff --git a/arch/Kconfig b/arch/Kconfig index 366ec06..8e2162f6 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -239,6 +239,9 @@ config HAVE_ARCH_JUMP_LABEL bool config HAVE_ARCH_MUTEX_CPU_RELAX + bool + +config HAVE_CPUSETS_NO_HZ bool config HAVE_RCU_TABLE_FREE diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 838320f..7e7eb41 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -13,6 +13,7 @@ #include <linux/nodemask.h> #include <linux/cgroup.h> #include <linux/mm.h> +#include <linux/atomic.h> #ifdef CONFIG_CPUSETS @@ -235,4 +236,34 @@ static inline bool put_mems_allowed(unsigned int seq) #endif /* !CONFIG_CPUSETS */ +#ifdef CONFIG_CPUSETS_NO_HZ + +DECLARE_PER_CPU(atomic_t, cpu_adaptive_nohz_ref); + 
+static inline bool cpuset_cpu_adaptive_nohz(int cpu) +{ + atomic_t *ref = &per_cpu(cpu_adaptive_nohz_ref, cpu); + + if (atomic_add_return(0, ref) > 0) + return true; + + return false; +} + +static inline bool cpuset_adaptive_nohz(void) +{ + /* + * We probably want to do atomic_read() when we read + * locally to avoid the overhead of an ordered add. + * For that we have to do the dec of the ref locally as + * well. + */ + return cpuset_cpu_adaptive_nohz(smp_processor_id()); +} +#else +static inline bool cpuset_cpu_adaptive_nohz(int cpu) { return false; } +static inline bool cpuset_adaptive_nohz(void) { return false; } + +#endif /* CONFIG_CPUSETS_NO_HZ */ + #endif /* _LINUX_CPUSET_H */ diff --git a/init/Kconfig b/init/Kconfig index 6fdd6e3..ffdeeab 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -749,6 +749,14 @@ config PROC_PID_CPUSET depends on CPUSETS default y +config CPUSETS_NO_HZ + bool "Tickless cpusets" + depends on CPUSETS && HAVE_CPUSETS_NO_HZ + help + This options let you apply a nohz property to a cpuset such + that the periodic timer tick tries to be avoided when possible on + the concerned CPUs. 
+ config CGROUP_CPUACCT bool "Simple CPU accounting cgroup subsystem" help diff --git a/kernel/cpuset.c b/kernel/cpuset.c index f33c715..6319d8e 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -145,6 +145,7 @@ typedef enum { CS_SCHED_LOAD_BALANCE, CS_SPREAD_PAGE, CS_SPREAD_SLAB, + CS_ADAPTIVE_NOHZ, } cpuset_flagbits_t; /* the type of hotplug event */ @@ -189,6 +190,11 @@ static inline int is_spread_slab(const struct cpuset *cs) return test_bit(CS_SPREAD_SLAB, &cs->flags); } +static inline int is_adaptive_nohz(const struct cpuset *cs) +{ + return test_bit(CS_ADAPTIVE_NOHZ, &cs->flags); +} + static struct cpuset top_cpuset = { .flags = ((1 << CS_CPU_EXCLUSIVE) | (1 << CS_MEM_EXCLUSIVE)), }; @@ -1190,6 +1196,32 @@ static void cpuset_change_flag(struct task_struct *tsk, cpuset_update_task_spread_flag(cgroup_cs(scan->cg), tsk); } +#ifdef CONFIG_CPUSETS_NO_HZ + +DEFINE_PER_CPU(atomic_t, cpu_adaptive_nohz_ref); + +static void update_nohz_cpus(struct cpuset *old_cs, struct cpuset *cs) +{ + int cpu; + int val; + + if (is_adaptive_nohz(old_cs) == is_adaptive_nohz(cs)) + return; + + for_each_cpu(cpu, cs->cpus_allowed) { + atomic_t *ref = &per_cpu(cpu_adaptive_nohz_ref, cpu); + if (is_adaptive_nohz(cs)) + atomic_inc(ref); + else + atomic_dec(ref); + } +} +#else +static inline void update_nohz_cpus(struct cpuset *old_cs, struct cpuset *cs) +{ +} +#endif + /* * update_tasks_flags - update the spread flags of tasks in the cpuset. 
* @cs: the cpuset in which each task's spread flags needs to be changed @@ -1255,6 +1287,8 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, spread_flag_changed = ((is_spread_slab(cs) != is_spread_slab(trialcs)) || (is_spread_page(cs) != is_spread_page(trialcs))); + update_nohz_cpus(cs, trialcs); + mutex_lock(&callback_mutex); cs->flags = trialcs->flags; mutex_unlock(&callback_mutex); @@ -1465,6 +1499,7 @@ typedef enum { FILE_MEMORY_PRESSURE, FILE_SPREAD_PAGE, FILE_SPREAD_SLAB, + FILE_ADAPTIVE_NOHZ, } cpuset_filetype_t; static int cpuset_write_u64(struct cgroup *cgrp, struct cftype *cft, u64 val) @@ -1504,6 +1539,11 @@ static int cpuset_write_u64(struct cgroup *cgrp, struct cftype *cft, u64 val) case FILE_SPREAD_SLAB: retval = update_flag(CS_SPREAD_SLAB, cs, val); break; +#ifdef CONFIG_CPUSETS_NO_HZ + case FILE_ADAPTIVE_NOHZ: + retval = update_flag(CS_ADAPTIVE_NOHZ, cs, val); + break; +#endif default: retval = -EINVAL; break; @@ -1663,6 +1703,10 @@ static u64 cpuset_read_u64(struct cgroup *cont, struct cftype *cft) return is_spread_page(cs); case FILE_SPREAD_SLAB: return is_spread_slab(cs); +#ifdef CONFIG_CPUSETS_NO_HZ + case FILE_ADAPTIVE_NOHZ: + return is_adaptive_nohz(cs); +#endif default: BUG(); } @@ -1771,7 +1815,14 @@ static struct cftype files[] = { .write_u64 = cpuset_write_u64, .private = FILE_SPREAD_SLAB, }, - +#ifdef CONFIG_CPUSETS_NO_HZ + { + .name = "adaptive_nohz", + .read_u64 = cpuset_read_u64, + .write_u64 = cpuset_write_u64, + .private = FILE_ADAPTIVE_NOHZ, + }, +#endif { .name = "memory_pressure_enabled", .flags = CFTYPE_ONLY_ON_ROOT, -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/