On Mon, 2012-10-29 at 16:27 -0400, Steven Rostedt wrote: > #ifdef CONFIG_CPUSETS > > @@ -235,4 +236,34 @@ static inline bool put_mems_allowed(unsigned int seq) > > #endif /* !CONFIG_CPUSETS */ > > +#ifdef CONFIG_CPUSETS_NO_HZ > + > +DECLARE_PER_CPU(atomic_t, cpu_adaptive_nohz_ref); > + > +static inline bool cpuset_cpu_adaptive_nohz(int cpu) > +{ > + atomic_t *ref = &per_cpu(cpu_adaptive_nohz_ref, cpu); > + > + if (atomic_add_return(0, ref) > 0)
I'm assuming you do the atomic_add_return() for the implicit memory barrier? Yuck! Please comment this. I see that rcutree.c does the same thing without a comment. Bad Paul, bad! > + return true; > + > + return false; > +} > + > +static inline bool cpuset_adaptive_nohz(void) > +{ > + /* > + * We probably want to do atomic_read() when we read > + * locally to avoid the overhead of an ordered add. > + * For that we have to do the dec of the ref locally as > + * well. Does it matter if we miss the dec? What other synchronization is used?

	CPU 1				CPU 2
	-----				-----
	var = atomic_add_return(0, ref)
					atomic_dec(ref);
	if (var > 0)
		returns true.

For local cases, as this seems to be in a fast path, we should use this_cpu_read() as well. > + */ > + return cpuset_cpu_adaptive_nohz(smp_processor_id()); > +} > +#else > +static inline bool cpuset_cpu_adaptive_nohz(int cpu) { return false; } > +static inline bool cpuset_adaptive_nohz(void) { return false; } > + > +#endif /* CONFIG_CPUSETS_NO_HZ */ > + > #endif /* _LINUX_CPUSET_H */ > diff --git a/init/Kconfig b/init/Kconfig > index 6fdd6e3..ffdeeab 100644 > --- a/init/Kconfig > +++ b/init/Kconfig > @@ -749,6 +749,14 @@ config PROC_PID_CPUSET > depends on CPUSETS > default y > > +config CPUSETS_NO_HZ > + bool "Tickless cpusets" > + depends on CPUSETS && HAVE_CPUSETS_NO_HZ > + help > + This options let you apply a nohz property to a cpuset such > + that the periodic timer tick tries to be avoided when possible on > + the concerned CPUs. 
> + > config CGROUP_CPUACCT > bool "Simple CPU accounting cgroup subsystem" > help > diff --git a/kernel/cpuset.c b/kernel/cpuset.c > index f33c715..6319d8e 100644 > --- a/kernel/cpuset.c > +++ b/kernel/cpuset.c > @@ -145,6 +145,7 @@ typedef enum { > CS_SCHED_LOAD_BALANCE, > CS_SPREAD_PAGE, > CS_SPREAD_SLAB, > + CS_ADAPTIVE_NOHZ, > } cpuset_flagbits_t; > > /* the type of hotplug event */ > @@ -189,6 +190,11 @@ static inline int is_spread_slab(const struct cpuset *cs) > return test_bit(CS_SPREAD_SLAB, &cs->flags); > } > > +static inline int is_adaptive_nohz(const struct cpuset *cs) > +{ > + return test_bit(CS_ADAPTIVE_NOHZ, &cs->flags); > +} We can move this into the #ifdef CONFIG_CPUSETS_NO_HZ as well, and have the #else version just return zero. Why use test_bit() when we already know the answer? > + > static struct cpuset top_cpuset = { > .flags = ((1 << CS_CPU_EXCLUSIVE) | (1 << CS_MEM_EXCLUSIVE)), > }; > @@ -1190,6 +1196,32 @@ static void cpuset_change_flag(struct task_struct *tsk, > cpuset_update_task_spread_flag(cgroup_cs(scan->cg), tsk); > } > > +#ifdef CONFIG_CPUSETS_NO_HZ > + > +DEFINE_PER_CPU(atomic_t, cpu_adaptive_nohz_ref); > + > +static void update_nohz_cpus(struct cpuset *old_cs, struct cpuset *cs) > +{ > + int cpu; > + int val; > + > + if (is_adaptive_nohz(old_cs) == is_adaptive_nohz(cs)) > + return; > + > + for_each_cpu(cpu, cs->cpus_allowed) { > + atomic_t *ref = &per_cpu(cpu_adaptive_nohz_ref, cpu); > + if (is_adaptive_nohz(cs)) > + atomic_inc(ref); > + else > + atomic_dec(ref); > + } > +} > +#else > +static inline void update_nohz_cpus(struct cpuset *old_cs, struct cpuset *cs) > +{ > +} > +#endif > + -- Steve -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/