From: Peter Zijlstra <pet...@infradead.org> Introduce the basic infrastructure to have a core wide rq->lock.
Signed-off-by: Peter Zijlstra (Intel) <pet...@infradead.org> Signed-off-by: Julien Desfossez <jdesfos...@digitalocean.com> Signed-off-by: Vineeth Remanan Pillai <vpil...@digitalocean.com> --- Changes in v3 ------------- - Fixes a crash during cpu offline/offline with coresched enabled - Vineeth Pillai kernel/Kconfig.preempt | 7 ++- kernel/sched/core.c | 113 ++++++++++++++++++++++++++++++++++++++++- kernel/sched/sched.h | 31 +++++++++++ 3 files changed, 148 insertions(+), 3 deletions(-) diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt index 0fee5fe6c899..02fe0bf26676 100644 --- a/kernel/Kconfig.preempt +++ b/kernel/Kconfig.preempt @@ -57,4 +57,9 @@ config PREEMPT endchoice config PREEMPT_COUNT - bool + bool + +config SCHED_CORE + bool + default y + depends on SCHED_SMT diff --git a/kernel/sched/core.c b/kernel/sched/core.c index b883c70674ba..b1ce33f9b106 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -60,6 +60,70 @@ __read_mostly int scheduler_running; */ int sysctl_sched_rt_runtime = 950000; +#ifdef CONFIG_SCHED_CORE + +DEFINE_STATIC_KEY_FALSE(__sched_core_enabled); + +/* + * The static-key + stop-machine variable are needed such that: + * + * spin_lock(rq_lockp(rq)); + * ... + * spin_unlock(rq_lockp(rq)); + * + * ends up locking and unlocking the _same_ lock, and all CPUs + * always agree on what rq has what lock. + * + * XXX entirely possible to selectively enable cores, don't bother for now. + */ +static int __sched_core_stopper(void *data) +{ + bool enabled = !!(unsigned long)data; + int cpu; + + for_each_online_cpu(cpu) + cpu_rq(cpu)->core_enabled = enabled; + + return 0; +} + +static DEFINE_MUTEX(sched_core_mutex); +static int sched_core_count; + +static void __sched_core_enable(void) +{ + // XXX verify there are no cookie tasks (yet) + + static_branch_enable(&__sched_core_enabled); + stop_machine(__sched_core_stopper, (void *)true, NULL); +} + +static void __sched_core_disable(void) +{ + // XXX verify there are no cookie tasks (left) + + stop_machine(__sched_core_stopper, (void *)false, NULL); + static_branch_disable(&__sched_core_enabled); +} + +void sched_core_get(void) +{ + mutex_lock(&sched_core_mutex); + if (!sched_core_count++) + __sched_core_enable(); + mutex_unlock(&sched_core_mutex); +} + +void sched_core_put(void) +{ + mutex_lock(&sched_core_mutex); + if (!--sched_core_count) + __sched_core_disable(); + mutex_unlock(&sched_core_mutex); +} + +#endif /* CONFIG_SCHED_CORE */ + /* * __task_rq_lock - lock the rq @p resides on. */ @@ -5790,8 +5854,15 @@ int sched_cpu_activate(unsigned int cpu) /* * When going up, increment the number of cores with SMT present. */ - if (cpumask_weight(cpu_smt_mask(cpu)) == 2) + if (cpumask_weight(cpu_smt_mask(cpu)) == 2) { static_branch_inc_cpuslocked(&sched_smt_present); +#ifdef CONFIG_SCHED_CORE + if (static_branch_unlikely(&__sched_core_enabled)) { + rq->core_enabled = true; + } +#endif + } + #endif set_cpu_active(cpu, true); @@ -5839,8 +5910,16 @@ int sched_cpu_deactivate(unsigned int cpu) /* * When going down, decrement the number of cores with SMT present. */ - if (cpumask_weight(cpu_smt_mask(cpu)) == 2) + if (cpumask_weight(cpu_smt_mask(cpu)) == 2) { +#ifdef CONFIG_SCHED_CORE + struct rq *rq = cpu_rq(cpu); + if (static_branch_unlikely(&__sched_core_enabled)) { + rq->core_enabled = false; + } +#endif static_branch_dec_cpuslocked(&sched_smt_present); + + } #endif if (!sched_smp_initialized) @@ -5865,6 +5944,28 @@ static void sched_rq_cpu_starting(unsigned int cpu) int sched_cpu_starting(unsigned int cpu) { +#ifdef CONFIG_SCHED_CORE + const struct cpumask *smt_mask = cpu_smt_mask(cpu); + struct rq *rq, *core_rq = NULL; + int i; + + for_each_cpu(i, smt_mask) { + rq = cpu_rq(i); + if (rq->core && rq->core == rq) + core_rq = rq; + } + + if (!core_rq) + core_rq = cpu_rq(cpu); + + for_each_cpu(i, smt_mask) { + rq = cpu_rq(i); + + WARN_ON_ONCE(rq->core && rq->core != core_rq); + rq->core = core_rq; + } +#endif /* CONFIG_SCHED_CORE */ + sched_rq_cpu_starting(cpu); sched_tick_start(cpu); return 0; @@ -5893,6 +5994,9 @@ int sched_cpu_dying(unsigned int cpu) update_max_interval(); nohz_balance_exit_idle(rq); hrtick_clear(rq); +#ifdef CONFIG_SCHED_CORE + rq->core = NULL; +#endif return 0; } #endif @@ -6091,6 +6195,11 @@ void __init sched_init(void) #endif /* CONFIG_SMP */ hrtick_rq_init(rq); atomic_set(&rq->nr_iowait, 0); + +#ifdef CONFIG_SCHED_CORE + rq->core = NULL; + rq->core_enabled = 0; +#endif } set_load_weight(&init_task, false); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index a024dd80eeb3..eb38063221d0 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -952,6 +952,12 @@ struct rq { /* Must be inspected within a rcu lock section */ struct cpuidle_state *idle_state; #endif + +#ifdef CONFIG_SCHED_CORE + /* per rq */ + struct rq *core; + unsigned int core_enabled; +#endif }; #ifdef CONFIG_FAIR_GROUP_SCHED @@ -979,11 +985,36 @@ static inline int cpu_of(struct rq *rq) #endif } +#ifdef CONFIG_SCHED_CORE +DECLARE_STATIC_KEY_FALSE(__sched_core_enabled); + +static inline bool sched_core_enabled(struct rq *rq) +{ + return static_branch_unlikely(&__sched_core_enabled) && rq->core_enabled; +} + +static inline raw_spinlock_t *rq_lockp(struct rq *rq) +{ + if (sched_core_enabled(rq)) + return &rq->core->__lock; + + return &rq->__lock; +} + +#else /* !CONFIG_SCHED_CORE */ + +static inline bool sched_core_enabled(struct rq *rq) +{ + return false; +} + static inline raw_spinlock_t *rq_lockp(struct rq *rq) { return &rq->__lock; } +#endif /* CONFIG_SCHED_CORE */ + #ifdef CONFIG_SCHED_SMT extern void __update_idle_core(struct rq *rq); -- 2.17.1