I need a little rework of this patch. I'll send it later. Sorry for the noise.
Kirill

27.10.2012, 14:36, "Kirill Tkhai" <tk...@yandex.ru>:
> The current throttling logic always skips the RT class if rq->rt is throttled.
> It doesn't handle the special case when RT tasks are the only running tasks
> on the rq, so it is possible for the CPU to pick up the idle task while RT
> tasks are available.
> 
> This patch aims to avoid the above situation. The modified
> _pick_next_task_rt() looks at the total number of rq->rt tasks (including the
> sum over all child rt_rqs) and compares it with the number of all running
> tasks on the rq. If they are equal, the scheduler picks the highest-priority
> rq->rt task (children are considered too).
> 
> Later, the first unthrottled rt_rq will replace this task. The appearance of
> a fair task is handled in check_preempt_curr().
> 
> The patch changes the logic of pick_rt_task() and pick_next_highest_task_rt():
> a negative cpu now always makes the task "picked". There are no other users
> of this possibility, so nobody is affected by this change.
> 
> Signed-off-by: Kirill V Tkhai <tk...@yandex.ru>
> CC: Steven Rostedt <rost...@goodmis.org>
> CC: Ingo Molnar <mi...@kernel.org>
> CC: Peter Zijlstra <pet...@infradead.org>
> 
> ---
>  kernel/sched/core.c  |    6 +++-
>  kernel/sched/rt.c    |   97 ++++++++++++++++++++++++++++++++------------------
>  kernel/sched/sched.h |    3 +-
>  3 files changed, 69 insertions(+), 37 deletions(-)
> 
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index bf41f82..ecc9833 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -901,7 +901,9 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
>  {
>  	const struct sched_class *class;
>  
> -	if (p->sched_class == rq->curr->sched_class) {
> +	if (rq->curr->sched_class == rq->extended_class) {
> +		resched_task(rq->curr);
> +	} else if (p->sched_class == rq->curr->sched_class) {
>  		rq->curr->sched_class->check_preempt_curr(rq, p, flags);
>  	} else {
>  		for_each_class(class) {
> @@ -2771,6 +2773,7 @@ static void put_prev_task(struct rq *rq, struct task_struct *prev)
>  	if (prev->on_rq || rq->skip_clock_update < 0)
>  		update_rq_clock(rq);
>  	prev->sched_class->put_prev_task(rq, prev);
> +	rq->extended_class = NULL;
>  }
>  
>  /*
> @@ -6892,6 +6895,7 @@ void __init sched_init(void)
>  		rq->calc_load_update = jiffies + LOAD_FREQ;
>  		init_cfs_rq(&rq->cfs);
>  		init_rt_rq(&rq->rt, rq);
> +		rq->extended_class = NULL;
>  #ifdef CONFIG_FAIR_GROUP_SCHED
>  		root_task_group.shares = ROOT_TASK_GROUP_LOAD;
>  		INIT_LIST_HEAD(&rq->leaf_cfs_rq_list);
> diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
> index 418feb0..6f6da20 100644
> --- a/kernel/sched/rt.c
> +++ b/kernel/sched/rt.c
> @@ -274,15 +274,8 @@ static void update_rt_migration(struct rt_rq *rt_rq)
>  
>  static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
>  {
> -	struct task_struct *p;
> -
> -	if (!rt_entity_is_task(rt_se))
> -		return;
> -
> -	p = rt_task_of(rt_se);
> -	rt_rq = &rq_of_rt_rq(rt_rq)->rt;
> +	struct task_struct *p = rt_task_of(rt_se);
>  
> -	rt_rq->rt_nr_total++;
>  	if (p->nr_cpus_allowed > 1)
>  		rt_rq->rt_nr_migratory++;
>  
> @@ -291,15 +284,8 @@ static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
>  {
> -	struct task_struct *p;
> -
> -	if (!rt_entity_is_task(rt_se))
> -		return;
> -
> -	p = rt_task_of(rt_se);
> -	rt_rq = &rq_of_rt_rq(rt_rq)->rt;
> +	struct task_struct *p = rt_task_of(rt_se);
>  
> -	rt_rq->rt_nr_total--;
>  	if (p->nr_cpus_allowed > 1)
>  		rt_rq->rt_nr_migratory--;
>  
> @@ -467,6 +453,16 @@ static int rt_se_boosted(struct sched_rt_entity *rt_se)
>  	return p->prio != p->normal_prio;
>  }
>  
> +static void extended_rt_unthrottles(struct rq *rq, struct rt_rq *rt_rq)
> +{
> +	struct task_struct *curr = rq->curr;
> +
> +	if (rt_rq_of_se(&curr->rt) == rt_rq)
> +		rq->extended_class = NULL;
> +	else
> +		resched_task(curr);
> +}
> +
>  #ifdef CONFIG_SMP
>  static inline const struct cpumask *sched_rt_period_mask(void)
>  {
> @@ -826,6 +822,9 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
>  				 */
>  				if (rt_rq->rt_nr_running && rq->curr == rq->idle)
>  					rq->skip_clock_update = -1;
> +
> +				if (rq->extended_class == &rt_sched_class)
> +					extended_rt_unthrottles(rq, rt_rq);
>  			}
>  			if (rt_rq->rt_time || rt_rq->rt_nr_running)
>  				idle = 0;
> @@ -1071,8 +1070,14 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
>  	WARN_ON(!rt_prio(prio));
>  	rt_rq->rt_nr_running++;
>  
> +	if (rt_entity_is_task(rt_se)) {
> +		struct rt_rq *rt = &rq_of_rt_rq(rt_rq)->rt;
> +
> +		rt->rt_nr_total++;
> +		inc_rt_migration(rt_se, rt);
> +	}
> +
>  	inc_rt_prio(rt_rq, prio);
> -	inc_rt_migration(rt_se, rt_rq);
>  	inc_rt_group(rt_se, rt_rq);
>  }
>  
> @@ -1083,8 +1088,15 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
>  	WARN_ON(!rt_rq->rt_nr_running);
>  	rt_rq->rt_nr_running--;
>  
> +	if (rt_entity_is_task(rt_se)) {
> +		struct rt_rq *rt = &rq_of_rt_rq(rt_rq)->rt;
> +
> +		WARN_ON(!rt->rt_nr_total);
> +		rt->rt_nr_total--;
> +		dec_rt_migration(rt_se, rt);
> +	}
> +
>  	dec_rt_prio(rt_rq, rt_se_prio(rt_se));
> -	dec_rt_migration(rt_se, rt_rq);
>  	dec_rt_group(rt_se, rt_rq);
>  }
>  
> @@ -1362,28 +1374,41 @@ static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq,
>  	return next;
>  }
>  
> +static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu);
> +
>  static struct task_struct *_pick_next_task_rt(struct rq *rq)
>  {
> -	struct sched_rt_entity *rt_se;
> -	struct task_struct *p;
>  	struct rt_rq *rt_rq;
> +	struct task_struct *p;
> +	int running, rt_total;
>  
>  	rt_rq = &rq->rt;
> +	running = rt_rq->rt_nr_running;
>  
> -	if (!rt_rq->rt_nr_running)
> -		return NULL;
> +	/* If rq->rt is suitable to get tasks */
> +	if (running && !rt_rq_throttled(rt_rq)) {
> +		struct sched_rt_entity *rt_se;
>  
> -	if (rt_rq_throttled(rt_rq))
> +		do {
> +			rt_se = pick_next_rt_entity(rq, rt_rq);
> +			BUG_ON(!rt_se);
> +			rt_rq = group_rt_rq(rt_se);
> +		} while (rt_rq);
> +
> +		return rt_task_of(rt_se);
> +	}
> +
> +	rt_total = rt_rq->rt_nr_total;
> +
> +	/* If rq has no-RT tasks OR rt_rq and its children are empty */
> +	if (rt_total != rq->nr_running || !rt_total)
>  		return NULL;
>  
> -	do {
> -		rt_se = pick_next_rt_entity(rq, rt_rq);
> -		BUG_ON(!rt_se);
> -		rt_rq = group_rt_rq(rt_se);
> -	} while (rt_rq);
> +	/* All running tasks are RT. Let's avoid idle wasting CPU time */
> +	p = pick_next_highest_task_rt(rq, -1);
> +	rq->extended_class = &rt_sched_class;
>  
> -	p = rt_task_of(rt_se);
> -	p->se.exec_start = rq->clock_task;
> +	WARN_ON(!p || rq->cfs.h_nr_running);
>  
>  	return p;
>  }
> @@ -1392,9 +1417,11 @@ static struct task_struct *pick_next_task_rt(struct rq *rq)
>  {
>  	struct task_struct *p = _pick_next_task_rt(rq);
>  
> -	/* The running task is never eligible for pushing */
> -	if (p)
> +	if (p) {
> +		/* The running task is never eligible for pushing */
>  		dequeue_pushable_task(rq, p);
> +		p->se.exec_start = rq->clock_task;
> +	}
>  
>  #ifdef CONFIG_SMP
>  	/*
> @@ -1426,9 +1453,9 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
>  
>  static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
>  {
> -	if (!task_running(rq, p) &&
> -	    (cpu < 0 || cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) &&
> -	    (p->nr_cpus_allowed > 1))
> +	if (cpu < 0 || (!task_running(rq, p)
> +	    && (cpumask_test_cpu(cpu, tsk_cpus_allowed(p))
> +	    && p->nr_cpus_allowed > 1)))
>  		return 1;
>  	return 0;
>  }
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index 508e77e..9fdacef 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -294,6 +294,7 @@ static inline int rt_bandwidth_enabled(void)
>  struct rt_rq {
>  	struct rt_prio_array active;
>  	unsigned int rt_nr_running;
> +	unsigned long rt_nr_total;
>  #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
>  	struct {
>  		int curr; /* highest queued rt task prio */
> @@ -304,7 +305,6 @@ struct rt_rq {
>  #endif
>  #ifdef CONFIG_SMP
>  	unsigned long rt_nr_migratory;
> -	unsigned long rt_nr_total;
>  	int overloaded;
>  	struct plist_head pushable_tasks;
>  #endif
> @@ -396,6 +396,7 @@ struct rq {
>  #ifdef CONFIG_RT_GROUP_SCHED
>  	struct list_head leaf_rt_rq_list;
>  #endif
> +	const struct sched_class *extended_class;
>  
>  	/*
>  	 * This is part of a global counter where only the total sum
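
For readers following the pick logic described in the quoted changelog, here is a small
standalone sketch (not kernel code) of the decision the modified _pick_next_task_rt()
makes. The type fake_rq, the enum pick and pick_decision() are invented for this example;
only the counters they model (rq->nr_running, rt_nr_running, rt_nr_total) and the
throttled flag come from the patch.

#include <stdbool.h>
#include <stdio.h>

struct fake_rq {
	unsigned int nr_running;      /* all runnable tasks on this CPU (rq->nr_running)      */
	unsigned int rt_nr_running;   /* runnable RT tasks in rq->rt (rt_rq->rt_nr_running)    */
	unsigned long rt_nr_total;    /* RT tasks including child rt_rqs (rt_rq->rt_nr_total)  */
	bool rt_throttled;            /* rt_rq_throttled(&rq->rt)                              */
};

enum pick { PICK_RT_NORMAL, PICK_RT_EXTENDED, PICK_NONE };

/* Mirrors the branch structure of the patched _pick_next_task_rt(). */
static enum pick pick_decision(const struct fake_rq *rq)
{
	/* rq->rt is usable: the usual RT pick. */
	if (rq->rt_nr_running && !rq->rt_throttled)
		return PICK_RT_NORMAL;

	/* Some running tasks are not RT, or there are no RT tasks at all:
	 * fall through to the lower classes (fair/idle). */
	if (rq->rt_nr_total != rq->nr_running || !rq->rt_nr_total)
		return PICK_NONE;

	/* Every running task is RT but rq->rt is throttled: pick the
	 * highest RT task anyway instead of running the idle task. */
	return PICK_RT_EXTENDED;
}

int main(void)
{
	const struct fake_rq cases[] = {
		{ .nr_running = 3, .rt_nr_running = 2, .rt_nr_total = 2, .rt_throttled = false },
		{ .nr_running = 2, .rt_nr_running = 2, .rt_nr_total = 2, .rt_throttled = true  },
		{ .nr_running = 3, .rt_nr_running = 2, .rt_nr_total = 2, .rt_throttled = true  },
	};
	const char *result[] = { "RT (normal)", "RT (extended)", "none (fair/idle runs)" };

	for (unsigned int i = 0; i < sizeof(cases) / sizeof(cases[0]); i++)
		printf("case %u -> %s\n", i, result[pick_decision(&cases[i])]);
	return 0;
}

The second case is the one the patch targets: every running task is RT and rq->rt is
throttled, so without the change the CPU would sit in the idle task.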
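
Similarly, a minimal sketch of the rq->extended_class hand-off: a task picked through the
extended path is forced off the CPU by check_preempt_curr() as soon as anything else wakes
up, and extended_rt_unthrottles() either legitimizes it (its own group unthrottled) or
reschedules it (another group unthrottled). The type ext_rq and the helpers
wakeup_check_preempt() and rt_rq_unthrottled() are invented names; only the behavior they
model follows the patch.

#include <stdbool.h>
#include <stdio.h>

struct ext_rq {
	bool curr_is_extended_rt;  /* models rq->extended_class == &rt_sched_class */
	bool need_resched;         /* models resched_task(rq->curr)                */
	int curr_group;            /* the rt_rq (group) the running task belongs to */
};

/* Models the check_preempt_curr() change: a task running via the
 * extended path gives way as soon as any other task is woken. */
static void wakeup_check_preempt(struct ext_rq *rq)
{
	if (rq->curr_is_extended_rt)
		rq->need_resched = true;
	/* otherwise: the usual class-based preemption checks (omitted) */
}

/* Models extended_rt_unthrottles(): when a group is unthrottled, the
 * extended task either becomes a legitimate pick (its own group) or
 * is rescheduled so the unthrottled group can run. */
static void rt_rq_unthrottled(struct ext_rq *rq, int group)
{
	if (!rq->curr_is_extended_rt)
		return;
	if (group == rq->curr_group)
		rq->curr_is_extended_rt = false;  /* keep running, now a normal pick */
	else
		rq->need_resched = true;          /* hand the CPU over               */
}

int main(void)
{
	struct ext_rq rq = { .curr_is_extended_rt = true, .curr_group = 1 };

	rt_rq_unthrottled(&rq, 1);
	printf("own group unthrottled: extended=%d resched=%d\n",
	       rq.curr_is_extended_rt, rq.need_resched);

	rq = (struct ext_rq){ .curr_is_extended_rt = true, .curr_group = 1 };
	wakeup_check_preempt(&rq);
	printf("another task woke up:  extended=%d resched=%d\n",
	       rq.curr_is_extended_rt, rq.need_resched);
	return 0;
}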