* Mike Galbraith <[EMAIL PROTECTED]> wrote:

> On Tue, 2007-08-28 at 13:32 +0200, Ingo Molnar wrote:
> > Linus, please pull the latest scheduler git tree from:
> >
> >   git://git.kernel.org/pub/scm/linux/kernel/git/mingo/linux-2.6-sched.git
> >
> > no big changes - 5 small fixes and 1 small cleanup:
>
> FWIW, I spent a few hours testing these patches with various loads,
> and all was peachy here. No multimedia or interactivity aberrations
> noted.
great! Btw., there's another refinement Peter and I are working on (see
the patch below): to place new tasks into the existing 'scheduling flow'
in a more seamless way. In practice this should mean fewer firefox spikes
during a kbuild workload. If you have some time to try it, could you add
the patch below to your tree too, and see what happens during fork-happy
workloads? It does not seem to be overly urgent to apply at the moment,
but it is a nice touch I think.

	Ingo

------------------------>
Subject: sched: place new tasks in the middle of the task pool
From: Peter Zijlstra <[EMAIL PROTECTED]>

Place new tasks in the middle of the wait_runtime average. This smooths
out latency spikes caused by freshly started tasks, without being unfair
to those tasks. Basically new tasks start right into the 'flow' of
wait_runtime that exists in the system at that moment.

[ [EMAIL PROTECTED]: changed it to use cfs_rq->wait_runtime ]

Signed-off-by: Peter Zijlstra <[EMAIL PROTECTED]>
Signed-off-by: Ingo Molnar <[EMAIL PROTECTED]>
---
 kernel/sched.c      |    1 
 kernel/sched_fair.c |   59 +++++++++++++++++++++++++++++-----------------------
 2 files changed, 33 insertions(+), 27 deletions(-)

Index: linux/kernel/sched.c
===================================================================
--- linux.orig/kernel/sched.c
+++ linux/kernel/sched.c
@@ -858,7 +858,6 @@ static void dec_nr_running(struct task_s
 
 static void set_load_weight(struct task_struct *p)
 {
-	task_rq(p)->cfs.wait_runtime -= p->se.wait_runtime;
 	p->se.wait_runtime = 0;
 
 	if (task_has_rt_policy(p)) {
Index: linux/kernel/sched_fair.c
===================================================================
--- linux.orig/kernel/sched_fair.c
+++ linux/kernel/sched_fair.c
@@ -86,8 +86,8 @@ unsigned int sysctl_sched_features __rea
 		SCHED_FEAT_SLEEPER_AVG		*0 |
 		SCHED_FEAT_SLEEPER_LOAD_AVG	*1 |
 		SCHED_FEAT_PRECISE_CPU_LOAD	*1 |
-		SCHED_FEAT_START_DEBIT		*1 |
-		SCHED_FEAT_SKIP_INITIAL		*0;
+		SCHED_FEAT_START_DEBIT		*0 |
+		SCHED_FEAT_SKIP_INITIAL		*1;
 
 extern struct sched_class fair_sched_class;
 
@@ -194,6 +194,8 @@ __enqueue_entity(struct cfs_rq *cfs_rq,
 	update_load_add(&cfs_rq->load, se->load.weight);
 	cfs_rq->nr_running++;
 	se->on_rq = 1;
+
+	cfs_rq->wait_runtime += se->wait_runtime;
 }
 
 static inline void
@@ -205,6 +207,8 @@ __dequeue_entity(struct cfs_rq *cfs_rq,
 	update_load_sub(&cfs_rq->load, se->load.weight);
 	cfs_rq->nr_running--;
 	se->on_rq = 0;
+
+	cfs_rq->wait_runtime -= se->wait_runtime;
 }
 
 static inline struct rb_node *first_fair(struct cfs_rq *cfs_rq)
@@ -326,9 +330,9 @@ __add_wait_runtime(struct cfs_rq *cfs_rq
 static void
 add_wait_runtime(struct cfs_rq *cfs_rq, struct sched_entity *se, long delta)
 {
-	schedstat_add(cfs_rq, wait_runtime, -se->wait_runtime);
+	cfs_rq->wait_runtime -= se->wait_runtime;
 	__add_wait_runtime(cfs_rq, se, delta);
-	schedstat_add(cfs_rq, wait_runtime, se->wait_runtime);
+	cfs_rq->wait_runtime += se->wait_runtime;
 }
 
 /*
@@ -574,7 +578,6 @@ static void __enqueue_sleeper(struct cfs
 	prev_runtime = se->wait_runtime;
 	__add_wait_runtime(cfs_rq, se, delta_fair);
-	schedstat_add(cfs_rq, wait_runtime, se->wait_runtime);
 	delta_fair = se->wait_runtime - prev_runtime;
 
 	/*
@@ -662,7 +665,6 @@ dequeue_entity(struct cfs_rq *cfs_rq, st
 			if (tsk->state & TASK_UNINTERRUPTIBLE)
 				se->block_start = rq_of(cfs_rq)->clock;
 		}
-		cfs_rq->wait_runtime -= se->wait_runtime;
 #endif
 	}
 	__dequeue_entity(cfs_rq, se);
@@ -671,7 +673,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, st
 /*
  * Preempt the current task with a newly woken task if needed:
  */
-static int
+static void
 __check_preempt_curr_fair(struct cfs_rq *cfs_rq, struct sched_entity *se,
 			  struct sched_entity *curr, unsigned long granularity)
 {
@@ -684,9 +686,8 @@ __check_preempt_curr_fair(struct cfs_rq
 	 */
 	if (__delta > niced_granularity(curr, granularity)) {
 		resched_task(rq_of(cfs_rq)->curr);
-		return 1;
+		curr->prev_sum_exec_runtime = curr->sum_exec_runtime;
 	}
-	return 0;
 }
 
 static inline void
@@ -762,8 +763,7 @@ static void entity_tick(struct cfs_rq *c
 	if (delta_exec > ideal_runtime)
 		gran = 0;
 
-	if (__check_preempt_curr_fair(cfs_rq, next, curr, gran))
-		curr->prev_sum_exec_runtime = curr->sum_exec_runtime;
+	__check_preempt_curr_fair(cfs_rq, next, curr, gran);
 }
 
 /**************************************************
@@ -1087,6 +1087,8 @@ static void task_tick_fair(struct rq *rq
 	}
 }
 
+#define swap(a,b) do { __typeof__(a) tmp = (a); (a) = (b); (b)=tmp; } while (0)
+
 /*
  * Share the fairness runtime between parent and child, thus the
  * total amount of pressure for CPU stays equal - new tasks
@@ -1102,14 +1104,27 @@ static void task_new_fair(struct rq *rq,
 	sched_info_queued(p);
 
 	update_curr(cfs_rq);
-	update_stats_enqueue(cfs_rq, se);
+	if ((long)cfs_rq->wait_runtime < 0)
+		se->wait_runtime = (long)cfs_rq->wait_runtime /
+				(long)cfs_rq->nr_running;
 	/*
-	 * Child runs first: we let it run before the parent
-	 * until it reschedules once. We set up the key so that
-	 * it will preempt the parent:
+	 * The statistical average of wait_runtime is about
+	 * -granularity/2, so initialize the task with that:
 	 */
-	se->fair_key = curr->fair_key -
-		niced_granularity(curr, sched_granularity(cfs_rq)) - 1;
+	if (sysctl_sched_features & SCHED_FEAT_START_DEBIT) {
+		__add_wait_runtime(cfs_rq, se,
+			-niced_granularity(se, sched_granularity(cfs_rq))/2);
+	}
+
+	update_stats_enqueue(cfs_rq, se);
+
+	if (sysctl_sched_child_runs_first && (se->fair_key > curr->fair_key)) {
+		dequeue_entity(cfs_rq, curr, 0);
+		swap(se->wait_runtime, curr->wait_runtime);
+		update_stats_enqueue(cfs_rq, se);
+		enqueue_entity(cfs_rq, curr, 0);
+	}
+
 	/*
 	 * The first wait is dominated by the child-runs-first logic,
 	 * so do not credit it with that waiting time yet:
@@ -1117,16 +1132,8 @@ static void task_new_fair(struct rq *rq,
 	if (sysctl_sched_features & SCHED_FEAT_SKIP_INITIAL)
 		se->wait_start_fair = 0;
 
-	/*
-	 * The statistical average of wait_runtime is about
-	 * -granularity/2, so initialize the task with that:
-	 */
-	if (sysctl_sched_features & SCHED_FEAT_START_DEBIT) {
-		se->wait_runtime = -(sched_granularity(cfs_rq) / 2);
-		schedstat_add(cfs_rq, wait_runtime, se->wait_runtime);
-	}
-
 	__enqueue_entity(cfs_rq, se);
+	__check_preempt_curr_fair(cfs_rq, __pick_next_entity(cfs_rq), curr, 0);
 }
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
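
For anyone who wants to play with the placement idea outside the kernel,
here is a small user-space sketch of what the task_new_fair() change
boils down to: seed the child's wait_runtime from the queue-wide average
and optionally apply the START_DEBIT charge of about half a granularity.
The struct layout, the granularity constant and the sample numbers below
are made up for illustration; they are not the kernel's actual CFS types
or tunables.

#include <stdio.h>

struct entity {
	long wait_runtime;	/* CPU time the task is "owed" by the scheduler */
};

struct runqueue {
	long wait_runtime;	/* sum of wait_runtime over all queued tasks */
	long nr_running;	/* number of queued tasks */
};

#define GRANULARITY 2000000L	/* ~2ms in ns; illustrative value only */

/* Seed a freshly forked task so it joins the current wait_runtime "flow". */
static void place_new_task(struct runqueue *rq, struct entity *se, int start_debit)
{
	se->wait_runtime = 0;

	/*
	 * If the queue as a whole is behind (negative aggregate wait_runtime),
	 * start the child at the per-task average instead of at zero, so it
	 * neither jumps ahead of already-queued tasks nor lags behind them.
	 */
	if (rq->wait_runtime < 0 && rq->nr_running > 0)
		se->wait_runtime = rq->wait_runtime / rq->nr_running;

	/*
	 * Optional START_DEBIT: the statistical average of wait_runtime is
	 * roughly -granularity/2, so charge that up front.
	 */
	if (start_debit)
		se->wait_runtime -= GRANULARITY / 2;

	/* account the new task into the queue-wide aggregate */
	rq->wait_runtime += se->wait_runtime;
	rq->nr_running++;
}

int main(void)
{
	/* three queued tasks, collectively 9ms behind (made-up numbers) */
	struct runqueue rq = { .wait_runtime = -9000000L, .nr_running = 3 };
	struct entity child;

	place_new_task(&rq, &child, 1);
	printf("child wait_runtime: %ld ns\n", child.wait_runtime);
	printf("rq wait_runtime: %ld ns, nr_running: %ld\n",
	       rq.wait_runtime, rq.nr_running);
	return 0;
}

It is only meant to make the fairness argument concrete: the child
inherits the average backlog instead of starting ahead of tasks that are
already waiting.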